Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand use of .oindex and .vindex #8790

Merged
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
39830bd
refactor __getitem__() by removing vectorized and orthogonal indexing…
andersy005 Feb 28, 2024
fd5b162
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Feb 28, 2024
9f758ed
implement explicit routing of vectorized and outer indexers
andersy005 Feb 29, 2024
283e733
Add VectorizedIndexer and OuterIndexer to ScipyArrayWrapper's __getit…
andersy005 Feb 29, 2024
7b7451e
Refactor indexing in LazilyIndexedArray and LazilyVectorizedIndexedArray
andersy005 Feb 29, 2024
f84a3d6
Add vindex and oindex methods to StackedBytesArray
andersy005 Feb 29, 2024
d9dc5df
handle explicitlyindexed arrays
andersy005 Feb 29, 2024
9faa280
Refactor LazilyIndexedArray and LazilyVectorizedIndexedArray classes
andersy005 Feb 29, 2024
928296f
Remove TODO comments in indexing.py
andersy005 Feb 29, 2024
2bd21dc
use indexing.explicit_indexing_adapter() in scipy backend
andersy005 Mar 1, 2024
0becdeb
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 1, 2024
7af526d
update docstring
andersy005 Mar 1, 2024
96324a9
fix docstring
andersy005 Mar 1, 2024
ead0b90
Add _oindex_get and _vindex_get methods to NativeEndiannessArray and …
andersy005 Mar 1, 2024
30a3e9f
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 5, 2024
215036c
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 7, 2024
5a78ba5
Update indexing support in ScipyArrayWrapper
andersy005 Mar 7, 2024
bec65a7
Update xarray/tests/test_indexing.py
andersy005 Mar 7, 2024
a578fa6
Fix assert statement in test_decompose_indexers
andersy005 Mar 7, 2024
6b6163f
add comments to clarifying why the else branch is needed
andersy005 Mar 7, 2024
d806772
Add _oindex_get and _vindex_get methods to _ElementwiseFunctionArray
andersy005 Mar 7, 2024
14c62ac
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 8, 2024
30a1e9b
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 9, 2024
b9a3f0f
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 11, 2024
cc10b80
update whats-new
andersy005 Mar 11, 2024
f872083
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 14, 2024
97617af
Refactor apply_indexer function in indexing.py and variable.py for co…
andersy005 Mar 14, 2024
ebb1699
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
andersy005 Mar 14, 2024
0ec56cd
cleanup
dcherian Mar 15, 2024
ba9f386
Merge branch 'main' into simplify-explicitly-indexed-array-getitem
dcherian Mar 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions xarray/backends/scipy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
is_valid_nc3_name,
)
from xarray.backends.store import StoreBackendEntrypoint
from xarray.core.indexing import NumpyIndexingAdapter
from xarray.core import indexing
from xarray.core.utils import (
Frozen,
FrozenDict,
Expand Down Expand Up @@ -63,8 +63,15 @@ def get_variable(self, needs_lock=True):
ds = self.datastore._manager.acquire(needs_lock)
return ds.variables[self.variable_name]

def _getitem(self, key):
with self.datastore.lock:
data = self.get_variable(needs_lock=False).data
return data[key]

def __getitem__(self, key):
data = NumpyIndexingAdapter(self.get_variable().data)[key]
data = indexing.explicit_indexing_adapter(
dcherian marked this conversation as resolved.
Show resolved Hide resolved
key, self.shape, indexing.IndexingSupport.BASIC, self._getitem
)
# Copy data if the source file is mmapped. This makes things consistent
# with the netCDF4 library by ensuring we can safely read arrays even
# after closing associated files.
Expand Down
6 changes: 6 additions & 0 deletions xarray/coding/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,12 @@ def shape(self) -> tuple[int, ...]:
def __repr__(self):
return f"{type(self).__name__}({self.array!r})"

def _vindex_get(self, key):
return _numpy_char_to_bytes(self.array.vindex[key])

def _oindex_get(self, key):
return _numpy_char_to_bytes(self.array.oindex[key])

def __getitem__(self, key):
# require slicing the last dimension completely
key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim))
Expand Down
12 changes: 12 additions & 0 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,12 @@ def __init__(self, array) -> None:
def dtype(self) -> np.dtype:
return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize))

def _oindex_get(self, key):
return np.asarray(self.array.oindex[key], dtype=self.dtype)

def _vindex_get(self, key):
return np.asarray(self.array.vindex[key], dtype=self.dtype)

def __getitem__(self, key) -> np.ndarray:
return np.asarray(self.array[key], dtype=self.dtype)

Expand Down Expand Up @@ -141,6 +147,12 @@ def __init__(self, array) -> None:
def dtype(self) -> np.dtype:
return np.dtype("bool")

def _oindex_get(self, key):
return np.asarray(self.array.oindex[key], dtype=self.dtype)

def _vindex_get(self, key):
return np.asarray(self.array.vindex[key], dtype=self.dtype)

def __getitem__(self, key) -> np.ndarray:
return np.asarray(self.array[key], dtype=self.dtype)

Expand Down
99 changes: 67 additions & 32 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,13 @@ def _oindex_get(self, key):
def _vindex_get(self, key):
raise NotImplementedError("This method should be overridden")

def _check_and_raise_if_non_basic_indexer(self, key):
if isinstance(key, (VectorizedIndexer, OuterIndexer)):
raise TypeError(
"Vectorized indexing with vectorized or outer indexers is not supported. "
"Please use .vindex and .oindex properties to index the array."
)

@property
def oindex(self):
return IndexCallable(self._oindex_get)
Expand All @@ -517,7 +524,15 @@ def get_duck_array(self):

def __getitem__(self, key):
key = expanded_indexer(key, self.ndim)
result = self.array[self.indexer_cls(key)]
indexer = self.indexer_cls(key)

if isinstance(indexer, OuterIndexer):
result = self.array.oindex[indexer]
elif isinstance(indexer, VectorizedIndexer):
result = self.array.vindex[indexer]
else:
result = self.array[indexer]
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
dcherian marked this conversation as resolved.
Show resolved Hide resolved

if isinstance(result, ExplicitlyIndexed):
return type(self)(result, self.indexer_cls)
else:
Expand Down Expand Up @@ -577,7 +592,16 @@ def shape(self) -> tuple[int, ...]:
return tuple(shape)

def get_duck_array(self):
array = self.array[self.key]
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
if isinstance(self.key, VectorizedIndexer):
array = self.array.vindex[self.key]
elif isinstance(self.key, OuterIndexer):
array = self.array.oindex[self.key]
else:
array = self.array[self.key]
dcherian marked this conversation as resolved.
Show resolved Hide resolved
else:
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
array = self.array[self.key]

# self.array[self.key] is now a numpy array when
# self.array is a BackendArray subclass
# and self.key is BasicIndexer((slice(None, None, None),))
Expand All @@ -594,12 +618,10 @@ def _oindex_get(self, indexer):

def _vindex_get(self, indexer):
array = LazilyVectorizedIndexedArray(self.array, self.key)
return array[indexer]
return array.vindex[indexer]

def __getitem__(self, indexer):
if isinstance(indexer, VectorizedIndexer):
array = LazilyVectorizedIndexedArray(self.array, self.key)
return array[indexer]
self._check_and_raise_if_non_basic_indexer(indexer)
return type(self)(self.array, self._updated_key(indexer))

def __setitem__(self, key, value):
Expand Down Expand Up @@ -643,7 +665,16 @@ def shape(self) -> tuple[int, ...]:
return np.broadcast(*self.key.tuple).shape

def get_duck_array(self):
array = self.array[self.key]

if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
if isinstance(self.key, VectorizedIndexer):
array = self.array.vindex[self.key]
elif isinstance(self.key, OuterIndexer):
array = self.array.oindex[self.key]
else:
array = self.array[self.key]
dcherian marked this conversation as resolved.
Show resolved Hide resolved
else:
array = self.array[self.key]
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
# self.array[self.key] is now a numpy array when
# self.array is a BackendArray subclass
# and self.key is BasicIndexer((slice(None, None, None),))
Expand All @@ -662,6 +693,7 @@ def _vindex_get(self, indexer):
return type(self)(self.array, self._updated_key(indexer))

def __getitem__(self, indexer):
self._check_and_raise_if_non_basic_indexer(indexer)
# If the indexed array becomes a scalar, return LazilyIndexedArray
if all(isinstance(ind, integer_types) for ind in indexer.tuple):
key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple))
Expand Down Expand Up @@ -706,12 +738,13 @@ def get_duck_array(self):
return self.array.get_duck_array()

def _oindex_get(self, key):
return type(self)(_wrap_numpy_scalars(self.array[key]))
return type(self)(_wrap_numpy_scalars(self.array.oindex[key]))

def _vindex_get(self, key):
return type(self)(_wrap_numpy_scalars(self.array[key]))
return type(self)(_wrap_numpy_scalars(self.array.vindex[key]))

def __getitem__(self, key):
self._check_and_raise_if_non_basic_indexer(key)
return type(self)(_wrap_numpy_scalars(self.array[key]))

def transpose(self, order):
Expand Down Expand Up @@ -745,12 +778,13 @@ def get_duck_array(self):
return self.array.get_duck_array()

def _oindex_get(self, key):
return type(self)(_wrap_numpy_scalars(self.array[key]))
return type(self)(_wrap_numpy_scalars(self.array.oindex[key]))

def _vindex_get(self, key):
return type(self)(_wrap_numpy_scalars(self.array[key]))
return type(self)(_wrap_numpy_scalars(self.array.vindex[key]))

def __getitem__(self, key):
self._check_and_raise_if_non_basic_indexer(key)
return type(self)(_wrap_numpy_scalars(self.array[key]))

def transpose(self, order):
Expand Down Expand Up @@ -912,7 +946,13 @@ def explicit_indexing_adapter(
result = raw_indexing_method(raw_key.tuple)
if numpy_indices.tuple:
# index the loaded np.ndarray
result = NumpyIndexingAdapter(result)[numpy_indices]
indexable = NumpyIndexingAdapter(result)
if isinstance(numpy_indices, VectorizedIndexer):
result = indexable.vindex[numpy_indices]
elif isinstance(numpy_indices, OuterIndexer):
result = indexable.oindex[numpy_indices]
else:
result = indexable[numpy_indices]
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
return result


Expand Down Expand Up @@ -987,10 +1027,10 @@ def _decompose_vectorized_indexer(
>>> array = np.arange(36).reshape(6, 6)
>>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3])))
>>> # load subslice of the array
... array = NumpyIndexingAdapter(array)[backend_indexer]
... array = NumpyIndexingAdapter(array).oindex[backend_indexer]
>>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0])))
>>> # vectorized indexing for on-memory np.ndarray.
... NumpyIndexingAdapter(array)[np_indexer]
... NumpyIndexingAdapter(array).vindex[np_indexer]
array([ 2, 21, 8])
"""
assert isinstance(indexer, VectorizedIndexer)
Expand Down Expand Up @@ -1072,7 +1112,7 @@ def _decompose_outer_indexer(
... array = NumpyIndexingAdapter(array)[backend_indexer]
>>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0])))
>>> # outer indexing for on-memory np.ndarray.
... NumpyIndexingAdapter(array)[np_indexer]
... NumpyIndexingAdapter(array).oindex[np_indexer]
array([[ 2, 3, 2],
[14, 15, 14],
[ 8, 9, 8]])
Expand Down Expand Up @@ -1395,6 +1435,7 @@ def _vindex_get(self, key):
return array[key.tuple]

def __getitem__(self, key):
self._check_and_raise_if_non_basic_indexer(key)
array, key = self._indexing_array_and_key(key)
return array[key]

Expand Down Expand Up @@ -1450,15 +1491,8 @@ def _vindex_get(self, key):
raise TypeError("Vectorized indexing is not supported")

def __getitem__(self, key):
if isinstance(key, BasicIndexer):
return self.array[key.tuple]
elif isinstance(key, OuterIndexer):
return self.oindex[key]
else:
if isinstance(key, VectorizedIndexer):
raise TypeError("Vectorized indexing is not supported")
else:
raise TypeError(f"Unrecognized indexer: {key}")
self._check_and_raise_if_non_basic_indexer(key)
return self.array[key.tuple]

def __setitem__(self, key, value):
if isinstance(key, (BasicIndexer, OuterIndexer)):
Expand Down Expand Up @@ -1499,13 +1533,8 @@ def _vindex_get(self, key):
return self.array.vindex[key.tuple]

def __getitem__(self, key):
if isinstance(key, BasicIndexer):
return self.array[key.tuple]
elif isinstance(key, VectorizedIndexer):
return self.vindex[key]
else:
assert isinstance(key, OuterIndexer)
return self.oindex[key]
self._check_and_raise_if_non_basic_indexer(key)
return self.array[key.tuple]

def __setitem__(self, key, value):
if isinstance(key, BasicIndexer):
Expand Down Expand Up @@ -1603,7 +1632,13 @@ def __getitem__(
(key,) = key

if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional
return NumpyIndexingAdapter(np.asarray(self))[indexer]
indexable = NumpyIndexingAdapter(np.asarray(self))
if isinstance(indexer, VectorizedIndexer):
return indexable.vindex[indexer]
elif isinstance(indexer, OuterIndexer):
return indexable.oindex[indexer]
else:
return indexable[indexer]

result = self.array[key]

Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_coding_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None:

V = IndexerMaker(indexing.VectorizedIndexer)
indexer = V[np.array([[0, 1], [1, 0]])]
actual = stacked[indexer]
actual = stacked.vindex[indexer]
assert_array_equal(actual, expected)


Expand Down
47 changes: 40 additions & 7 deletions xarray/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ def test_lazily_indexed_array(self) -> None:
([0, 3, 5], arr[:2]),
]
for i, j in indexers:

expected_b = v[i][j]
actual = v_lazy[i][j]
assert expected_b.shape == actual.shape
Expand Down Expand Up @@ -360,8 +361,15 @@ def test_vectorized_lazily_indexed_array(self) -> None:

def check_indexing(v_eager, v_lazy, indexers):
for indexer in indexers:
actual = v_lazy[indexer]
expected = v_eager[indexer]
if isinstance(indexer, indexing.VectorizedIndexer):
actual = v_lazy.vindex[indexer]
expected = v_eager.vindex[indexer]
elif isinstance(indexer, indexing.OuterIndexer):
actual = v_lazy.oindex[indexer]
expected = v_eager.oindex[indexer]
else:
actual = v_lazy[indexer]
expected = v_eager[indexer]
assert expected.shape == actual.shape
assert isinstance(
actual._data,
Expand Down Expand Up @@ -556,7 +564,9 @@ def test_arrayize_vectorized_indexer(self) -> None:
vindex_array = indexing._arrayize_vectorized_indexer(
vindex, self.data.shape
)
np.testing.assert_array_equal(self.data[vindex], self.data[vindex_array])
np.testing.assert_array_equal(
self.data.vindex[vindex], self.data.vindex[vindex_array]
)

actual = indexing._arrayize_vectorized_indexer(
indexing.VectorizedIndexer((slice(None),)), shape=(5,)
Expand Down Expand Up @@ -667,16 +677,39 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None:
indexer = get_indexers(shape, indexer_mode)

backend_ind, np_ind = indexing.decompose_indexer(indexer, shape, indexing_support)
indexing_adapter = indexing.NumpyIndexingAdapter(data)

# Dispatch to appropriate indexing method
if indexer_mode.startswith("vectorized"):
expected = indexing_adapter.vindex[indexer]

elif indexer_mode.startswith("outer"):
expected = indexing_adapter.oindex[indexer]

else:
expected = indexing_adapter[indexer] # Basic indexing

if isinstance(backend_ind, indexing.VectorizedIndexer):
array = indexing_adapter.vindex[backend_ind]
elif isinstance(backend_ind, indexing.OuterIndexer):
array = indexing_adapter.oindex[backend_ind]
else:
array = indexing_adapter[backend_ind]

expected = indexing.NumpyIndexingAdapter(data)[indexer]
array = indexing.NumpyIndexingAdapter(data)[backend_ind]
if len(np_ind.tuple) > 0:
array = indexing.NumpyIndexingAdapter(array)[np_ind]
array_indexing_adapter = indexing.NumpyIndexingAdapter(array)
if isinstance(np_ind, indexing.VectorizedIndexer):
array = array_indexing_adapter.vindex[np_ind]
elif isinstance(np_ind, indexing.OuterIndexer):
array = array_indexing_adapter.oindex[np_ind]
else:
array = array_indexing_adapter[np_ind]
np.testing.assert_array_equal(expected, array)

if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple):
combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind)
array = indexing.NumpyIndexingAdapter(data)[combined_ind]
# combined_ind is a VectorizedIndexer
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
array = indexing_adapter.vindex[combined_ind]
np.testing.assert_array_equal(expected, array)


Expand Down
Loading