Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mode='a-': Do not overwrite coordinates when appending to Zarr with append_dim #8428

Merged
merged 9 commits into from
Dec 1, 2023
22 changes: 12 additions & 10 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk
from xarray.core.indexes import Index
from xarray.core.parallelcompat import guess_chunkmanager
from xarray.core.types import ZarrWriteModes
from xarray.core.utils import is_remote_uri

if TYPE_CHECKING:
Expand Down Expand Up @@ -68,7 +69,6 @@
"NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"
]


DATAARRAY_NAME = "__xarray_dataarray_name__"
DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"

Expand Down Expand Up @@ -1524,7 +1524,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand All @@ -1548,7 +1548,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand All @@ -1570,7 +1570,7 @@ def to_zarr(
dataset: Dataset,
store: MutableMapping | str | os.PathLike[str] | None = None,
chunk_store: MutableMapping | str | os.PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand Down Expand Up @@ -1627,16 +1627,18 @@ def to_zarr(
else:
mode = "w-"

if mode != "a" and append_dim is not None:
if mode not in ["a", "a-"] and append_dim is not None:
raise ValueError("cannot set append_dim unless mode='a' or mode=None")

if mode not in ["a", "r+"] and region is not None:
raise ValueError("cannot set region unless mode='a', mode='r+' or mode=None")
if mode not in ["a", "a-", "r+"] and region is not None:
raise ValueError(
"cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
)

if mode not in ["w", "w-", "a", "r+"]:
if mode not in ["w", "w-", "a", "a-", "r+"]:
raise ValueError(
"The only supported options for mode are 'w', "
f"'w-', 'a' and 'r+', but mode={mode!r}"
f"'w-', 'a', 'a-', and 'r+', but mode={mode!r}"
)

# validate Dataset keys, DataArray names
Expand Down Expand Up @@ -1679,7 +1681,7 @@ def to_zarr(
write_empty=write_empty_chunks,
)

if mode in ["a", "r+"]:
if mode in ["a", "a-", "r+"]:
_validate_datatypes_for_zarr_append(zstore, dataset)
if append_dim is not None:
existing_dims = zstore.get_dimensions()
Expand Down
21 changes: 18 additions & 3 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from xarray.core import indexing
from xarray.core.parallelcompat import guess_chunkmanager
from xarray.core.pycompat import integer_types
from xarray.core.types import ZarrWriteModes
from xarray.core.utils import (
FrozenDict,
HiddenKeyDict,
Expand Down Expand Up @@ -377,7 +378,7 @@ class ZarrStore(AbstractWritableDataStore):
def open_group(
cls,
store,
mode="r",
mode: ZarrWriteModes = "r",
synchronizer=None,
group=None,
consolidated=False,
Expand All @@ -402,7 +403,8 @@ def open_group(
zarr_version = getattr(store, "_store_version", 2)

open_kwargs = dict(
mode=mode,
# mode='a-' is a handcrafted xarray specialty
mode="a" if mode == "a-" else mode,
synchronizer=synchronizer,
path=group,
)
Expand Down Expand Up @@ -628,8 +630,21 @@ def store(
self.set_attributes(attributes)
self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims)

# if we are appending to an append_dim, only write either
# - new variables not already present, OR
# - variables with the append_dim in their dimensions
# We do NOT overwrite other variables.
if self._mode == "a-" and self._append_dim is not None:
variables_to_set = {
k: v
for k, v in variables_encoded.items()
if (k not in existing_variable_names) or (self._append_dim in v.dims)
}
else:
variables_to_set = variables_encoded

self.set_variables(
variables_encoded, check_encoding_set, writer, unlimited_dims=unlimited_dims
variables_to_set, check_encoding_set, writer, unlimited_dims=unlimited_dims
)
if self._consolidate_on_close:
zarr.consolidate_metadata(self.zarr_group.store)
Expand Down
18 changes: 12 additions & 6 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@
from xarray.core.indexing import is_fancy_indexer, map_index_queries
from xarray.core.merge import PANDAS_TYPES, MergeError
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.types import DaCompatible, T_DataArray, T_DataArrayOrSet
from xarray.core.types import (
DaCompatible,
T_DataArray,
T_DataArrayOrSet,
ZarrWriteModes,
)
from xarray.core.utils import (
Default,
HybridMappingProxy,
Expand Down Expand Up @@ -4064,7 +4069,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
*,
Expand All @@ -4085,7 +4090,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand All @@ -4104,7 +4109,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand Down Expand Up @@ -4140,10 +4145,11 @@ def to_zarr(
chunk_store : MutableMapping, str or path-like, optional
Store or path to directory in local or remote file system only for Zarr
array chunks. Requires zarr-python v2.4.0 or later.
mode : {"w", "w-", "a", "r+", None}, optional
mode : {"w", "w-", "a", "a-", r+", None}, optional
Persistence mode: "w" means create (overwrite if exists);
"w-" means create (fail if exists);
"a" means override existing variables (create if does not exist);
"a" means override all existing variables including dimension coordinates (create if does not exist);
"a-" means only append those variables that have ``append_dim``.
"r+" means modify existing array *values* only (raise an error if
any metadata or shapes would change).
The default mode is "a" if ``append_dim`` is set. Otherwise, it is
Expand Down
12 changes: 7 additions & 5 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
T_Chunks,
T_DataArrayOrSet,
T_Dataset,
ZarrWriteModes,
)
from xarray.core.utils import (
Default,
Expand Down Expand Up @@ -2297,7 +2298,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand All @@ -2320,7 +2321,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand All @@ -2341,7 +2342,7 @@ def to_zarr(
self,
store: MutableMapping | str | PathLike[str] | None = None,
chunk_store: MutableMapping | str | PathLike | None = None,
mode: Literal["w", "w-", "a", "r+", None] = None,
mode: ZarrWriteModes | None = None,
synchronizer=None,
group: str | None = None,
encoding: Mapping | None = None,
Expand Down Expand Up @@ -2379,10 +2380,11 @@ def to_zarr(
chunk_store : MutableMapping, str or path-like, optional
Store or path to directory in local or remote file system only for Zarr
array chunks. Requires zarr-python v2.4.0 or later.
mode : {"w", "w-", "a", "r+", None}, optional
mode : {"w", "w-", "a", "a-", r+", None}, optional
Persistence mode: "w" means create (overwrite if exists);
"w-" means create (fail if exists);
"a" means override existing variables (create if does not exist);
"a" means override all existing variables including dimension coordinates (create if does not exist);
"a-" means only append those variables that have ``append_dim``.
"r+" means modify existing array *values* only (raise an error if
any metadata or shapes would change).
The default mode is "a" if ``append_dim`` is set. Otherwise, it is
Expand Down
3 changes: 3 additions & 0 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,6 @@ def copy(
"midpoint",
"nearest",
]


ZarrWriteModes = Literal["w", "w-", "a", "a-", "r+", "r"]
25 changes: 24 additions & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2382,6 +2382,29 @@ def test_append_with_new_variable(self) -> None:
xr.open_dataset(store_target, engine="zarr", **self.version_kwargs),
)

def test_append_with_append_dim_no_overwrite(self) -> None:
ds, ds_to_append, _ = create_append_test_data()
with self.create_zarr_target() as store_target:
ds.to_zarr(store_target, mode="w", **self.version_kwargs)
original = xr.concat([ds, ds_to_append], dim="time")
original2 = xr.concat([original, ds_to_append], dim="time")

# overwrite a coordinate;
# for mode='a-', this will not get written to the store
# because it does not have the append_dim as a dim
ds_to_append.lon.data[:] = -999
ds_to_append.to_zarr(
store_target, mode="a-", append_dim="time", **self.version_kwargs
)
actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
assert_identical(original, actual)

# by default, mode="a" will overwrite all coordinates.
ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs)
actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
original2.lon.data[:] = -999
assert_identical(original2, actual)

@requires_dask
def test_to_zarr_compute_false_roundtrip(self) -> None:
from dask.delayed import Delayed
Expand Down Expand Up @@ -2578,7 +2601,7 @@ def setup_and_verify_store(expected=data):
with pytest.raises(
ValueError,
match=re.escape(
"cannot set region unless mode='a', mode='r+' or mode=None"
"cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
),
):
data.to_zarr(
Expand Down
Loading