zarr-developers · d-v-b · Jul 30, 2024 · Jul 29, 2024 · Jul 30, 2024 · Jul 30, 2024
diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py
@@ -30,7 +30,7 @@ def to_dict(self) -> JSON:
             elif isinstance(value, str):
                 out_dict[key] = value
             elif isinstance(value, Sequence):
-                out_dict[key] = [v.to_dict() if isinstance(v, Metadata) else v for v in value]
+                out_dict[key] = tuple(v.to_dict() if isinstance(v, Metadata) else v for v in value)
             else:
                 out_dict[key] = value
 

diff --git a/src/zarr/chunk_grids.py b/src/zarr/chunk_grids.py
@@ -131,7 +131,7 @@ def _from_dict(cls, data: dict[str, JSON]) -> Self:
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
     def to_dict(self) -> dict[str, JSON]:
-        return {"name": "regular", "configuration": {"chunk_shape": list(self.chunk_shape)}}
+        return {"name": "regular", "configuration": {"chunk_shape": tuple(self.chunk_shape)}}
 
     def all_chunk_coords(self, array_shape: ChunkCoords) -> Iterator[ChunkCoords]:
         return itertools.product(

diff --git a/src/zarr/chunk_key_encodings.py b/src/zarr/chunk_key_encodings.py
@@ -38,7 +38,13 @@ def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncoding) -> ChunkKeyEncoding
         if isinstance(data, ChunkKeyEncoding):
             return data
 
-        name_parsed, configuration_parsed = parse_named_configuration(data)
+        # configuration is optional for chunk key encodings
+        name_parsed, configuration_parsed = parse_named_configuration(
+            data, require_configuration=False
+        )
+        # normalize missing configuration to the default "/" separator.
+        if configuration_parsed is None:
+            configuration_parsed = {"separator": "/"}
         if name_parsed == "default":
             return DefaultChunkKeyEncoding(**configuration_parsed)  # type: ignore[arg-type]
         if name_parsed == "v2":

diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py
@@ -283,15 +283,19 @@ def _json_convert(o: Any) -> Any:
 
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> ArrayV3Metadata:
+        # shallow-copy so the pops below do not mutate the caller's dict
+        _data = data.copy()
         # TODO: Remove the type: ignores[] comments below and use a TypedDict to type `data`
         # check that the zarr_format attribute is correct
-        _ = parse_zarr_format_v3(data.pop("zarr_format"))  # type: ignore[arg-type]
+        _ = parse_zarr_format_v3(_data.pop("zarr_format"))  # type: ignore[arg-type]
         # check that the node_type attribute is correct
-        _ = parse_node_type_array(data.pop("node_type"))  # type: ignore[arg-type]
+        _ = parse_node_type_array(_data.pop("node_type"))  # type: ignore[arg-type]
 
-        data["dimension_names"] = data.pop("dimension_names", None)
-
-        return cls(**data)  # type: ignore[arg-type]
+        # dimension_names key is optional, normalize missing to `None`
+        _data["dimension_names"] = _data.pop("dimension_names", None)
+        # attributes key is optional, normalize missing to `None`
+        _data["attributes"] = _data.pop("attributes", None)
+        return cls(**_data)  # type: ignore[arg-type]
 
     def to_dict(self) -> dict[str, Any]:
         out_dict = super().to_dict()
@@ -446,10 +450,10 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:
         return replace(self, attributes=attributes)
 
 
-def parse_dimension_names(data: None | Iterable[str]) -> tuple[str, ...] | None:
+def parse_dimension_names(data: None | Iterable[str | None]) -> tuple[str | None, ...] | None:
     if data is None:
         return data
-    elif all(isinstance(x, str) for x in data):
+    elif all(isinstance(x, type(None) | str) for x in data):
         return tuple(data)
     else:
         msg = f"Expected either None or a iterable of str, got {type(data)}"

diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py
@@ -1,7 +1,11 @@
 from __future__ import annotations
 
 import re
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
+
+from zarr.abc.codec import Codec
+from zarr.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
+from zarr.codecs.bytes import BytesCodec
 
 if TYPE_CHECKING:
     from typing import Any
@@ -11,7 +15,7 @@
 import numpy as np
 import pytest
 
-from zarr.metadata import parse_dimension_names
+from zarr.metadata import ArrayV3Metadata, parse_dimension_names
 from zarr.metadata import parse_fill_value_v3 as parse_fill_value
 from zarr.metadata import parse_zarr_format_v3 as parse_zarr_format
 
@@ -157,3 +161,72 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
     match = f"Cannot parse non-string sequence {fill_value} as a scalar with type {dtype}"
     with pytest.raises(TypeError, match=re.escape(match)):
         parse_fill_value(fill_value, dtype)
+
+
+@pytest.mark.parametrize("chunk_grid", ["regular"])
+@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
+@pytest.mark.parametrize("codecs", [[BytesCodec()]])
+@pytest.mark.parametrize("fill_value", [0, 1])
+@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
+@pytest.mark.parametrize("dimension_separator", [".", "/", None])
+@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
+def test_metadata_to_dict(
+    chunk_grid: str,
+    codecs: list[Codec],
+    fill_value: Any,
+    chunk_key_encoding: Literal["v2", "default"],
+    dimension_separator: Literal[".", "/"] | None,
+    dimension_names: Literal["nones", "strings", "missing"],
+    attributes: None | dict[str, Any],
+) -> None:
+    shape = (1, 2, 3)  # rank 3; len(shape) sizes the dimension_names variants below
+    data_type = "uint8"
+    if chunk_grid == "regular":
+        cgrid = {"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}}
+    # chunk_key_encoding input; "configuration" is omitted when no separator is given
+    cke: dict[str, Any]
+    cke_name_dict = {"name": chunk_key_encoding}
+    if dimension_separator is not None:
+        cke = cke_name_dict | {"configuration": {"separator": dimension_separator}}
+    else:
+        cke = cke_name_dict
+    dnames: tuple[str | None, ...] | None
+    # dimension_names variants: all strings, all None, or the key left out of the input
+    if dimension_names == "strings":
+        dnames = tuple(map(str, range(len(shape))))
+    elif dimension_names == "missing":
+        dnames = None
+    elif dimension_names == "nones":
+        dnames = (None,) * len(shape)
+    # keys present in every case; attributes and dimension_names are added conditionally below
+    metadata_dict = {
+        "zarr_format": 3,
+        "node_type": "array",
+        "shape": shape,
+        "chunk_grid": cgrid,
+        "data_type": data_type,
+        "chunk_key_encoding": cke,
+        "codecs": tuple(c.to_dict() for c in codecs),
+        "fill_value": fill_value,
+    }
+
+    if attributes is not None:
+        metadata_dict["attributes"] = attributes
+    if dnames is not None:
+        metadata_dict["dimension_names"] = dnames
+    # Round trip: from_dict then to_dict should give back the input dict, modulo defaults
+    metadata = ArrayV3Metadata.from_dict(metadata_dict)
+    observed = metadata.to_dict()
+    expected = metadata_dict  # alias, not a copy: pops below also edit metadata_dict
+    if attributes is None:  # key omitted from the input; to_dict is expected to emit {}
+        assert observed["attributes"] == {}
+        observed.pop("attributes")
+    if dimension_separator is None:  # no "configuration" given; separator expected to be "/"
+        if chunk_key_encoding == "default":
+            expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()
+        else:  # NOTE(review): v3 spec default separator for "v2" appears to be "."; confirm "/"
+            expected_cke_dict = V2ChunkKeyEncoding(separator="/").to_dict()
+        assert observed["chunk_key_encoding"] == expected_cke_dict
+        observed.pop("chunk_key_encoding")
+        expected.pop("chunk_key_encoding")
+    assert observed == expected