Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Default zarr.open to open_group if shape is not provided #2158

Open
wants to merge 9 commits into
base: v3
Choose a base branch
from
14 changes: 13 additions & 1 deletion src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import numpy.typing as npt

from zarr.core.array import Array, AsyncArray
from zarr.core.array import Array, AsyncArray, get_array_metadata
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat
from zarr.core.config import config
from zarr.core.group import AsyncGroup
Expand Down Expand Up @@ -226,6 +226,18 @@ async def open(
if path is not None:
store_path = store_path / path

if "shape" not in kwargs and mode in {"a", "w", "w-"}:
try:
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
# for v2, the above would already have raised an exception if not an array
zarr_format = metadata_dict["zarr_format"]
is_v3_array = zarr_format == 3 and metadata_dict.get("node_type") == "array"
if is_v3_array or zarr_format == 2:
return AsyncArray(store_path=store_path, metadata=metadata_dict)
except (AssertionError, FileNotFoundError):
pass
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)

try:
return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
except KeyError:
Expand Down
102 changes: 56 additions & 46 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,53 @@ def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecP
raise TypeError


async def get_array_metadata(
    store_path: StorePath, zarr_format: ZarrFormat | None = 3
) -> dict[str, Any]:
    """Load the raw array metadata stored under ``store_path`` as a dict.

    Parameters
    ----------
    store_path
        Location whose metadata documents are read.
    zarr_format
        ``2`` reads the ``ZARRAY_JSON`` / ``ZATTRS_JSON`` documents, ``3``
        reads the ``ZARR_JSON`` document, and ``None`` requests all three
        and infers the format from whichever metadata document is present.

    Returns
    -------
    dict[str, Any]
        The JSON-decoded metadata document. For format 2 the decoded
        attributes document (or an empty dict when it is absent) is stored
        under the ``"attributes"`` key.

    Raises
    ------
    FileNotFoundError
        If the metadata document required for the requested (or inferred)
        format does not exist.
    ValueError
        If ``zarr_format`` is ``None`` and both a v3 and a v2 metadata
        document exist, or if ``zarr_format`` is not ``2``, ``3`` or ``None``.
    """
    if zarr_format is None:
        # Format unknown: fetch every candidate document concurrently.
        v3_doc, v2_doc, v2_attrs = await gather(
            (store_path / ZARR_JSON).get(),
            (store_path / ZARRAY_JSON).get(),
            (store_path / ZATTRS_JSON).get(),
        )
        if v3_doc is not None and v2_doc is not None:
            # TODO: revisit this exception type
            # alternatively, we could warn and favor v3
            raise ValueError("Both zarr.json and .zarray objects exist")
        if v3_doc is None and v2_doc is None:
            raise FileNotFoundError(store_path)
        # Exactly one metadata document was found; it determines the format.
        zarr_format = 3 if v3_doc is not None else 2
    elif zarr_format == 2:
        v2_doc, v2_attrs = await gather(
            (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get()
        )
        if v2_doc is None:
            raise FileNotFoundError(store_path)
    elif zarr_format == 3:
        v3_doc = await (store_path / ZARR_JSON).get()
        if v3_doc is None:
            raise FileNotFoundError(store_path)
    else:
        raise ValueError(f"unexpected zarr_format: {zarr_format}")

    metadata_dict: dict[str, Any]
    if zarr_format != 2:
        # A V3 array is described by a single zarr.json object.
        assert v3_doc is not None
        metadata_dict = json.loads(v3_doc.to_bytes())
        return metadata_dict

    # A V2 array is described by a .zarray object plus an optional .zattrs object.
    assert v2_doc is not None
    metadata_dict = json.loads(v2_doc.to_bytes())
    metadata_dict["attributes"] = (
        json.loads(v2_attrs.to_bytes()) if v2_attrs is not None else {}
    )
    return metadata_dict


@dataclass(frozen=True)
class AsyncArray:
metadata: ArrayMetadata
Expand All @@ -105,10 +152,16 @@ class AsyncArray:

def __init__(
self,
metadata: ArrayMetadata,
metadata: ArrayMetadata | dict[str, Any],
store_path: StorePath,
order: Literal["C", "F"] | None = None,
):
if isinstance(metadata, dict):
zarr_format = metadata["zarr_format"]
if zarr_format == 2:
metadata = ArrayV2Metadata.from_dict(metadata)
else:
metadata = ArrayV3Metadata.from_dict(metadata)
metadata_parsed = parse_array_metadata(metadata)
order_parsed = parse_indexing_order(order or config.get("array.order"))

Expand Down Expand Up @@ -347,51 +400,8 @@ async def open(
zarr_format: ZarrFormat | None = 3,
) -> AsyncArray:
store_path = await make_store_path(store)

if zarr_format == 2:
zarray_bytes, zattrs_bytes = await gather(
(store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get()
)
if zarray_bytes is None:
raise FileNotFoundError(store_path)
elif zarr_format == 3:
zarr_json_bytes = await (store_path / ZARR_JSON).get()
if zarr_json_bytes is None:
raise FileNotFoundError(store_path)
elif zarr_format is None:
zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather(
(store_path / ZARR_JSON).get(),
(store_path / ZARRAY_JSON).get(),
(store_path / ZATTRS_JSON).get(),
)
if zarr_json_bytes is not None and zarray_bytes is not None:
# TODO: revisit this exception type
# alternatively, we could warn and favor v3
raise ValueError("Both zarr.json and .zarray objects exist")
if zarr_json_bytes is None and zarray_bytes is None:
raise FileNotFoundError(store_path)
# set zarr_format based on which keys were found
if zarr_json_bytes is not None:
zarr_format = 3
else:
zarr_format = 2
else:
raise ValueError(f"unexpected zarr_format: {zarr_format}")

if zarr_format == 2:
# V2 arrays are comprised of a .zarray and .zattrs objects
assert zarray_bytes is not None
zarray_dict = json.loads(zarray_bytes.to_bytes())
zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {}
zarray_dict["attributes"] = zattrs_dict
return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(zarray_dict))
else:
# V3 arrays are comprised of a zarr.json object
assert zarr_json_bytes is not None
return cls(
store_path=store_path,
metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())),
)
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
return cls(store_path=store_path, metadata=metadata_dict)

@property
def ndim(self) -> int:
Expand Down
7 changes: 6 additions & 1 deletion tests/v3/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,12 @@ def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None:
z2[:] = 3


def test_open_with_mode_a(tmp_path: pathlib.Path) -> None:
async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None:
# Open without shape argument should default to group
g = zarr.open(store=tmp_path, mode="a")
assert isinstance(g, Group)
await g.store_path.delete()
agoodm marked this conversation as resolved.
Show resolved Hide resolved

# 'a' means read/write (create if doesn't exist)
arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3))
assert isinstance(arr, Array)
agoodm marked this conversation as resolved.
Show resolved Hide resolved
Expand Down