Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API docs #56

Merged
merged 8 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions docs/api.md

This file was deleted.

56 changes: 56 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#############
API Reference
#############

.. currentmodule:: virtualizarr


Manifests
=========

.. currentmodule:: virtualizarr.manifests
.. autosummary::
:nosignatures:
:toctree: generated/

ChunkManifest
ManifestArray


Xarray
======

.. currentmodule:: virtualizarr.xarray
.. autosummary::
:nosignatures:
:toctree: generated/

open_virtual_dataset


Serialization
=============

.. currentmodule:: virtualizarr.xarray
.. autosummary::
:nosignatures:
:toctree: generated/

VirtualiZarrDatasetAccessor.to_kerchunk
VirtualiZarrDatasetAccessor.to_zarr


Array API
=========

VirtualiZarr's :py:class:`~virtualizarr.ManifestArray` objects support a limited subset of the Python Array API standard in :py:mod:`virtualizarr.manifests.array_api`.

.. currentmodule:: virtualizarr.manifests.array_api
.. autosummary::
:nosignatures:
:toctree: generated/

concatenate
stack
expand_dims
broadcast_to
26 changes: 24 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,36 @@
extensions = [
"myst_nb",
"sphinx.ext.autodoc",
"sphinx.ext.extlinks",
"sphinx_autodoc_typehints",
"sphinx.ext.autosummary",
"sphinx_copybutton",
"sphinx_togglebutton",
"sphinx_design",
"sphinx.ext.napoleon",
]


# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]


# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
Expand All @@ -36,6 +57,7 @@
"repository_branch": "main",
"path_to_docs": "docs",
}
html_title = "VirtualiZarr"

html_logo = "_static/_future_logo.png"

Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .manifests import ManifestArray # type: ignore # noqa
from .manifests import ChunkManifest, ManifestArray # type: ignore # noqa
from .xarray import VirtualiZarrDatasetAccessor # type: ignore # noqa
from .xarray import open_virtual_dataset # noqa: F401
19 changes: 3 additions & 16 deletions virtualizarr/manifests/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __init__(
self._manifest = _chunkmanifest

@classmethod
def from_kerchunk_refs(cls, arr_refs: KerchunkArrRefs) -> "ManifestArray":
def _from_kerchunk_refs(cls, arr_refs: KerchunkArrRefs) -> "ManifestArray":
from virtualizarr.kerchunk import fully_decode_arr_refs

decoded_arr_refs = fully_decode_arr_refs(arr_refs)
Expand All @@ -74,7 +74,7 @@ def from_kerchunk_refs(cls, arr_refs: KerchunkArrRefs) -> "ManifestArray":
kerchunk_chunk_dict = {
k: v for k, v in decoded_arr_refs.items() if re.match(_CHUNK_KEY, k)
}
chunkmanifest = ChunkManifest.from_kerchunk_chunk_dict(kerchunk_chunk_dict)
chunkmanifest = ChunkManifest._from_kerchunk_chunk_dict(kerchunk_chunk_dict)

obj = object.__new__(cls)
obj._manifest = chunkmanifest
Expand Down Expand Up @@ -111,22 +111,9 @@ def ndim(self) -> int:
def size(self) -> int:
return int(np.prod(self.shape))

@property
def T(self) -> "ManifestArray":
raise NotImplementedError()

def __repr__(self) -> str:
return f"ManifestArray<shape={self.shape}, dtype={self.dtype}, chunks={self.chunks}>"

def to_kerchunk_refs(self) -> KerchunkArrRefs:
# TODO is there enough information to get the attrs and so on here?
raise NotImplementedError()

def to_zarr(self, store) -> None:
raise NotImplementedError(
"Requires the chunk manifest ZEP to be formalized before we know what to write out here."
)

def __array_function__(self, func, types, args, kwargs) -> Any:
"""
Hook to teach this class what to do if np.concat etc. is called on it.
Expand Down Expand Up @@ -193,7 +180,7 @@ def __eq__( # type: ignore[override]
return np.full(shape=self.shape, fill_value=False, dtype=np.dtype(bool))

def astype(self, dtype: np.dtype, /, *, copy: bool = True) -> "ManifestArray":
"""Needed because xarray will call this even when it's a no-op"""
"""Cannot change the dtype, but needed because xarray will call this even when it's a no-op."""
if dtype != self.dtype:
raise NotImplementedError()
else:
Expand Down
16 changes: 9 additions & 7 deletions virtualizarr/manifests/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,14 @@ class ChunkManifest(BaseModel):
Stores the manifest as a dictionary under the .chunks attribute, in this form:
{
"0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
"0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
"0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
"0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
}
{
"0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
"0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
"0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
"0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
}
See the chunk manifest SPEC proposal in https://github.com/zarr-developers/zarr-specs/issues/287 .
Expand Down Expand Up @@ -118,7 +120,7 @@ def to_zarr_json(self, filepath: str) -> None:
raise NotImplementedError()

@classmethod
def from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
def _from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
chunkentries = {
k: ChunkEntry.from_kerchunk(v) for k, v in kerchunk_chunk_dict.items()
}
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_create_manifestarray_from_kerchunk_refs(self):
".zarray": '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
"0.0": ["test1.nc", 6144, 48],
}
marr = ManifestArray.from_kerchunk_refs(arr_refs)
marr = ManifestArray._from_kerchunk_refs(arr_refs)

assert marr.shape == (2, 3)
assert marr.chunks == (2, 3)
Expand Down
14 changes: 10 additions & 4 deletions virtualizarr/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def variable_from_kerchunk_refs(

arr_refs = kerchunk.extract_array_refs(refs, var_name)
chunk_dict, zarray, zattrs = kerchunk.parse_array_refs(arr_refs)
manifest = ChunkManifest.from_kerchunk_chunk_dict(chunk_dict)
manifest = ChunkManifest._from_kerchunk_chunk_dict(chunk_dict)
dims = zattrs["_ARRAY_DIMENSIONS"]
varr = virtual_array_class(zarray=zarray, chunkmanifest=manifest)

Expand Down Expand Up @@ -167,16 +167,22 @@ def separate_coords(

@register_dataset_accessor("virtualize")
class VirtualiZarrDatasetAccessor:
"""
Xarray accessor for writing out virtual datasets to disk.
Methods on this object are called via `ds.virtualize.{method}`.
"""

def __init__(self, ds):
self.ds = ds

def to_zarr(self, storepath: str) -> None:
"""
Write out all virtualized arrays as a new Zarr store on disk.
Serialize all virtualized arrays in this xarray dataset as a Zarr store.
Parameters
----------
filepath : str, default: None
storepath : str
"""
raise NotImplementedError(
"No point in writing out these virtual arrays to Zarr until at least one Zarr reader can actually read them."
Expand All @@ -200,7 +206,7 @@ def to_kerchunk(
format: Union[Literal["dict"], Literal["json"], Literal["parquet"]] = "dict",
) -> Union[KerchunkStoreRefs, None]:
"""
Serialize all virtualized arrays into the kerchunk references format.
Serialize all virtualized arrays in this xarray dataset into the kerchunk references format.
Parameters
----------
Expand Down
Loading