Skip to content

Commit

Permalink
API docs (#56)
Browse files Browse the repository at this point in the history
* use autodoc2 to render API docs

* tidy up some docstrings

* use autodoc and rst for API docs instead of autodoc2 and md

* clean up some docstrings

* remove private or not implemented methods from public API docs

* add array API

* separate serialization methods into their own section
  • Loading branch information
TomNicholas authored Mar 27, 2024
1 parent 7927fe3 commit db7116e
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 52 deletions.
21 changes: 0 additions & 21 deletions docs/api.md

This file was deleted.

56 changes: 56 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#############
API Reference
#############

.. currentmodule:: virtualizarr


Manifests
=========

.. currentmodule:: virtualizarr.manifests
.. autosummary::
:nosignatures:
:toctree: generated/

ChunkManifest
ManifestArray


Xarray
======

.. currentmodule:: virtualizarr.xarray
.. autosummary::
:nosignatures:
:toctree: generated/

open_virtual_dataset


Serialization
=============

.. currentmodule:: virtualizarr.xarray
.. autosummary::
:nosignatures:
:toctree: generated/

VirtualiZarrDatasetAccessor.to_kerchunk
VirtualiZarrDatasetAccessor.to_zarr


Array API
=========

VirtualiZarr's :py:class:`~virtualizarr.ManifestArray` objects support a limited subset of the Python Array API standard in :py:mod:`virtualizarr.manifests.array_api`.

.. currentmodule:: virtualizarr.manifests.array_api
.. autosummary::
:nosignatures:
:toctree: generated/

concatenate
stack
expand_dims
broadcast_to
26 changes: 24 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,36 @@
extensions = [
"myst_nb",
"sphinx.ext.autodoc",
"sphinx.ext.extlinks",
"sphinx_autodoc_typehints",
"sphinx.ext.autosummary",
"sphinx_copybutton",
"sphinx_togglebutton",
"sphinx_design",
"sphinx.ext.napoleon",
]


# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]


# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
Expand All @@ -36,6 +57,7 @@
"repository_branch": "main",
"path_to_docs": "docs",
}
html_title = "VirtualiZarr"

html_logo = "_static/_future_logo.png"

Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .manifests import ManifestArray # type: ignore # noqa
from .manifests import ChunkManifest, ManifestArray # type: ignore # noqa
from .xarray import VirtualiZarrDatasetAccessor # type: ignore # noqa
from .xarray import open_virtual_dataset # noqa: F401
19 changes: 3 additions & 16 deletions virtualizarr/manifests/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __init__(
self._manifest = _chunkmanifest

@classmethod
def from_kerchunk_refs(cls, arr_refs: KerchunkArrRefs) -> "ManifestArray":
def _from_kerchunk_refs(cls, arr_refs: KerchunkArrRefs) -> "ManifestArray":
from virtualizarr.kerchunk import fully_decode_arr_refs

decoded_arr_refs = fully_decode_arr_refs(arr_refs)
Expand All @@ -74,7 +74,7 @@ def from_kerchunk_refs(cls, arr_refs: KerchunkArrRefs) -> "ManifestArray":
kerchunk_chunk_dict = {
k: v for k, v in decoded_arr_refs.items() if re.match(_CHUNK_KEY, k)
}
chunkmanifest = ChunkManifest.from_kerchunk_chunk_dict(kerchunk_chunk_dict)
chunkmanifest = ChunkManifest._from_kerchunk_chunk_dict(kerchunk_chunk_dict)

obj = object.__new__(cls)
obj._manifest = chunkmanifest
Expand Down Expand Up @@ -111,22 +111,9 @@ def ndim(self) -> int:
def size(self) -> int:
return int(np.prod(self.shape))

@property
def T(self) -> "ManifestArray":
raise NotImplementedError()

def __repr__(self) -> str:
return f"ManifestArray<shape={self.shape}, dtype={self.dtype}, chunks={self.chunks}>"

def to_kerchunk_refs(self) -> KerchunkArrRefs:
# TODO is there enough information to get the attrs and so on here?
raise NotImplementedError()

def to_zarr(self, store) -> None:
raise NotImplementedError(
"Requires the chunk manifest ZEP to be formalized before we know what to write out here."
)

def __array_function__(self, func, types, args, kwargs) -> Any:
"""
Hook to teach this class what to do if np.concat etc. is called on it.
Expand Down Expand Up @@ -193,7 +180,7 @@ def __eq__( # type: ignore[override]
return np.full(shape=self.shape, fill_value=False, dtype=np.dtype(bool))

def astype(self, dtype: np.dtype, /, *, copy: bool = True) -> "ManifestArray":
"""Needed because xarray will call this even when it's a no-op"""
"""Cannot change the dtype, but needed because xarray will call this even when it's a no-op."""
if dtype != self.dtype:
raise NotImplementedError()
else:
Expand Down
16 changes: 9 additions & 7 deletions virtualizarr/manifests/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,14 @@ class ChunkManifest(BaseModel):
Stores the manifest as a dictionary under the .chunks attribute, in this form:
{
"0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
"0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
"0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
"0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
}
{
"0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
"0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
"0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
"0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
}
See the chunk manifest SPEC proposal in https://github.com/zarr-developers/zarr-specs/issues/287 .
Expand Down Expand Up @@ -118,7 +120,7 @@ def to_zarr_json(self, filepath: str) -> None:
raise NotImplementedError()

@classmethod
def from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
def _from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
chunkentries = {
k: ChunkEntry.from_kerchunk(v) for k, v in kerchunk_chunk_dict.items()
}
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_create_manifestarray_from_kerchunk_refs(self):
".zarray": '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
"0.0": ["test1.nc", 6144, 48],
}
marr = ManifestArray.from_kerchunk_refs(arr_refs)
marr = ManifestArray._from_kerchunk_refs(arr_refs)

assert marr.shape == (2, 3)
assert marr.chunks == (2, 3)
Expand Down
14 changes: 10 additions & 4 deletions virtualizarr/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def variable_from_kerchunk_refs(

arr_refs = kerchunk.extract_array_refs(refs, var_name)
chunk_dict, zarray, zattrs = kerchunk.parse_array_refs(arr_refs)
manifest = ChunkManifest.from_kerchunk_chunk_dict(chunk_dict)
manifest = ChunkManifest._from_kerchunk_chunk_dict(chunk_dict)
dims = zattrs["_ARRAY_DIMENSIONS"]
varr = virtual_array_class(zarray=zarray, chunkmanifest=manifest)

Expand Down Expand Up @@ -167,16 +167,22 @@ def separate_coords(

@register_dataset_accessor("virtualize")
class VirtualiZarrDatasetAccessor:
"""
Xarray accessor for writing out virtual datasets to disk.
Methods on this object are called via `ds.virtualize.{method}`.
"""

def __init__(self, ds):
self.ds = ds

def to_zarr(self, storepath: str) -> None:
"""
Write out all virtualized arrays as a new Zarr store on disk.
Serialize all virtualized arrays in this xarray dataset as a Zarr store.
Parameters
----------
filepath : str, default: None
storepath : str
"""
raise NotImplementedError(
"No point in writing out these virtual arrays to Zarr until at least one Zarr reader can actually read them."
Expand All @@ -200,7 +206,7 @@ def to_kerchunk(
format: Union[Literal["dict"], Literal["json"], Literal["parquet"]] = "dict",
) -> Union[KerchunkStoreRefs, None]:
"""
Serialize all virtualized arrays into the kerchunk references format.
Serialize all virtualized arrays in this xarray dataset into the kerchunk references format.
Parameters
----------
Expand Down

0 comments on commit db7116e

Please sign in to comment.