Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Also test exporting references to in-memory kerchunk reference dict #158

Merged
merged 1 commit into from
Jun 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 40 additions & 19 deletions virtualizarr/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_numpy_arrays_to_inlined_kerchunk_refs(
assert refs["refs"]["time/0"] == expected["refs"]["time/0"]


@pytest.mark.parametrize("format", ["json", "parquet"])
@pytest.mark.parametrize("format", ["dict", "json", "parquet"])
class TestKerchunkRoundtrip:
def test_kerchunk_roundtrip_no_concat(self, tmpdir, format):
# set up example xarray dataset
Expand All @@ -56,13 +56,20 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format):
# use open_virtual_dataset to read it as references
vds = open_virtual_dataset(f"{tmpdir}/air.nc", indexes={})

# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)
if format == "dict":
# write those references to an in-memory kerchunk-formatted references dictionary
ds_refs = vds.virtualize.to_kerchunk(format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)
# use fsspec to read the dataset from the kerchunk references dict
roundtrip = xr.open_dataset(ds_refs, engine="kerchunk", decode_times=False)
else:
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)

# assert identical to original dataset
xrt.assert_identical(roundtrip, ds)
Expand All @@ -85,13 +92,20 @@ def test_kerchunk_roundtrip_concat(self, tmpdir, format):
# concatenate virtually along time
vds = xr.concat([vds1, vds2], dim="time", coords="minimal", compat="override")

# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)
if format == "dict":
# write those references to an in-memory kerchunk-formatted references dictionary
ds_refs = vds.virtualize.to_kerchunk(format=format)

# use fsspec to read the dataset from the kerchunk references dict
roundtrip = xr.open_dataset(ds_refs, engine="kerchunk", decode_times=False)
else:
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)
# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)

# assert identical to original dataset
xrt.assert_identical(roundtrip, ds)
Expand All @@ -110,13 +124,20 @@ def test_non_dimension_coordinates(self, tmpdir, format):
assert "lat" in vds.coords
assert "coordinates" not in vds.attrs

# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)
if format == "dict":
# write those references to an in-memory kerchunk-formatted references dictionary
ds_refs = vds.virtualize.to_kerchunk(format=format)

# use fsspec to read the dataset from the kerchunk references dict
roundtrip = xr.open_dataset(ds_refs, engine="kerchunk", decode_times=False)
else:
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)
# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)

# assert identical to original dataset
xrt.assert_identical(roundtrip, ds)
Expand Down
Loading