Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More thorough store handling during combine/append #488

Merged
merged 2 commits on Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions kerchunk/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def __init__(
raise ValueError("Values being mapped cannot also be identical")
self.preprocess = preprocess
self.postprocess = postprocess
self.out = out or {}
self.out = out if out is not None else {}
self.coos = None
self.done = set()

Expand Down Expand Up @@ -383,7 +383,9 @@ def store_coords(self):
"""
Write coordinate arrays into the output
"""
group = zarr.open(self.out)
kv = {}
store = zarr.storage.KVStore(kv)
group = zarr.open(store)
m = self.fss[0].get_mapper("")
z = zarr.open(m)
for k, v in self.coos.items():
Expand Down Expand Up @@ -435,6 +437,7 @@ def store_coords(self):
else:
arr.attrs.update(self.cf_units[k])
# TODO: rewrite .zarray/.zattrs with ujson to save space. Maybe make them by hand anyway.
self.out.update(kv)
logger.debug("Written coordinates")
for fn in [".zgroup", ".zattrs"]:
# top-level group attributes from first input
Expand Down
8 changes: 4 additions & 4 deletions kerchunk/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,29 +475,29 @@ def test_lazy_filler(tmpdir, refs):
mzz.second_pass()

# actual references don't show
assert set(out) == {
assert set(out.zmetadata) == {
".zattrs",
".zgroup",
".zmetadata",
"data/.zarray",
"data/.zattrs",
"static/.zarray",
"static/.zattrs",
"time/.zarray",
"time/.zattrs",
}
assert out._items
out.flush()
assert set(out) == {
assert set(out.zmetadata) == {
".zattrs",
".zgroup",
".zmetadata",
"data/.zarray",
"data/.zattrs",
"static/.zarray",
"static/.zattrs",
"time/.zarray",
"time/.zattrs",
}
assert set(out._items) == {".zmetadata"}
allfiles = fs.find(tmpdir)
assert [
f"{tmpdir}/{a}" in allfiles for a in ["static/refs.0.parq", "data/refs.0.parq"]
Expand Down
Loading