Skip to content

Commit

Permalink
chunk delayed export to limit the number of files
Browse files Browse the repository at this point in the history
  • Loading branch information
jenshnielsen committed Dec 20, 2023
1 parent 306d542 commit d41b0ea
Showing 1 changed file with 25 additions and 5 deletions.
30 changes: 25 additions & 5 deletions src/qcodes/dataset/data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing import TYPE_CHECKING, Any, Literal

import numpy
from tqdm.auto import trange
from tqdm.auto import tqdm

import qcodes
from qcodes.dataset.data_set_protocol import (
Expand Down Expand Up @@ -246,7 +246,8 @@ def __init__(
#: In memory representation of the data in the dataset.
self._cache: DataSetCacheWithDBBackend = DataSetCacheWithDBBackend(self)
self._results: list[dict[str, VALUE]] = []
self._in_memory_cache = in_memory_cache
self._in_memory_cache: bool = in_memory_cache
self._max_num_files_export = 100
self._export_limit = 1000

if run_id is not None:
Expand Down Expand Up @@ -1484,6 +1485,21 @@ def _set_export_info(self, export_info: ExportInfo) -> None:

def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
"""Export data as netcdf to a given path with file prefix"""

def generate_steps(num_rows: int, max_num_steps: int) -> list[tuple[int, int]]:
    """Partition rows ``1..num_rows`` into at most ``max_num_steps`` ranges.

    Each returned tuple is an inclusive, 1-based ``(start, end)`` pair.
    The ranges are contiguous and together cover every row exactly once.

    Args:
        num_rows: Total number of rows to partition.
        max_num_steps: Upper bound on the number of ranges returned.

    Returns:
        One single-row range per row when ``max_num_steps >= num_rows``
        (an empty list when ``num_rows`` is not positive). Otherwise
        exactly ``max_num_steps`` ranges of ``num_rows // max_num_steps``
        rows each, with the last range extended to absorb the remainder.

    Raises:
        ValueError: If ``max_num_steps`` is smaller than 1 while there are
            more rows than steps.
    """
    if max_num_steps >= num_rows:
        # Enough steps available for every row to get its own range.
        return [(row, row) for row in range(1, num_rows + 1)]

    if max_num_steps < 1:
        # Without this guard a zero would surface as a ZeroDivisionError and
        # a negative value would silently produce an empty partition, i.e.
        # every row would be dropped.
        raise ValueError(
            f"max_num_steps must be at least 1, got {max_num_steps}"
        )

    step_size = num_rows // max_num_steps
    limits = [
        (i * step_size + 1, (i + 1) * step_size) for i in range(max_num_steps)
    ]

    # The last range always ends on the final row; when num_rows is not an
    # exact multiple of max_num_steps this is where the leftover rows go.
    limits[-1] = (limits[-1][0], num_rows)

    return limits

import xarray as xr

file_path = path / file_name
Expand Down Expand Up @@ -1514,12 +1530,16 @@ def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
"temp_dir": temp_dir,
},
)
num_files = len(self)
num_rows = len(self)
steps = generate_steps(num_rows, self._max_num_files_export)
num_files = len(steps)
num_digits = len(str(num_files))
file_name_template = f"ds_{{:0{num_digits}d}}.nc"
for i in trange(num_files, desc="Writing individual files"):
for i, (start, stop) in tqdm(
enumerate(steps), total=num_files, desc="Writing individual files"
):
xarray_to_h5netcdf_with_complex_numbers(
self.to_xarray_dataset(start=i + 1, end=i + 1),
self.to_xarray_dataset(start=start, end=stop),
temp_path / file_name_template.format(i),
)
files = tuple(temp_path.glob("*.nc"))
Expand Down

0 comments on commit d41b0ea

Please sign in to comment.