Merge pull request #77 from tethys-ts/dev
fix to using hdf5 results chunks
mullenkamp authored Oct 20, 2022
2 parents a1dc526 + e1ee02c commit 50dd6d5
Showing 4 changed files with 17 additions and 21 deletions.
4 changes: 2 additions & 2 deletions conda/meta.yaml
@@ -1,5 +1,5 @@
 {% set name = "tethysts" %}
-{% set version = "4.5.3" %}
+{% set version = "4.5.4" %}
 # {% set sha256 = "ae2cc83fb5a75e8dc3e1b2c2137deea412c8a4c7c9acca52bf4ec59de52a80c9" %}

 # sha256 is the prefered checksum -- you can get it for a file with:
@@ -45,7 +45,7 @@ requirements:
     - shapely
     - tethys-data-models >=0.4.11
     - hdf5tools >=0.0.7
-    - s3tethys >=0.0.2
+    - s3tethys >=0.0.4

 test:
   imports:
4 changes: 2 additions & 2 deletions setup.py
@@ -10,7 +10,7 @@
 name = 'tethysts'
 main_package = 'tethysts'
 datasets = 'datasets/time_series'
-version = '4.5.3'
+version = '4.5.4'
 descrip = 'tethys time series S3 extraction'

 # The below code is for readthedocs. To have sphinx/readthedocs interact with
@@ -19,7 +19,7 @@
 if os.environ.get('READTHEDOCS', False) == 'True':
     INSTALL_REQUIRES = []
 else:
-    INSTALL_REQUIRES = ['zstandard', 'pandas', 'xarray', 'scipy', 'orjson', 'requests', 'shapely', 'tethys-data-models>=0.4.11', 'hdf5tools>=0.0.7', 's3tethys>=0.0.2']
+    INSTALL_REQUIRES = ['zstandard', 'pandas', 'xarray', 'scipy', 'orjson', 'requests', 'shapely', 'tethys-data-models>=0.4.11', 'hdf5tools>=0.0.7', 's3tethys>=0.0.4']

 # Get the long description from the README file
 with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
2 changes: 2 additions & 0 deletions tethysts/main.py
@@ -544,6 +544,8 @@ def get_results(self,

         ## Clear xarray cache...because it loves caching everything...
         ## This is to ensure that xarray will open the file rather than opening a cache
+        ## The next xarray version should have this issue fixed:
+        ## https://github.com/pydata/xarray/pull/4879
         xr.backends.file_manager.FILE_CACHE.clear()

         ## combine results
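For reference, a minimal standalone sketch of the workaround described by the new comments (the file name below is hypothetical, not from the repository):

import xarray as xr

# xarray keeps open file handles in a global cache, so reopening the same
# source can hand back cached data; clearing the cache forces a fresh open
# on the next call, which is the workaround used in get_results above.
xr.backends.file_manager.FILE_CACHE.clear()
ds = xr.open_dataset('results_chunk.h5', engine='h5netcdf')  # hypothetical file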
28 changes: 11 additions & 17 deletions tethysts/utils.py
@@ -726,29 +726,15 @@ def result_filters(data, from_date=None, to_date=None, from_mod_date=None, to_mo
 # return file_path2


-def process_dataset_obj(results, from_date=None, to_date=None):
+def process_dataset(data, from_date=None, to_date=None):
     """
     Stupid xarray being inefficient at parsing file objects...
     """
-    if isinstance(results, io.BytesIO):
-        try:
-            data = xr.load_dataset(results, engine='h5netcdf', cache=False)
-        except:
-            data = xr.load_dataset(results)
-    elif isinstance(results, xr.Dataset):
-        data = results
-    else:
-        raise TypeError('Not the right data type.')
-
     data = result_filters(data, from_date, to_date)

     data_obj = io.BytesIO()
     hdf5tools.xr_to_hdf5(data, data_obj)

-    data.close()
-    del data
-    del results

     return data_obj
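For illustration, a rough usage sketch of the refactored helper, assuming the caller opens the dataset itself and passes it in (mirroring the download_results change below); the file name and dates here are hypothetical:

import io
import xarray as xr

raw = open('results_chunk.h5', 'rb').read()  # hypothetical downloaded chunk
data = xr.load_dataset(io.BytesIO(raw), engine='h5netcdf')

# Filter by date and re-serialize to an in-memory HDF5 file (io.BytesIO).
data_obj = process_dataset(data, from_date='2020-01-01', to_date='2022-01-01')
data.close()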


@@ -781,8 +767,16 @@ def download_results(chunk: dict, bucket: str, s3: botocore.client.BaseClient =

     if chunk['key'].endswith('.zst'):
         file_obj = s3tethys.decompress_stream_to_object(file_obj, 'zstd')
+        data = xr.load_dataset(file_obj.read(), engine='scipy')
+    else:
+        data = xr.load_dataset(io.BytesIO(file_obj.read()), engine='h5netcdf')

+    data_obj = process_dataset(data, from_date=from_date, to_date=to_date)

+    data.close()
+    del data

-    data_obj = process_dataset_obj(file_obj, from_date=from_date, to_date=to_date)
+    del file_obj

     return data_obj

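A hedged sketch of how the returned object might be consumed, assuming download_results hands back an io.BytesIO holding the HDF5 file written by hdf5tools.xr_to_hdf5:

import xarray as xr

# 'data_obj' stands in for the io.BytesIO returned by download_results above
# (obtaining one requires a live S3 connection, so it is assumed here).
data_obj.seek(0)
ds = xr.open_dataset(data_obj, engine='h5netcdf')
print(ds)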
