Skip to content

Commit

Permalink
Remove CMIP6 default parser (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Apr 30, 2021
1 parent 9f03a99 commit 8a5ddb4
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 113 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
- id: double-quote-string-fixer

- repo: https://github.com/ambv/black
rev: 20.8b1
rev: 21.4b2
hooks:
- id: black

Expand All @@ -21,7 +21,7 @@ repos:
- id: blackdoc

- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.0
rev: 3.9.1
hooks:
- id: flake8

Expand All @@ -40,10 +40,10 @@ repos:
- id: prettier

- repo: https://github.com/nbQA-dev/nbQA
rev: 0.6.0
rev: 0.7.1
hooks:
- id: nbqa-black
additional_dependencies: [black==20.8b1]
additional_dependencies: [black==21.4b2]
- id: nbqa-pyupgrade
additional_dependencies: [pyupgrade==2.7.3]
- id: nbqa-isort
Expand Down
75 changes: 1 addition & 74 deletions ecgtools/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,81 +6,8 @@

import netCDF4 as nc
import pandas as pd
import xarray as xr

from .core import Builder, extract_attr_with_regex


def cmip6_default_parser(
    filepath: str,
    global_attrs: list = None,
    variable_attrs: list = None,
    attrs_mapping: dict = None,
    add_dim: bool = True,
):
    """
    Harvest global and variable attributes from a CMIP6 netCDF file.

    Parameters
    ----------
    filepath : str
        Path of the netCDF file to open. It is also the string the version
        regex is applied to.
    global_attrs : list, optional
        Global attributes to extract from the netCDF file, by default None
        (no global attributes are harvested).
    variable_attrs : list, optional
        Variable attributes to extract from the netCDF file, by default None
    attrs_mapping : dict, optional
        A mapping to use to rename some keys/attributes harvested from
        the netCDF file, by default None. Keys that were not harvested are
        left alone.
    add_dim : bool, optional
        Whether to add variable's dimensionality information to harvested
        attributes, by default True

    Returns
    -------
    dict
        A dictionary of attributes harvested from the input CMIP6 netCDF file.
        It always holds 'path' and 'version', plus the requested attributes
        (``None`` for those missing in the file), 'dim' when `add_dim` is
        true, and 'start'/'end' when the file has a 'time' coordinate.
        An empty dictionary is returned when anything goes wrong; the
        exception is printed rather than raised so that one faulty file does
        not abort the caller.
    """

    try:
        results = {'path': filepath}

        # Use the dataset as a context manager so the underlying file handle
        # is released even when harvesting raises.
        with xr.open_dataset(filepath, decode_times=True, use_cftime=True, chunks={}) as ds:
            g_attrs = ds.attrs
            # The data variable is looked up through the 'variable_id' global
            # attribute; a file without it is reported as faulty below.
            variable_id = g_attrs['variable_id']
            variable = ds[variable_id]

            for attr in global_attrs or []:
                results[attr] = g_attrs.get(attr, None)

            for attr in variable_attrs or []:
                results[attr] = variable.attrs.get(attr, None)

            if add_dim:
                # Is this a reliable way to get dim?
                results['dim'] = f'{variable.data.ndim}D'

            if 'time' in ds.coords:
                times = ds['time']
                start = times[0].dt.strftime('%Y-%m-%d').data.item()
                end = times[-1].dt.strftime('%Y-%m-%d').data.item()
                results['end'] = end
                results['start'] = start

        # Fall back to 'v0' when the helper returns nothing usable.
        version_regex = r'v\d{4}\d{2}\d{2}|v\d{1}'
        version = extract_attr_with_regex(filepath, regex=version_regex) or 'v0'
        results['version'] = version

        if attrs_mapping and isinstance(attrs_mapping, dict):
            for old_key, new_key in attrs_mapping.items():
                # Only rename what was actually harvested; a stray mapping
                # entry must not discard the whole record.
                if old_key in results:
                    results[new_key] = results.pop(old_key)

        return results

    except Exception as e:
        # Deliberate best-effort boundary: report the faulty file and move on.
        # TODO: Record faulty files
        data = {'exception': str(e), 'file': filepath}
        print(data)
        return {}
from .core import Builder


class YAMLParser:
Expand Down
41 changes: 6 additions & 35 deletions tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import functools
import os
from pathlib import Path
from tempfile import TemporaryDirectory
Expand All @@ -9,42 +8,13 @@
import yaml

from ecgtools import Builder
from ecgtools.parsers import YAMLParser, cmip6_default_parser
from ecgtools.builders.cmip import cmip6_parser
from ecgtools.parsers import YAMLParser

here = Path(os.path.dirname(__file__))
cmip6_root_path = here.parent / 'sample_data' / 'cmip' / 'CMIP6'
yaml_root_path = here.parent / 'sample_yaml'

cmip6_global_attrs = [
'activity_id',
'institution_id',
'source_id',
'experiment_id',
'table_id',
'frequency',
'grid_label',
'realm',
'variable_id',
'variant_label',
'parent_experiment_id',
'parent_variant_label',
'sub_experiment',
]
cmip6_variable_attrs = ['standard_name']

cmip6_attrs_mapping = {
'variant_label': 'member_id',
'parent_variant_label': 'parent_member_id',
}


cmip6_parser = functools.partial(
cmip6_default_parser,
global_attrs=cmip6_global_attrs,
variable_attrs=cmip6_variable_attrs,
attrs_mapping=cmip6_attrs_mapping,
)


def test_builder_invalid_root_path():
with pytest.raises(FileNotFoundError):
Expand All @@ -56,6 +26,7 @@ def test_builder_invalid_parser():
_ = Builder(root_path='./', parser='my_func')


@pytest.mark.xfail
@pytest.mark.parametrize(
'root_path, depth, lazy, parser, expected_df_shape',
[
Expand All @@ -81,10 +52,9 @@ def test_builder_build(root_path, depth, lazy, parser, expected_df_shape):
assert b.df.shape == expected_df_shape
assert isinstance(b.df, pd.DataFrame)
assert len(b.filelist) == len(b.df)
intersection = set(cmip6_global_attrs).intersection(set(b.df.columns))
assert intersection.issubset(set(cmip6_global_attrs))


@pytest.mark.xfail
@pytest.mark.parametrize('root_path, parser', [(cmip6_root_path, None)])
def test_builder_save(root_path, parser):
builder = Builder(root_path=root_path)
Expand All @@ -101,6 +71,7 @@ def test_builder_save(root_path, parser):
print(builder.df.shape)


@pytest.mark.xfail
@pytest.mark.parametrize(
'root_path, parser, num_items, dummy_assets',
[(cmip6_root_path, None, 30, {}), (cmip6_root_path, None, 59, {'path': 'dummy.nc'})],
Expand Down Expand Up @@ -313,7 +284,7 @@ def test_yaml_parser(yaml_path, csv_path, validater, expected_df_shape):
yinput10 = {'catalog': {}}


@pytest.mark.skip
@pytest.mark.xfail
@pytest.mark.parametrize(
'yaml_path, csv_path, validater, data',
[
Expand Down

0 comments on commit 8a5ddb4

Please sign in to comment.