From 5e3599d733f4f6f66cafa0d91428a7079e649ae8 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 24 May 2021 12:13:25 -0600 Subject: [PATCH] Add `__post_init_post_parse__` method to `Builder` (#20) --- ecgtools/builder.py | 8 ++++++-- tests/test_builder.py | 21 ++++++++++++++------- tests/test_core.py | 2 -- 3 files changed, 20 insertions(+), 11 deletions(-) delete mode 100644 tests/test_core.py diff --git a/ecgtools/builder.py b/ecgtools/builder.py index 06a952c..1f24448 100644 --- a/ecgtools/builder.py +++ b/ecgtools/builder.py @@ -39,7 +39,7 @@ class Builder: parsing_func: typing.Callable = None njobs: int = -1 - def __post_init__(self): + def __post_init_post_parse__(self): self.df = pd.DataFrame() self.invalid_assets = pd.DataFrame() self.dirs = None @@ -100,7 +100,7 @@ def save( **kwargs, ): catalog_file = pathlib.Path(catalog_file) - index = kwargs.pop('index') or False + index = kwargs.pop('index') if 'index' in kwargs else False self.df.to_csv(catalog_file, index=index, **kwargs) if not self.invalid_assets.empty: invalid_assets_report_file = ( @@ -108,3 +108,7 @@ def save( ) self.invalid_assets.to_csv(invalid_assets_report_file, index=False) print(f'Saved catalog location: {catalog_file}') + + def build(self): + self.get_directories().get_filelist().parse().clean_dataframe() + return self diff --git a/tests/test_builder.py b/tests/test_builder.py index a0befb9..0281e6f 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -75,13 +75,10 @@ def test_parse_error(): sample_data_dir / 'cesm', ], ) -def test_parse(root_path): - b = ( - Builder(root_path, exclude_patterns=['*/files/*', '*/latest/*'], parsing_func=parsing_func) - .get_directories() - .get_filelist() - .parse() - ) +def test_build(root_path): + b = Builder( + root_path, exclude_patterns=['*/files/*', '*/latest/*'], parsing_func=parsing_func + ).build() assert b.entries assert isinstance(b.entries[0], dict) assert isinstance(b.df, pd.DataFrame) @@ -99,3 +96,13 @@ def test_parse_invalid_assets(): assert not b.invalid_assets.empty assert set(b.invalid_assets.columns) == set([INVALID_ASSET, TRACEBACK]) + + +def test_save(tmp_path): + catalog_file = tmp_path / 'test_catalog.csv' + b = Builder(sample_data_dir / 'cesm', parsing_func=parsing_func).build() + b.save(catalog_file) + + df = pd.read_csv(catalog_file) + assert len(df) == len(b.df) + assert set(df.columns) == set(b.df.columns) diff --git a/tests/test_core.py b/tests/test_core.py deleted file mode 100644 index c6fc3c9..0000000 --- a/tests/test_core.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_sample(): - assert 2 == 2