diff --git a/src/datadoc/backend/dapla_dataset_path_info.py b/src/datadoc/backend/dapla_dataset_path_info.py index 406f0cc8..63545655 100644 --- a/src/datadoc/backend/dapla_dataset_path_info.py +++ b/src/datadoc/backend/dapla_dataset_path_info.py @@ -192,6 +192,10 @@ def categorize_period_string(period: str) -> IsoDateFormat | SsbDateFormat: >>> date_format.name SSB_HALF_YEAR + >>> date_format = categorize_period_string('1876H5') # Not valid SSB date format, number is out of range + >>> date_format.name + SSB_HALF_YEAR + >>> categorize_period_string('unknown format') Traceback (most recent call last): ... @@ -247,6 +251,10 @@ def convert_ssb_period( >>> ssb_half_year_period_end 189812 + >>> convert_ssb_period("2018Q5","start",SSB_QUARTERLY) + Traceback (most recent call last): + KeyError: 'Q5' + """ return period_string[:4] + date_format.time_frame[period_string[-2:]][period_type] @@ -265,9 +273,6 @@ def __init__(self, dataset_path: str) -> None: with contextlib.suppress(IndexError): self.second_period_string = _period_strings[1] - if self.contains_data_until < self.contains_data_from: - raise IndexError - @staticmethod def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]: """Extract period strings from dataset name sections. 
@@ -285,6 +290,10 @@ def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]: >>> DaplaDatasetPathInfo._extract_period_strings(['p1990Q1', 'kommune', 'v1']) ['1990Q1'] + + >>> DaplaDatasetPathInfo._extract_period_strings(['varehandel','v1']) # No period string in the name yields an empty list + [] + """ date_format_regex = re.compile( r"^p\d{4}(?:-\d{2}-\d{2}|-\d{2}|-{0,1}[QTHWB]\d{1,2})?$", diff --git a/tests/backend/test_dapla_dataset_path_info.py b/tests/backend/test_dapla_dataset_path_info.py index 6f7faed0..d46ee9b4 100644 --- a/tests/backend/test_dapla_dataset_path_info.py +++ b/tests/backend/test_dapla_dataset_path_info.py @@ -81,6 +81,21 @@ class DatasetPathTestCase: expected_contains_data_from=datetime.date(2022, 1, 1), expected_contains_data_until=datetime.date(2022, 2, 28), ), + DatasetPathTestCase( + path="ufo_observasjoner_p2019_p1920_v1.parquet", # Will pass, but the dates are out of sequence + expected_contains_data_from=datetime.date(2019, 1, 1), + expected_contains_data_until=datetime.date(1920, 12, 31), + ), + DatasetPathTestCase( + path="varehandel_p2018H2_p2018H1_v1.parquet", # Will pass, but the dates are out of sequence + expected_contains_data_from=datetime.date(2018, 7, 1), + expected_contains_data_until=datetime.date(2018, 6, 30), + ), + DatasetPathTestCase( + path="varehandel_p2018Q1_p2018H2_v1.parquet", # Will pass, but combines two different SSB date formats + expected_contains_data_from=datetime.date(2018, 1, 1), + expected_contains_data_until=datetime.date(2018, 12, 31), + ), ] @@ -121,58 +136,16 @@ def test_extract_period_info_date_until( assert dataset_path.contains_data_until == expected_contains_data_until -@pytest.mark.parametrize( - "data", - [ - ( - "varehandel_p2018Q5_p2018Q4_v1.parquet", - "Period format p2018Q5 is not supported", - ), - ( - "varehandel_p2018Q1_p2018H2_v1.parquet", - "Period format p2018H2 is not supported", - ), - ], -) -def test_extract_period_info_failures(data: tuple): - DaplaDatasetPathInfo(data[0]) - with 
pytest.raises(NotImplementedError): - raise NotImplementedError(data[1]) - - +# Nonsense names and path names without dates will trigger IndexError @pytest.mark.parametrize( "data", [ "nonsen.data", "nonsens2.parquet", + "nonsens_v1.parquet", + "varehandel_v1.parquet", ], ) def test_extract_period_info_failures_index_error(data: str): with pytest.raises(IndexError): DaplaDatasetPathInfo(data) - - -@pytest.mark.parametrize( - "data", - [ - ( - "varehandel_p2018H2_p2018H1_v1.parquet", - "Periods are not in sequence", - ), - ( - "varehandel_p2018T3_p2018T1_v1.parquet", - "Periods are not in sequence", - ), - ( - "varehandel_p2018Q4_p2018Q1_v1.parquet", - "Periods are not in sequence", - ), - ( - "varehandel_p2018B6_p2018B1_v1.parquet", - "Periods are not in sequence", - ), - ], -) -def test_extract_period_info_in_sequence(data: tuple): - with pytest.raises(IndexError): - DaplaDatasetPathInfo(data[0])