Skip to content

Commit

Permalink
Remove raise IndexError on dates in wrong sequence, remove test for d…
Browse files Browse the repository at this point in the history
…ates in wrong sequence. Add testcases, doctests
  • Loading branch information
tilen1976 committed Jan 30, 2024
1 parent 3f962ad commit a9b7f11
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 48 deletions.
15 changes: 12 additions & 3 deletions src/datadoc/backend/dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ def categorize_period_string(period: str) -> IsoDateFormat | SsbDateFormat:
>>> date_format.name
SSB_HALF_YEAR
>>> date_format = categorize_period_string('1876H5') # Not valid SSB date format, number is out of range
>>> date_format.name
SSB_HALF_YEAR
>>> categorize_period_string('unknown format')
Traceback (most recent call last):
...
Expand Down Expand Up @@ -247,6 +251,10 @@ def convert_ssb_period(
>>> ssb_half_year_period_end
189812
>>> convert_ssb_period("2018Q5","start",SSB_QUARTERLY)
Traceback (most recent call last):
KeyError: 'Q5'
"""
return period_string[:4] + date_format.time_frame[period_string[-2:]][period_type]

Expand All @@ -265,9 +273,6 @@ def __init__(self, dataset_path: str) -> None:
with contextlib.suppress(IndexError):
self.second_period_string = _period_strings[1]

if self.contains_data_until < self.contains_data_from:
raise IndexError

@staticmethod
def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]:
"""Extract period strings from dataset name sections.
Expand All @@ -285,6 +290,10 @@ def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]:
>>> DaplaDatasetPathInfo._extract_period_strings(['p1990Q1', 'kommune', 'v1'])
['1990Q1']
>>> DaplaDatasetPathInfo._extract_period_strings(['varehandel','v1']) # No date will return an empty list
[]
"""
date_format_regex = re.compile(
r"^p\d{4}(?:-\d{2}-\d{2}|-\d{2}|-{0,1}[QTHWB]\d{1,2})?$",
Expand Down
63 changes: 18 additions & 45 deletions tests/backend/test_dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,21 @@ class DatasetPathTestCase:
expected_contains_data_from=datetime.date(2022, 1, 1),
expected_contains_data_until=datetime.date(2022, 2, 28),
),
DatasetPathTestCase(
path="ufo_observasjoner_p2019_p1920_v1.parquet", # Will pass, but the dates are in the wrong order
expected_contains_data_from=datetime.date(2019, 1, 1),
expected_contains_data_until=datetime.date(1920, 12, 31),
),
DatasetPathTestCase(
path="varehandel_p2018H2_p2018H1_v1.parquet", # Will pass, but the dates are in the wrong order
expected_contains_data_from=datetime.date(2018, 7, 1),
expected_contains_data_until=datetime.date(2018, 6, 30),
),
DatasetPathTestCase(
path="varehandel_p2018Q1_p2018H2_v1.parquet", # Will pass, but combines two different SSB period formats
expected_contains_data_from=datetime.date(2018, 1, 1),
expected_contains_data_until=datetime.date(2018, 12, 31),
),
]


Expand Down Expand Up @@ -121,58 +136,16 @@ def test_extract_period_info_date_until(
assert dataset_path.contains_data_until == expected_contains_data_until


@pytest.mark.parametrize(
"data",
[
(
"varehandel_p2018Q5_p2018Q4_v1.parquet",
"Period format p2018Q5 is not supported",
),
(
"varehandel_p2018Q1_p2018H2_v1.parquet",
"Period format p2018H2 is not supported",
),
],
)
def test_extract_period_info_failures(data: tuple):
DaplaDatasetPathInfo(data[0])
with pytest.raises(NotImplementedError):
raise NotImplementedError(data[1])


# Nonsense names and path names without dates will trigger IndexError
@pytest.mark.parametrize(
"data",
[
"nonsen.data",
"nonsens2.parquet",
"nonsens_v1.parquet",
"varehandel_v1.parquet",
],
)
def test_extract_period_info_failures_index_error(data: str):
with pytest.raises(IndexError):
DaplaDatasetPathInfo(data)


@pytest.mark.parametrize(
"data",
[
(
"varehandel_p2018H2_p2018H1_v1.parquet",
"Periods are not in sequence",
),
(
"varehandel_p2018T3_p2018T1_v1.parquet",
"Periods are not in sequence",
),
(
"varehandel_p2018Q4_p2018Q1_v1.parquet",
"Periods are not in sequence",
),
(
"varehandel_p2018B6_p2018B1_v1.parquet",
"Periods are not in sequence",
),
],
)
def test_extract_period_info_in_sequence(data: tuple):
with pytest.raises(IndexError):
DaplaDatasetPathInfo(data[0])

0 comments on commit a9b7f11

Please sign in to comment.