Skip to content

Commit

Permalink
Merge pull request #142 from statisticsnorway/feature/save-period-info-to-metadata
Browse files Browse the repository at this point in the history

DPMETA-21: Saving date_from and date_until to metadata
  • Loading branch information
JanhSander authored Jan 31, 2024
2 parents 097b321 + f71a65c commit 2913b0b
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 38 deletions.
76 changes: 39 additions & 37 deletions src/datadoc/backend/dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,49 +288,51 @@ def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]:
if re.match(date_format_regex, x) is not None
]

def _extract_period_string_from_index(self, index: int) -> str | None:
    """Return the period string at ``index``, or ``None`` if there is none.

    Args:
        index: Position in ``self._period_strings`` to read.

    Returns:
        The period string found at that position, or ``None`` when the
        lookup raises ``IndexError``.
    """
    try:
        return self._period_strings[index]
    except IndexError:
        return None

@property
def contains_data_from(self) -> datetime.date | None:
    """The earliest date from which data in the dataset is relevant for.

    Returns:
        The first period string parsed and floored to the start of its
        timeframe, or ``None`` when no period string is available.
    """
    first_period_string = self._extract_period_string_from_index(0)
    if first_period_string is None:
        return None

    date_format = categorize_period_string(first_period_string)
    if isinstance(date_format, SsbDateFormat):
        # If the date format is an SSB date format, return the start month
        # of the SSB period.
        period = convert_ssb_period(
            first_period_string,
            "start",
            date_format,
        )
        return arrow.get(period, date_format.arrow_pattern).floor("month").date()

    return (
        arrow.get(first_period_string, date_format.arrow_pattern)
        .floor(date_format.timeframe)
        .date()
    )

@property
def contains_data_until(self) -> datetime.date | None:
    """The latest date until which data in the dataset is relevant for.

    The second period string is used when one exists; otherwise the first
    period string is used.

    Returns:
        The chosen period string parsed and moved to the end of its
        timeframe, or ``None`` when no period string is available.
    """
    first_period_string = self._extract_period_string_from_index(0)
    second_period_string = self._extract_period_string_from_index(1)
    period_string = second_period_string or first_period_string
    if period_string is None:
        return None

    date_format = categorize_period_string(period_string)
    if isinstance(date_format, SsbDateFormat):
        # If the date format is an SSB date format, return the end month
        # of the SSB period.
        period = convert_ssb_period(period_string, "end", date_format)
        return arrow.get(period, date_format.arrow_pattern).ceil("month").date()

    return (
        arrow.get(period_string, date_format.arrow_pattern)
        .ceil(date_format.timeframe)
        .date()
    )
1 change: 0 additions & 1 deletion src/datadoc/backend/datadoc_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def get_dataset_state(
dataset_path_parts,
):
return state

return None

@staticmethod
Expand Down
17 changes: 17 additions & 0 deletions tests/backend/test_datadoc_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,20 @@ def test_save_file_path_dataset_and_no_metadata(
with Path.open(Path(TEST_RESOURCES_METADATA_DOCUMENT)) as f:
saved_file_path = json.load(f)["datadoc"]["dataset"]["file_path"]
assert saved_file_path == str(metadata.dataset)


@pytest.mark.parametrize(
    ("insert_string", "expected_from", "expected_until"),
    [
        ("_p2021", "2021-01-01", "2021-12-31"),
        ("_p2022_p2023", "2022-01-01", "2023-12-31"),
    ],
)
def test_period_metadata_fields_saved(
    generate_periodic_file,
    expected_from,
    expected_until,
):
    """Check the dataset's date range fields against the parametrized values.

    ``insert_string`` is consumed by the ``generate_periodic_file`` fixture,
    which supplies the dataset path used here.
    """
    dataset = DataDocMetadata(generate_periodic_file).meta.dataset
    assert dataset.contains_data_from == expected_from
    assert dataset.contains_data_until == expected_until
21 changes: 21 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .utils import TEST_EXISTING_METADATA_FILE_NAME
from .utils import TEST_EXISTING_METADATA_WITH_VALID_ID_DIRECTORY
from .utils import TEST_PARQUET_FILEPATH
from .utils import TEST_RESOURCES_DIRECTORY
from .utils import TEST_RESOURCES_METADATA_DOCUMENT


Expand Down Expand Up @@ -136,3 +137,23 @@ def nynorsk_name() -> str:
@pytest.fixture()
def language_object(english_name: str, bokmål_name: str) -> model.LanguageStringType:
    """Build a ``LanguageStringType`` from the English and Bokmål name fixtures."""
    return model.LanguageStringType(
        en=english_name,
        nb=bokmål_name,
    )


@pytest.fixture()
def existing_data_path() -> Path:
    """Return the ``TEST_PARQUET_FILEPATH`` constant as a fixture value."""
    return TEST_PARQUET_FILEPATH


@pytest.fixture()
def generate_periodic_file(
    existing_data_path: Path,
    insert_string: str,
) -> Generator[Path, None, None]:
    """Yield a copy of the existing dataset with ``insert_string`` in its name.

    The copy is written to ``TEST_RESOURCES_DIRECTORY`` with ``insert_string``
    placed immediately before the first ``_v1`` in the file name, and is
    removed again after the test has finished.

    Args:
        existing_data_path: Dataset file to copy.
        insert_string: Text to insert before the ``_v1`` marker.

    Yields:
        Path of the newly created copy.

    Raises:
        ValueError: If the file name does not contain ``_v1``.
    """
    version_marker = "_v1"
    file_name = existing_data_path.name
    prefix, marker, remainder = file_name.partition(version_marker)
    if not marker:
        # Without this check a missing marker would silently splice the
        # period string in before the last character of the file name.
        msg = f"Expected {version_marker!r} in file name {file_name!r}"
        raise ValueError(msg)

    new_path = TEST_RESOURCES_DIRECTORY / f"{prefix}{insert_string}{marker}{remainder}"
    shutil.copy(existing_data_path, new_path)
    yield new_path
    # The test may already have removed the file, so tolerate its absence.
    new_path.unlink(missing_ok=True)

0 comments on commit 2913b0b

Please sign in to comment.