Skip to content

Commit

Permalink
Refactor date format classes (#146)
Browse files Browse the repository at this point in the history
* Test set-up for new date format class

* IsoDateFormat class inherits from DateFormat - methods get_floor og get_ceil returns date - tests ok

* Add test for new date classes

* Handle KeyError in SsbDateFormat class - return None

* Add doctests and docstring

* Remove comment

* Remove unnecessary doctests

* Correct name and structure

* Fix parameters in test

* Make DateFormat abstract with abstract methods

* Formatting
  • Loading branch information
tilen1976 authored Feb 1, 2024
1 parent 80788df commit 52db883
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 121 deletions.
219 changes: 98 additions & 121 deletions src/datadoc/backend/dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pathlib
import re
from abc import ABC
from abc import abstractmethod
from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import Final
Expand All @@ -16,17 +18,61 @@
if TYPE_CHECKING:
import datetime
import os
from datetime import date


@dataclass
class IsoDateFormat:
"""An ISO date format with relevant patterns."""
class DateFormat(ABC):
"""A super class for date formats."""

name: str
regex_pattern: str
arrow_pattern: str
timeframe: Literal["year", "month", "day", "week"]

@abstractmethod
def get_floor(self, period_string: str) -> date | None:
"""Return first date of timeframe period."""

@abstractmethod
def get_ceil(self, period_string: str) -> date | None:
"""Return last date of timeframe period."""


@dataclass
class IsoDateFormat(DateFormat):
"""A subclass of Dateformat with relevant patterns for ISO dates."""

def get_floor(self, period_string: str) -> date | None:
"""Method.
>>> ISO_YEAR_MONTH.get_floor("1980-08")
datetime.date(1980, 8, 1)
>>> ISO_YEAR.get_floor("2021")
datetime.date(2021, 1, 1)
>>> SSB_BIMESTER.get_floor("2003B4")
datetime.date(2003, 7, 1)
"""
return arrow.get(period_string, self.arrow_pattern).floor(self.timeframe).date()

def get_ceil(self, period_string: str) -> date | None:
"""Method.
>>> ISO_YEAR.get_ceil("1921")
datetime.date(1921, 12, 31)
>>> ISO_YEAR_MONTH.get_ceil("2021-05")
datetime.date(2021, 5, 31)
>>> SSB_HALF_YEAR.get_ceil("2024H1")
datetime.date(2024, 6, 30)
"""
return arrow.get(period_string, self.arrow_pattern).ceil(self.timeframe).date()


ISO_YEAR = IsoDateFormat(
name="ISO_YEAR",
Expand Down Expand Up @@ -55,20 +101,52 @@ class IsoDateFormat:


@dataclass
class SsbDateFormat:
"""An date format with relevant patterns for SSB special date formats."""
class SsbDateFormat(DateFormat):
"""A subclass of Dateformat with relevant patterns for SSB unique dates."""

name: str
regex_pattern: str
arrow_pattern: str
time_frame: dict
ssb_dates: dict

def get_floor(self, period_string: str) -> date | None:
"""Convert SSB format to date-string and return first date.
If not excisting SSB format, return None
>>> SSB_BIMESTER.get_floor("2003B8")
None
"""
try:
year = period_string[:4]
month = self.ssb_dates[period_string[-2:]]["start"]
period = year + month
return arrow.get(period, self.arrow_pattern).floor(self.timeframe).date()
except KeyError:
return None

def get_ceil(self, period_string: str) -> date | None:
"""Convert SSB format to date-string and return last date.
If not excisting SSB format, return None
>>> SSB_TRIANNUAL.get_ceil("1999T11")
None
"""
try:
year = period_string[:4]
month = self.ssb_dates[period_string[-2:]]["end"]
period = year + month
return arrow.get(period, self.arrow_pattern).ceil(self.timeframe).date()
except KeyError:
return None


SSB_BIMESTER = SsbDateFormat(
name="SSB_BIMESTER",
regex_pattern=r"\d{4}[B]\d{1}$",
arrow_pattern="YYYYMM",
time_frame={
timeframe="month",
ssb_dates={
"B1": {
"start": "01",
"end": "02",
Expand Down Expand Up @@ -100,7 +178,8 @@ class SsbDateFormat:
name="SSB_QUARTERLY",
regex_pattern=r"\d{4}[Q]\d{1}$",
arrow_pattern="YYYYMM",
time_frame={
timeframe="month",
ssb_dates={
"Q1": {
"start": "01",
"end": "03",
Expand All @@ -119,11 +198,13 @@ class SsbDateFormat:
},
},
)

SSB_TRIANNUAL = SsbDateFormat(
name="SSB_TRIANNUAL",
regex_pattern=r"\d{4}[T]\d{1}$",
arrow_pattern="YYYYMM",
time_frame={
timeframe="month",
ssb_dates={
"T1": {
"start": "01",
"end": "04",
Expand All @@ -142,7 +223,8 @@ class SsbDateFormat:
name="SSB_HALF_YEAR",
regex_pattern=r"\d{4}[H]\d{1}$",
arrow_pattern="YYYYMM",
time_frame={
timeframe="month",
ssb_dates={
"H1": {
"start": "01",
"end": "06",
Expand Down Expand Up @@ -172,34 +254,14 @@ def categorize_period_string(period: str) -> IsoDateFormat | SsbDateFormat:
If the period string is not recognized, a NotImplementedError is raised.
Examples:
>>> date_format = categorize_period_string('2022')
>>> date_format.name
ISO_YEAR
>>> date_format = categorize_period_string('2022-W01')
>>> date_format.name
ISO_YEAR_WEEK
>>> date_format = categorize_period_string('2022B1')
>>> date_format.name
SSB_BIMESTER
>>> date_format = categorize_period_string('1980Q3')
>>> date_format.name
SSB_QUARTERLY
>>> date_format = categorize_period_string('1954T2')
>>> date_format.name
SSB_TRIANNUAL
>>> date_format = categorize_period_string('1876H1')
>>> date_format.name
SSB_HALF_YEAR
>>> date_format = categorize_period_string('1876H5') # Not valid SSB date format, number is out of range
>>> date_format.name
SSB_HALF_YEAR
>>> categorize_period_string('unknown format')
Traceback (most recent call last):
...
Expand All @@ -215,61 +277,6 @@ def categorize_period_string(period: str) -> IsoDateFormat | SsbDateFormat:
)


def convert_ssb_period(
period_string: str,
period_type: str,
date_format: SsbDateFormat,
) -> str | None:
"""Convert ssb-format for bimester, quarterly, triannual and half year to start and end months.
If invalid SSB key, the method returns None.
Usage-examples:
>>> ssb_bimester_period_start = convert_ssb_period("2022B1","start",SSB_BIMESTER)
>>> ssb_bimester_period_start
202201
>>> ssb_bimester_period_end = convert_ssb_period("2022B1","end",SSB_BIMESTER)
>>> ssb_bimester_period_end
202202
>>> ssb_quarterly_period_start = convert_ssb_period("2015Q3","start",SSB_QUARTERLY)
>>> ssb_quarterly_period_start
201507
>>> ssb_quarterly_period_end = convert_ssb_period("2015Q3","end",SSB_QUARTERLY)
>>> ssb_quarterly_period_end
201509
>>> ssb_triannual_period_start = convert_ssb_period("1998T2","start",SSB_TRIANNUAL)
>>> ssb_triannual_period_start
199805
>>> ssb_quarterly_period_end = convert_ssb_period("1998T2","end",SSB_TRIANNUAL)
>>> ssb_quarterly_period_end
199808
>>> ssb_half_year_period_start = convert_ssb_period("1898H2","start",SSB_HALF_YEAR)
>>> ssb_half_year_period_start
189807
>>> ssb_half_year_period_end = convert_ssb_period("1898H2","end",SSB_HALF_YEAR)
>>> ssb_half_year_period_end
189812
>>> ssb_invalid_key = convert_ssb_period("2018Q5","start",SSB_QUARTERLY)
>>> ssb_invalid_key
None
"""
try:
return (
period_string[:4] + date_format.time_frame[period_string[-2:]][period_type]
)
except KeyError:
return None


class DaplaDatasetPathInfo:
"""Extract info from a path following SSB's dataset naming convention."""

Expand Down Expand Up @@ -321,32 +328,12 @@ def _extract_period_string_from_index(self, index: int) -> str | None:
def contains_data_from(self) -> datetime.date | None:
"""The earliest date from which data in the dataset is relevant for."""
period_string = self._extract_period_string_from_index(0)
if (
not period_string
or len(self._period_strings) > 1
and period_string > self._period_strings[1]
if not period_string or (
len(self._period_strings) > 1 and period_string > self._period_strings[1]
):
return None
date_format = categorize_period_string(period_string)

if isinstance(date_format, SsbDateFormat):
"""If dateformat is SSB date format return start month of ssb period."""
period = convert_ssb_period(
period_string,
"start",
date_format,
)
if period is not None:
return (
arrow.get(period, date_format.arrow_pattern).floor("month").date()
)
return None

return (
arrow.get(period_string, date_format.arrow_pattern)
.floor(date_format.timeframe)
.date()
)
return date_format.get_floor(period_string)

@property
def contains_data_until(self) -> datetime.date | None:
Expand All @@ -361,17 +348,7 @@ def contains_data_until(self) -> datetime.date | None:
):
return None
date_format = categorize_period_string(period_string)
if isinstance(date_format, SsbDateFormat):
"""If dateformat is SSB date format return end month of ssb period."""
period = convert_ssb_period(period_string, "end", date_format)
if period is not None:
return arrow.get(period, date_format.arrow_pattern).ceil("month").date()
return None
return (
arrow.get(period_string, date_format.arrow_pattern)
.ceil(date_format.timeframe)
.date()
)
return date_format.get_ceil(period_string)

@property
def dataset_state(
Expand Down
56 changes: 56 additions & 0 deletions tests/backend/test_dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

import pytest

from datadoc.backend.dapla_dataset_path_info import ISO_YEAR
from datadoc.backend.dapla_dataset_path_info import ISO_YEAR_MONTH
from datadoc.backend.dapla_dataset_path_info import ISO_YEAR_MONTH_DAY
from datadoc.backend.dapla_dataset_path_info import SSB_BIMESTER
from datadoc.backend.dapla_dataset_path_info import DaplaDatasetPathInfo
from datadoc.enums import DatasetState
from tests.utils import TEST_PARQUET_FILEPATH
Expand Down Expand Up @@ -222,3 +226,55 @@ def test_extract_period_info_date_until_invalid_pathname(
) -> None:
dataset = DaplaDatasetPathInfo(dataset_path_name)
assert dataset.contains_data_until is None


@pytest.mark.parametrize(
("date_format", "period"),
[
(ISO_YEAR, "1980"),
(ISO_YEAR_MONTH, "1888-11"),
(ISO_YEAR_MONTH_DAY, "2203-01-24"),
(SSB_BIMESTER, "1963B3"),
],
)
def test_date_format_return_date_object_period_start(date_format, period):
assert isinstance(date_format.get_floor(period), datetime.date)


@pytest.mark.parametrize(
("date_format", "period"),
[
(ISO_YEAR, "1980"),
(ISO_YEAR_MONTH, "1888-11"),
(ISO_YEAR_MONTH_DAY, "2203-01-24"),
(SSB_BIMESTER, "1963B3"),
],
)
def test_date_format_return_date_object_period_end(date_format, period):
assert isinstance(date_format.get_ceil(period), datetime.date)


@pytest.mark.parametrize(
("date_format", "period", "expected"),
[
(ISO_YEAR, "1980", datetime.date(1980, 1, 1)),
(ISO_YEAR_MONTH, "1888-11", datetime.date(1888, 11, 1)),
(ISO_YEAR_MONTH_DAY, "2203-01-24", datetime.date(2203, 1, 24)),
(SSB_BIMESTER, "1963B3", datetime.date(1963, 5, 1)),
],
)
def test_date_format_correct_from_date(date_format, period, expected: datetime.date):
assert date_format.get_floor(period) == expected


@pytest.mark.parametrize(
("date_format", "period", "expected"),
[
(ISO_YEAR, "1980", datetime.date(1980, 12, 31)),
(ISO_YEAR_MONTH, "1888-11", datetime.date(1888, 11, 30)),
(ISO_YEAR_MONTH_DAY, "2203-01-24", datetime.date(2203, 1, 24)),
(SSB_BIMESTER, "1963B3", datetime.date(1963, 6, 30)),
],
)
def test_date_format_correct_end_date(date_format, period, expected):
assert date_format.get_ceil(period) == expected

0 comments on commit 52db883

Please sign in to comment.