Skip to content

Commit

Permalink
Tidy and refactor test fixtures and resources (#192)
Browse files Browse the repository at this point in the history
* Tidy fixtures

- Remove unused fixtures
- Move local fixtures to the relevant test files
- Use tmp_path fixture to create and move test files

* Tidy test resources

* Fix typeguard
  • Loading branch information
mmwinther authored Mar 4, 2024
1 parent c6253e5 commit 460173b
Show file tree
Hide file tree
Showing 22 changed files with 103 additions and 881 deletions.
8 changes: 4 additions & 4 deletions src/datadoc/backend/unit_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@

if TYPE_CHECKING:
import pandas as pd
from klass.classes.codes import KlassCodes

from klass import KlassClassification # type: ignore [attr-defined]
from klass.classes.classification import KlassClassification

logger = logging.getLogger(__name__)

Expand All @@ -31,14 +32,13 @@ def __init__(self, classification_id: int) -> None:

def _fetch_data_from_external_source(
self,
) -> pd.DataFrame | None:
) -> KlassCodes | None:
"""Fetches the classifications from Klass by classification id.
returns a pandas dataframe with the class data for the given classification id.
"""
try:
klass_dataframe = KlassClassification(self.classification_id)
return klass_dataframe.get_codes()
return KlassClassification(str(self.classification_id)).get_codes()
except Exception:
logger.exception(
"Exception while getting classifications from Klass",
Expand Down
123 changes: 37 additions & 86 deletions tests/backend/test_datadoc_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
import pathlib
import shutil
from pathlib import Path
from typing import TYPE_CHECKING
from unittest.mock import MagicMock
Expand All @@ -29,22 +30,34 @@
from tests.utils import TEST_BUCKET_PARQUET_FILEPATH
from tests.utils import TEST_EXISTING_METADATA_DIRECTORY
from tests.utils import TEST_EXISTING_METADATA_FILE_NAME
from tests.utils import TEST_INPUT_DATA_POPULATION_DIRECTORY
from tests.utils import TEST_OUTPUT_DATA_POPULATION_DIRECTORY
from tests.utils import TEST_PARQUET_FILEPATH
from tests.utils import TEST_PROCESSED_DATA_POPULATION_DIRECTORY
from tests.utils import TEST_RESOURCES_DIRECTORY
from tests.utils import TEST_RESOURCES_METADATA_DOCUMENT
from tests.utils import TEST_STATISTICS_POPULATION_DIRECTORY

if TYPE_CHECKING:
import os
from collections.abc import Generator
from datetime import datetime


DATADOC_METADATA_MODULE = "datadoc.backend.datadoc_metadata"


@pytest.fixture()
def generate_periodic_file(
    existing_data_path: Path,
    insert_string: str,
) -> Generator[Path, None, None]:
    """Copy the dataset to a name with *insert_string* spliced in before "_v1".

    Yields the path of the copy inside TEST_RESOURCES_DIRECTORY and removes
    the copy on teardown.

    NOTE(review): assumes the file name contains "_v1" — if it does not,
    ``str.find`` returns -1 and the insert lands before the last character;
    confirm callers always parametrize a "_v1" name.
    """
    file_name = existing_data_path.name
    insert_pos = file_name.find("_v1")
    new_file_name = file_name[:insert_pos] + insert_string + file_name[insert_pos:]
    new_path = TEST_RESOURCES_DIRECTORY / new_file_name
    shutil.copy(existing_data_path, new_path)
    yield new_path
    # Teardown: missing_ok also covers a test that already deleted the copy.
    new_path.unlink(missing_ok=True)


@pytest.mark.usefixtures("existing_metadata_file")
def test_existing_metadata_file(
metadata: DataDocMetadata,
Expand All @@ -69,9 +82,10 @@ def test_metadata_document_percent_complete(metadata: DataDocMetadata):
def test_write_metadata_document(
dummy_timestamp: datetime,
metadata: DataDocMetadata,
tmp_path: pathlib.Path,
):
metadata.write_metadata_document()
written_document = TEST_RESOURCES_DIRECTORY / TEST_EXISTING_METADATA_FILE_NAME
written_document = tmp_path / TEST_EXISTING_METADATA_FILE_NAME
assert Path.exists(written_document)
assert metadata.meta.dataset.metadata_created_date == dummy_timestamp
assert metadata.meta.dataset.metadata_created_by == PLACEHOLDER_EMAIL_ADDRESS
Expand Down Expand Up @@ -184,9 +198,10 @@ def test_save_file_path_metadata_field(

def test_save_file_path_dataset_and_no_metadata(
metadata: DataDocMetadata,
tmp_path: pathlib.Path,
):
metadata.write_metadata_document()
with Path.open(Path(TEST_RESOURCES_METADATA_DOCUMENT)) as f:
with (tmp_path / TEST_EXISTING_METADATA_FILE_NAME).open() as f:
saved_file_path = json.load(f)["datadoc"]["dataset"]["file_path"]
assert saved_file_path == str(metadata.dataset)

Expand Down Expand Up @@ -237,138 +252,74 @@ def test_open_file(


@pytest.mark.parametrize(
("dataset_path", "metadata_document_path", "expected_type"),
("dataset_path", "expected_type"),
[
(
str(
TEST_PROCESSED_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1.parquet",
),
str(
TEST_PROCESSED_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json",
),
DatasetStatus.DRAFT.value,
TEST_PROCESSED_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1.parquet",
DatasetStatus.INTERNAL.value,
),
(
str(
TEST_RESOURCES_DIRECTORY / "person_data_v1.parquet",
),
None,
TEST_PARQUET_FILEPATH,
DatasetStatus.DRAFT.value,
),
(
"",
None,
None,
),
],
)
def test_dataset_status_default_value(
subject_mapping_fake_statistical_structure: StatisticSubjectMapping,
dataset_path: str,
metadata_document_path: str | None,
expected_type: DatasetStatus | None,
):
datadoc_metadata = DataDocMetadata(
subject_mapping_fake_statistical_structure,
dataset_path,
metadata_document_path,
str(dataset_path),
)

assert expected_type == datadoc_metadata.meta.dataset.dataset_status
assert datadoc_metadata.meta.dataset.dataset_status == expected_type


@pytest.mark.parametrize(
("dataset_path", "metadata_document_path", "expected_type"),
("path_parts_to_insert", "expected_type"),
[
(
str(
TEST_INPUT_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1.parquet",
),
str(
TEST_INPUT_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json",
),
Assessment.SENSITIVE.value,
),
(
str(TEST_INPUT_DATA_POPULATION_DIRECTORY / "person_data_v1.parquet"),
"kildedata",
None,
Assessment.PROTECTED.value,
),
(
str(
TEST_PROCESSED_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1.parquet",
),
str(
TEST_PROCESSED_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json",
),
"inndata",
Assessment.PROTECTED.value,
),
(
str(TEST_PROCESSED_DATA_POPULATION_DIRECTORY / "person_data_v1.parquet"),
None,
"klargjorte_data",
Assessment.PROTECTED.value,
),
(
str(
TEST_STATISTICS_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1.parquet",
),
str(
TEST_STATISTICS_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json",
),
Assessment.SENSITIVE.value,
),
(
str(TEST_STATISTICS_POPULATION_DIRECTORY / "person_data_v1.parquet"),
None,
"statistikk",
Assessment.PROTECTED.value,
),
(
str(
TEST_OUTPUT_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1.parquet",
),
str(
TEST_OUTPUT_DATA_POPULATION_DIRECTORY
/ "person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json",
),
Assessment.SENSITIVE.value,
),
(
str(TEST_OUTPUT_DATA_POPULATION_DIRECTORY / "person_data_v1.parquet"),
None,
"utdata",
Assessment.OPEN.value,
),
(
str(TEST_RESOURCES_DIRECTORY / "person_data_v1.parquet"),
None,
None,
),
(
"",
None,
None,
),
],
)
def test_dataset_assessment_default_value(
dataset_path: str,
metadata_document_path: str | None,
expected_type: Assessment | None,
copy_dataset_to_path: Path,
):
datadoc_metadata = DataDocMetadata(
statistic_subject_mapping=StatisticSubjectMapping(source_url=""),
dataset_path=dataset_path,
metadata_document_path=metadata_document_path,
dataset_path=str(copy_dataset_to_path),
)
assert expected_type == datadoc_metadata.meta.dataset.assessment
assert datadoc_metadata.meta.dataset.assessment == expected_type


@pytest.mark.parametrize(
Expand Down
12 changes: 12 additions & 0 deletions tests/backend/test_statistic_subject_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,18 @@ def test_get_secondary_subject(
)


@pytest.fixture()
def subject_mapping_http_exception(
    requests_mock,
    exception_to_raise,
) -> StatisticSubjectMapping:
    """Return a StatisticSubjectMapping whose source URL raises *exception_to_raise*."""
    url = "http://test.some.url.com"
    requests_mock.get(url, exc=exception_to_raise)
    return StatisticSubjectMapping(url)


@pytest.mark.parametrize(
("exception_to_raise"),
[
Expand Down
29 changes: 27 additions & 2 deletions tests/backend/test_unit_types.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import functools
import pathlib

import pandas as pd
import pytest

from datadoc.backend.unit_types import UnitTypes
Expand All @@ -6,6 +10,27 @@
TEST_UNIT_TYPES_DIR = "unit_types"


@pytest.fixture()
def _mock_fetch_dataframe(
    mocker,
    unit_types_csv_filepath: pathlib.Path,
) -> None:
    """Patch UnitTypes' external data fetch to read a local CSV fixture instead."""

    def fake_unit_types() -> pd.DataFrame:
        # Stand-in for the external call: load the parametrized CSV fixture.
        return pd.read_csv(unit_types_csv_filepath)

    # NOTE(review): functools.partial with no bound arguments looks redundant,
    # but partial objects are not descriptors, so the attribute patched onto
    # the class is not bound as a method and no `self` is passed to
    # fake_unit_types — presumably intentional; confirm before simplifying.
    mocker.patch(
        "datadoc.backend.unit_types.UnitTypes._fetch_data_from_external_source",
        functools.partial(fake_unit_types),
    )


@pytest.fixture()
def unit_types_fake_structure(
    _mock_fetch_dataframe,
) -> UnitTypes:
    """Build a UnitTypes instance whose data fetch is mocked by the dependency fixture."""
    fake_structure = UnitTypes(100)
    return fake_structure


@pytest.mark.parametrize(
("unit_types_csv_filepath", "expected"),
[
Expand Down Expand Up @@ -55,7 +80,7 @@ def test_read_dataframe(
assert unit_types_fake_structure.classifications == expected


def test_no_source_url():
unit_types = UnitTypes(None)
def test_non_existent_code():
    """A classification id of 0 yields no classifications."""
    result = UnitTypes(0)
    result.wait_for_external_result()
    assert result.classifications == []
Loading

0 comments on commit 460173b

Please sign in to comment.