diff --git a/poetry.lock b/poetry.lock
index bdb34408..4738e9dd 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@@ -4139,13 +4139,13 @@ dash = ">=2.15.0"

 [[package]]
 name = "ssb-datadoc-model"
-version = "4.2.0"
+version = "4.3.2"
 description = "Data Model for use in Statistics Norway's Metadata system"
 optional = false
 python-versions = ">=3.10"
 files = [
-    {file = "ssb_datadoc_model-4.2.0-py3-none-any.whl", hash = "sha256:402157eafd855387cc9015d07e6912f8e882a753afcc2165cc753b7ce158cd71"},
-    {file = "ssb_datadoc_model-4.2.0.tar.gz", hash = "sha256:474204c21b3acae0871ac457f51a0e75c15a3806d5d382a5e0ecce1082b9d04c"},
+    {file = "ssb_datadoc_model-4.3.2-py3-none-any.whl", hash = "sha256:86b998ddb7b5e926896a6ce37c1d5ac5c80b512f5b0ccd6b821050a625a7b71c"},
+    {file = "ssb_datadoc_model-4.3.2.tar.gz", hash = "sha256:85566a5b5a24f40954d10e7ae4130c452941ce8362550d6fea27541775c981a7"},
 ]

 [package.dependencies]
@@ -4675,4 +4675,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p

 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<4.0"
-content-hash = "c110d6b039fdbe5f89e8a98886b8361ead519d0a1621cb91672c72a719b86f47"
+content-hash = "713694ade4a05085744e77aa840336b010bfcc5ad30944fd5474945cd7596ef4"
diff --git a/pyproject.toml b/pyproject.toml
index 559a9d4b..0db3885c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ dash = ">=2.15.0"
 pydantic = "==2.5.2"
 dash-bootstrap-components = ">=1.1.0"
 pandas = ">=1.4.2"
-ssb-datadoc-model = "==4.2.0"
+ssb-datadoc-model = "4.3.2"
 dapla-toolbelt = ">=1.3.3"
 gunicorn = ">=21.2.0"
 flask-healthz = ">=0.0.3"
diff --git a/src/datadoc/app.py b/src/datadoc/app.py
index 852714ad..efab934b 100644
--- a/src/datadoc/app.py
+++ b/src/datadoc/app.py
@@ -14,9 +14,9 @@
 from datadoc import config
 from datadoc import state
+from datadoc.backend.code_list import CodeList
 from datadoc.backend.datadoc_metadata import DataDocMetadata
 from datadoc.backend.statistic_subject_mapping import StatisticSubjectMapping
-from datadoc.backend.unit_types import UnitTypes
 from datadoc.enums import SupportedLanguages
 from datadoc.frontend.callbacks.register_callbacks import register_callbacks
 from datadoc.frontend.components.alerts import dataset_validation_error
@@ -122,10 +122,14 @@ def collect_data_from_external_sources() -> None:
         config.get_statistical_subject_source_url(),
     )

-    state.unit_types = UnitTypes(
+    state.unit_types = CodeList(
         config.get_unit_code(),
     )

+    state.organisational_units = CodeList(
+        config.get_organisational_unit_code(),
+    )
+

 def main(dataset_path: str | None = None) -> None:
     """Entrypoint when running as a script."""
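
[Review note] collect_data_from_external_sources now builds two CodeList instances; each constructor submits its Klass fetch to a worker thread in GetExternalSource.__init__, so both code lists load concurrently at startup. A minimal sketch of the intended flow — the blocking behaviour of the classifications property is an assumption inferred from the wait_for_external_result test helper, not something this diff states:

    # Both constructors return immediately; the fetches run on background threads.
    state.unit_types = CodeList(config.get_unit_code())                            # Klass 702 by default
    state.organisational_units = CodeList(config.get_organisational_unit_code())  # Klass 83 by default

    # First access resolves the future and parses the dataframes into items.
    state.organisational_units.classifications  # -> list[CodeListItem]
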
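
[Review note] CodeListItem.get_title now tries the supplied language key directly and only falls back to the old nb/en remapping when that raises KeyError. A sketch of the resulting lookup order (the literal enum values are an assumption here; only the ordering matters):

    item = CodeListItem(titles={"nb": "Adresse", "nn": "Adresse", "en": "Address"}, code="01")

    item.get_title(SupportedLanguages.ENGLISH)
    # 1. titles[SupportedLanguages.ENGLISH]  - hits if the enum value matches a titles key
    # 2. titles["en"]                        - legacy mapping: nb/nn -> "nb", everything else -> "en"
    # 3. logs the KeyError and returns ""    - only if both lookups miss
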
diff --git a/src/datadoc/backend/datadoc_metadata.py b/src/datadoc/backend/datadoc_metadata.py
index 6de4d072..5149c6b5 100644
--- a/src/datadoc/backend/datadoc_metadata.py
+++ b/src/datadoc/backend/datadoc_metadata.py
@@ -8,7 +8,6 @@
 import uuid
 from typing import TYPE_CHECKING

-import pydantic
 from cloudpathlib import CloudPath
 from cloudpathlib import GSClient
 from cloudpathlib import GSPath
@@ -51,16 +50,13 @@ def __init__(
     ) -> None:
         """Read in a dataset if supplied, otherwise naively instantiate the class."""
         self._statistic_subject_mapping = statistic_subject_mapping
-
         self.metadata_document: pathlib.Path | CloudPath | None = None
         self.container: model.MetadataContainer | None = None
         self.dataset_path: pathlib.Path | CloudPath | None = None
         self.short_name: str | None = None
         self.dataset = model.Dataset()
         self.variables: list = []
-
         self.variables_lookup: dict[str, model.Variable] = {}
-
         if metadata_document_path:
             # In this case the user has specified an independent metadata document for editing
             # without a dataset.
@@ -72,7 +68,6 @@ def __init__(
             self.metadata_document = self.dataset_path.parent / (
                 self.dataset_path.stem + METADATA_DOCUMENT_FILE_SUFFIX
             )
-
         self.extract_metadata_from_files()

     @staticmethod
@@ -123,10 +118,7 @@ def extract_metadata_from_existing_document(
         try:
             with document.open(mode="r", encoding="utf-8") as file:
                 fresh_metadata = json.load(file)
-            logger.info(
-                "Opened existing metadata file %s",
-                document,
-            )
+            logger.info("Opened existing metadata file %s", document)
             if self.is_metadata_in_container_structure(fresh_metadata):
                 self.container = model.MetadataContainer.model_validate_json(
@@ -134,16 +126,13 @@
                 datadoc_metadata = fresh_metadata["datadoc"]
             else:
                 datadoc_metadata = fresh_metadata
-
             if datadoc_metadata is None:
                 # In this case we've read in a file with an empty "datadoc" structure.
                 # A typical example of this is a file produced from a pseudonymization process.
                 return
-
             datadoc_metadata = upgrade_metadata(
                 datadoc_metadata,
             )
-
             meta = model.DatadocMetadata.model_validate_json(
                 json.dumps(datadoc_metadata),
             )
@@ -151,7 +140,6 @@
             if meta.dataset is not None:
                 self.dataset = meta.dataset
             if meta.variables is not None:
                 self.variables = meta.variables
-
         except json.JSONDecodeError:
             logger.warning(
                 "Could not open existing metadata file %s. \
@@ -169,14 +157,7 @@ def is_metadata_in_container_structure(

         The container provides a structure for different 'types' of metadata, such as 'datadoc', 'pseudonymization' etc.
         This method returns True if the metadata is in the container structure, False otherwise.
         """
-        try:
-            model.MetadataContainer.model_validate_json(
-                json.dumps(metadata),
-            )
-        except pydantic.ValidationError:
-            return False
-        else:
-            return True
+        return "datadoc" in metadata

     def extract_metadata_from_dataset(
         self,
diff --git a/src/datadoc/backend/external_sources/external_sources.py b/src/datadoc/backend/external_sources/external_sources.py
index c6bdd5db..2c1c7525 100644
--- a/src/datadoc/backend/external_sources/external_sources.py
+++ b/src/datadoc/backend/external_sources/external_sources.py
@@ -21,7 +21,6 @@ def __init__(self) -> None:

         Initializes the future object.
         """
         self.future: concurrent.futures.Future[T | None] | None = None
-
         executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
         self.future = executor.submit(
             self._fetch_data_from_external_source,
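
[Review note] is_metadata_in_container_structure no longer round-trips the document through pydantic; any mapping with a "datadoc" key now counts as a container, which is also what allows the pydantic import to be dropped above. A quick sketch of the behavioural difference (hypothetical documents, not fixtures from this repo):

    # Counts as a container even if it would have failed MetadataContainer validation before:
    "datadoc" in {"document_version": "0.0.1", "datadoc": {"percentage_complete": 98}}  # True

    # Bare datadoc documents keep taking the else-branch:
    "datadoc" in {"dataset": {"short_name": "person_data_v1"}}  # False
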
diff --git a/src/datadoc/backend/model_backwards_compatibility.py b/src/datadoc/backend/model_backwards_compatibility.py
index b0ed6282..2d482d67 100644
--- a/src/datadoc/backend/model_backwards_compatibility.py
+++ b/src/datadoc/backend/model_backwards_compatibility.py
@@ -63,12 +63,20 @@ def handle_current_version(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
     return supplied_metadata


+def handle_version_2_1_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
+    """Handle breaking changes for v2.1.0.
+
+    Datatype changed from LanguageStringType to str for owner.
+    """
+    data = supplied_metadata["dataset"]["owner"]
+    supplied_metadata["dataset"]["owner"] = str(data["nb"] or data["nn"] or data["en"])
+    supplied_metadata["document_version"] = "2.2.0"
+    return supplied_metadata
+
+
 def handle_version_1_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
     """Handle breaking changes for v1.0.0."""
-    datetime_fields = [
-        ("metadata_created_date"),
-        ("metadata_last_updated_date"),
-    ]
+    datetime_fields = [("metadata_created_date"), ("metadata_last_updated_date")]
     for field in datetime_fields:
         if supplied_metadata["dataset"][field]:
             supplied_metadata["dataset"][field] = datetime.isoformat(
@@ -77,13 +85,11 @@ def handle_version_1_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
                 ),
                 timespec="seconds",
             )
-
     if isinstance(supplied_metadata["dataset"]["data_source"], str):
         supplied_metadata["dataset"]["data_source"] = LanguageStringType(
             en=supplied_metadata["dataset"]["data_source"],
         )
     supplied_metadata["document_version"] = "2.1.0"
-
     return supplied_metadata


@@ -102,7 +108,6 @@ def handle_version_0_1_1(supplied_metadata: dict[str, Any]) -> dict[str, Any]:
         supplied_metadata["dataset"][new_key] = supplied_metadata["dataset"].pop(
             old_key,
         )
-
     # Replace empty strings with None, empty strings are not valid for LanguageStrings values
     supplied_metadata["dataset"] = {
         k: None if v == "" else v for k, v in supplied_metadata["dataset"].items()
@@ -113,14 +118,9 @@
 # Register all the supported versions and their handlers.
 # MUST be ordered from oldest to newest.
 BackwardsCompatibleVersion(version="0.1.1", handler=handle_version_0_1_1)
-BackwardsCompatibleVersion(
-    version="1.0.0",
-    handler=handle_version_1_0_0,
-)
-BackwardsCompatibleVersion(
-    version="2.1.0",
-    handler=handle_current_version,
-)
+BackwardsCompatibleVersion(version="1.0.0", handler=handle_version_1_0_0)
+BackwardsCompatibleVersion(version="2.1.0", handler=handle_version_2_1_0)
+BackwardsCompatibleVersion(version="2.2.0", handler=handle_current_version)


 def upgrade_metadata(fresh_metadata: dict[str, Any]) -> dict[str, Any]:
@@ -128,15 +128,12 @@
     # Special case for current version, we expose the current_model_version parameter for test purposes
     supplied_version = fresh_metadata[VERSION_FIELD_NAME]
     start_running_handlers = False
-
     # Run all the handlers in order from the supplied version onwards
     for k, v in SUPPORTED_VERSIONS.items():
         if k == supplied_version:
             start_running_handlers = True
         if start_running_handlers:
             fresh_metadata = v.handler(fresh_metadata)
-
     if not start_running_handlers:
         raise UnknownModelVersionError(supplied_version)
-
     return fresh_metadata
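
[Review note] With handle_version_2_1_0 registered, upgrade_metadata runs every handler from the supplied version onwards, so a 2.1.0 document gets its LanguageStringType owner collapsed to the first non-empty of nb/nn/en and is stamped 2.2.0 before the no-op current-version handler runs. A sketch of the expected transformation (values are illustrative):

    from datadoc.backend.model_backwards_compatibility import upgrade_metadata

    doc = {
        "document_version": "2.1.0",
        "dataset": {"owner": {"en": "", "nn": "", "nb": "723"}},
    }
    upgraded = upgrade_metadata(doc)
    assert upgraded["dataset"]["owner"] == "723"        # str(nb or nn or en)
    assert upgraded["document_version"] == "2.2.0"
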
diff --git a/src/datadoc/config.py b/src/datadoc/config.py
index 13671e88..d30ccec2 100644
--- a/src/datadoc/config.py
+++ b/src/datadoc/config.py
@@ -128,3 +128,8 @@ def get_oidc_token() -> str | None:
 def get_unit_code() -> int | None:
     """The code for the Unit Type code list in Klass."""
     return int(_get_config_item("DATADOC_UNIT_CODE") or 702)
+
+
+def get_organisational_unit_code() -> int | None:
+    """The code for the organisational units code list in Klass."""
+    return int(_get_config_item("DATADOC_ORGANISATIONAL_UNIT_CODE") or 83)
diff --git a/src/datadoc/frontend/fields/display_dataset.py b/src/datadoc/frontend/fields/display_dataset.py
index 3c49a3e3..89f7716e 100644
--- a/src/datadoc/frontend/fields/display_dataset.py
+++ b/src/datadoc/frontend/fields/display_dataset.py
@@ -60,12 +60,25 @@ def get_unit_type_options(
     return [
         {
             "label": unit_type.get_title(language),
-            "value": unit_type.unit_code,
+            "value": unit_type.code,
         }
         for unit_type in state.unit_types.classifications
     ]


+def get_owner_options(
+    language: SupportedLanguages,
+) -> list[dict[str, str]]:
+    """Collect the owner options for the given language."""
+    return [
+        {
+            "label": f"{option.code} - {option.get_title(language)}",
+            "value": option.code,
+        }
+        for option in state.organisational_units.classifications
+    ]
+
+
 class DatasetIdentifiers(str, Enum):
     """As defined here: https://statistics-norway.atlassian.net/l/c/aoSfEWJU."""

@@ -225,13 +238,14 @@ class DatasetIdentifiers(str, Enum):
         editable=False,
         value_getter=get_metadata_and_stringify,
     ),
-    DatasetIdentifiers.OWNER: DisplayDatasetMetadata(
+    DatasetIdentifiers.OWNER: DisplayDatasetMetadataDropdown(
         identifier=DatasetIdentifiers.OWNER.value,
         display_name="Eier",
         description="Maskingenerert seksjonstilhørighet til den som oppretter metadata om datasettet, men kan korrigeres manuelt",
         obligatory=True,
-        editable=False,
-        multiple_language_support=True,
+        editable=True,
+        multiple_language_support=False,
+        options_getter=get_owner_options,
     ),
     DatasetIdentifiers.FILE_PATH: DisplayDatasetMetadata(
         identifier=DatasetIdentifiers.FILE_PATH.value,
diff --git a/src/datadoc/state.py b/src/datadoc/state.py
index 2219bb33..0e43c6c0 100644
--- a/src/datadoc/state.py
+++ b/src/datadoc/state.py
@@ -13,9 +13,9 @@
 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
+    from datadoc.backend.code_list import CodeList
     from datadoc.backend.datadoc_metadata import DataDocMetadata
     from datadoc.backend.statistic_subject_mapping import StatisticSubjectMapping
-    from datadoc.backend.unit_types import UnitTypes
     from datadoc.enums import SupportedLanguages


@@ -26,4 +26,6 @@
 statistic_subject_mapping: StatisticSubjectMapping

-unit_types: UnitTypes
+unit_types: CodeList
+
+organisational_units: CodeList
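
[Review note] Unlike get_unit_type_options, get_owner_options prefixes each label with the code, which is what the new editable owner dropdown renders. Against the code list fixtures used in the tests below, the options would look like this (illustrative output):

    get_owner_options(SupportedLanguages.NORSK_BOKMÅL)
    # [{"label": "01 - Adresse", "value": "01"},
    #  {"label": "02 - Arbeidsulykke", "value": "02"},
    #  {"label": "03 - Bolig", "value": "03"}]
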
"Arbeidsulykke", }, - unit_code="02", + code="02", ), - UnitType( + CodeListItem( titles={ "nb": "Bolig", "nn": "Bolig", "en": "Bolig", }, - unit_code="03", + code="03", ), ], ), ( - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "empty.csv", - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "empty.csv", - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "empty.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "empty.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "empty.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "empty.csv", [], ), ( - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "unit_types_nb.csv", - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "empty.csv", - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "empty.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nb.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "empty.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "empty.csv", [ - UnitType( + CodeListItem( titles={ "nb": "Adresse", "nn": None, "en": None, }, - unit_code="01", + code="01", ), - UnitType( + CodeListItem( titles={ "nb": "Arbeidsulykke", "nn": None, "en": None, }, - unit_code="02", + code="02", ), - UnitType( + CodeListItem( titles={ "nb": "Bolig", "nn": None, "en": None, }, - unit_code="03", + code="03", ), ], ), ( - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "no_code.csv", - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "no_code.csv", - TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "no_code.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "no_code.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "no_code.csv", + TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "no_code.csv", [ - UnitType( + CodeListItem( titles={ "nb": "Adresse", "nn": "Adresse", "en": "Adresse", }, - unit_code=None, + code=None, ), - UnitType( + CodeListItem( titles={ "nb": "Arbeidsulykke", "nn": "Arbeidsulykke", "en": "Arbeidsulykke", }, - unit_code=None, + code=None, ), - UnitType( + CodeListItem( titles={ "nb": "Bolig", "nn": "Bolig", "en": "Bolig", }, - unit_code=None, + code=None, ), ], ), @@ -118,14 +118,14 @@ ) @pytest.mark.usefixtures("_mock_fetch_dataframe") def test_read_dataframe( - unit_types_fake_structure: UnitTypes, + code_list_fake_structure: CodeList, expected: list[str], ): - unit_types_fake_structure.wait_for_external_result() - assert unit_types_fake_structure.classifications == expected + code_list_fake_structure.wait_for_external_result() + assert code_list_fake_structure.classifications == expected def test_non_existent_code(): - unit_types = UnitTypes(0) - unit_types.wait_for_external_result() - assert unit_types.classifications == [] + code_list = CodeList(0) + code_list.wait_for_external_result() + assert code_list.classifications == [] diff --git a/tests/backend/test_datadoc_metadata.py b/tests/backend/test_datadoc_metadata.py index 362734e7..ebd23484 100644 --- a/tests/backend/test_datadoc_metadata.py +++ b/tests/backend/test_datadoc_metadata.py @@ -77,7 +77,7 @@ def test_metadata_document_percent_complete(metadata: DataDocMetadata): metadata.dataset = document.dataset # type: ignore [assignment] metadata.variables = document.variables # type: ignore [assignment] - assert metadata.percent_complete == 17 # noqa: PLR2004 + assert metadata.percent_complete == 16 # noqa: PLR2004 def test_write_metadata_document( @@ -279,7 +279,6 @@ def test_dataset_status_default_value( subject_mapping_fake_statistical_structure, str(dataset_path), ) - assert datadoc_metadata.dataset.dataset_status == expected_type diff --git a/tests/backend/test_model_backwards_compatibility.py 
diff --git a/tests/backend/test_model_backwards_compatibility.py b/tests/backend/test_model_backwards_compatibility.py
index 12e26fb8..0c532055 100644
--- a/tests/backend/test_model_backwards_compatibility.py
+++ b/tests/backend/test_model_backwards_compatibility.py
@@ -19,7 +19,7 @@


 def test_existing_metadata_current_model_version():
-    current_model_version = "2.1.0"
+    current_model_version = "2.2.0"
     fresh_metadata = {"document_version": current_model_version}
     upgraded_metadata = upgrade_metadata(fresh_metadata)
     assert upgraded_metadata == fresh_metadata
diff --git a/tests/conftest.py b/tests/conftest.py
index a7a85344..9434ad69 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -19,9 +19,9 @@
 from datadoc_model import model

 from datadoc import state
+from datadoc.backend.code_list import CodeList
 from datadoc.backend.datadoc_metadata import DataDocMetadata
 from datadoc.backend.statistic_subject_mapping import StatisticSubjectMapping
-from datadoc.backend.unit_types import UnitTypes
 from datadoc.backend.user_info import TestUserInfo
 from tests.backend.test_statistic_subject_mapping import (
     STATISTICAL_SUBJECT_STRUCTURE_DIR,
@@ -33,7 +33,7 @@
 from .utils import TEST_PARQUET_FILEPATH
 from .utils import TEST_RESOURCES_DIRECTORY

-UNIT_TYPES_DIR = "unit_types"
+CODE_LIST_DIR = "code_list"

 if TYPE_CHECKING:
     from pytest_mock import MockerFixture
@@ -204,45 +204,45 @@ def subject_mapping_http_exception(


 @pytest.fixture()
-def unit_types_csv_filepath_nb() -> pathlib.Path:
-    return TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "unit_types_nb.csv"
+def code_list_csv_filepath_nb() -> pathlib.Path:
+    return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nb.csv"


 @pytest.fixture()
-def unit_types_csv_filepath_nn() -> pathlib.Path:
-    return TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "unit_types_nn.csv"
+def code_list_csv_filepath_nn() -> pathlib.Path:
+    return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nn.csv"


 @pytest.fixture()
-def unit_types_csv_filepath_en() -> pathlib.Path:
-    return TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "unit_types_en.csv"
+def code_list_csv_filepath_en() -> pathlib.Path:
+    return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_en.csv"


 @pytest.fixture()
 def _mock_fetch_dataframe(
     mocker,
-    unit_types_csv_filepath_nb: pathlib.Path,
-    unit_types_csv_filepath_nn: pathlib.Path,
-    unit_types_csv_filepath_en: pathlib.Path,
+    code_list_csv_filepath_nb: pathlib.Path,
+    code_list_csv_filepath_nn: pathlib.Path,
+    code_list_csv_filepath_en: pathlib.Path,
 ) -> None:
-    def fake_unit_types() -> dict[str, pd.DataFrame]:
+    def fake_code_list() -> dict[str, pd.DataFrame]:
         return {
-            "nb": pd.read_csv(unit_types_csv_filepath_nb, converters={"code": str}),
-            "nn": pd.read_csv(unit_types_csv_filepath_nn, converters={"code": str}),
-            "en": pd.read_csv(unit_types_csv_filepath_en, converters={"code": str}),
+            "nb": pd.read_csv(code_list_csv_filepath_nb, converters={"code": str}),
+            "nn": pd.read_csv(code_list_csv_filepath_nn, converters={"code": str}),
+            "en": pd.read_csv(code_list_csv_filepath_en, converters={"code": str}),
         }

     mocker.patch(
-        "datadoc.backend.unit_types.UnitTypes._fetch_data_from_external_source",
-        functools.partial(fake_unit_types),
+        "datadoc.backend.code_list.CodeList._fetch_data_from_external_source",
+        functools.partial(fake_code_list),
     )


 @pytest.fixture()
-def unit_types_fake_structure(
+def code_list_fake_structure(
     _mock_fetch_dataframe,
-) -> UnitTypes:
-    return UnitTypes(100)
+) -> CodeList:
+    return CodeList(100)


 @pytest.fixture()
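
[Review note] The functools.partial wrapper in _mock_fetch_dataframe is load-bearing: partial objects do not implement the descriptor protocol, so the patched attribute is never bound as a method and fake_code_list is called without a self argument. A standalone illustration of the mechanism:

    import functools

    class C:
        def method(self):
            return "real"

    def fake():
        return "fake"

    C.method = fake
    # C().method() -> TypeError: fake() takes 0 positional arguments but 1 was given

    C.method = functools.partial(fake)
    C().method()  # "fake" - no self is prepended
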
diff --git a/tests/frontend/callbacks/test_dataset_callbacks.py b/tests/frontend/callbacks/test_dataset_callbacks.py
index d5e029b5..cb8057cb 100644
--- a/tests/frontend/callbacks/test_dataset_callbacks.py
+++ b/tests/frontend/callbacks/test_dataset_callbacks.py
@@ -27,8 +27,8 @@
 from tests.utils import TEST_PARQUET_FILEPATH

 if TYPE_CHECKING:
+    from datadoc.backend.code_list import CodeList
     from datadoc.backend.statistic_subject_mapping import StatisticSubjectMapping
-    from datadoc.backend.unit_types import UnitTypes
     from datadoc.frontend.callbacks.utils import MetadataInputTypes

 DATASET_CALLBACKS_MODULE = "datadoc.frontend.callbacks.dataset"
@@ -122,7 +122,7 @@ def file_path():
         (
             DatasetIdentifiers.OWNER,
             "Seksjon for dataplattform",
-            enums.LanguageStringType(nb="Seksjon for dataplattform"),
+            "Seksjon for dataplattform",
         ),
     ],
 )
@@ -243,7 +243,7 @@ def test_update_dataset_metadata_language_enums():
 @pytest.mark.parametrize("language", list(SupportedLanguages))
 def test_change_language_dataset_metadata_options_enums(
     subject_mapping_fake_statistical_structure: StatisticSubjectMapping,
-    unit_types_fake_structure: UnitTypes,
+    code_list_fake_structure: CodeList,
     metadata: DataDocMetadata,
     enum_for_options: LanguageStringsEnum,
     language: SupportedLanguages,
@@ -251,7 +251,8 @@
     state.metadata = metadata
     state.current_metadata_language = SupportedLanguages.NORSK_BOKMÅL
     state.statistic_subject_mapping = subject_mapping_fake_statistical_structure
-    state.unit_types = unit_types_fake_structure
+    state.unit_types = code_list_fake_structure
+    state.organisational_units = code_list_fake_structure

     value = change_language_dataset_metadata(language)
     for options in cast(list[list[dict[str, str]]], value[0:-1]):
diff --git a/tests/frontend/fields/test_display_dataset.py b/tests/frontend/fields/test_display_dataset.py
index d11c7d86..99888e3f 100644
--- a/tests/frontend/fields/test_display_dataset.py
+++ b/tests/frontend/fields/test_display_dataset.py
@@ -4,10 +4,10 @@
 from datadoc.enums import SupportedLanguages
 from datadoc.frontend.fields.display_dataset import get_statistical_subject_options
 from datadoc.frontend.fields.display_dataset import get_unit_type_options
+from tests.backend.test_code_list import CODE_LIST_DIR
 from tests.backend.test_statistic_subject_mapping import (
     STATISTICAL_SUBJECT_STRUCTURE_DIR,
 )
-from tests.backend.test_unit_types import UNIT_TYPES_DIR
 from tests.utils import TEST_RESOURCES_DIRECTORY


@@ -48,10 +48,10 @@ def test_get_statistical_subject_options(


 @pytest.mark.parametrize(
-    ("unit_types_csv_filepath_nb", "expected"),
+    ("code_list_csv_filepath_nb", "expected"),
     [
         (
-            TEST_RESOURCES_DIRECTORY / UNIT_TYPES_DIR / "unit_types_nb.csv",
+            TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nb.csv",
             [
                 {"label": "Adresse", "value": "01"},
                 {"label": "Arbeidsulykke", "value": "02"},
                 {"label": "Bolig", "value": "03"},
@@ -61,9 +61,9 @@
     ],
 )
 def test_get_unit_type_options(
-    unit_types_fake_structure,
+    code_list_fake_structure,
     expected,
 ):
-    state.unit_types = unit_types_fake_structure
+    state.unit_types = code_list_fake_structure
     state.unit_types.wait_for_external_result()
     assert get_unit_type_options(SupportedLanguages.NORSK_BOKMÅL) == expected
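
[Review note] Only get_unit_type_options is exercised here; a matching case for the new get_owner_options could reuse the same fixture (a hypothetical test, not part of this diff):

    def test_get_owner_options(code_list_fake_structure):
        state.organisational_units = code_list_fake_structure
        state.organisational_units.wait_for_external_result()
        assert get_owner_options(SupportedLanguages.NORSK_BOKMÅL) == [
            {"label": "01 - Adresse", "value": "01"},
            {"label": "02 - Arbeidsulykke", "value": "02"},
            {"label": "03 - Bolig", "value": "03"},
        ]
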
diff --git a/tests/resources/unit_types/unit_types.csv b/tests/resources/code_list/code_list.csv
similarity index 100%
rename from tests/resources/unit_types/unit_types.csv
rename to tests/resources/code_list/code_list.csv
diff --git a/tests/resources/unit_types/unit_types_en.csv b/tests/resources/code_list/code_list_en.csv
similarity index 100%
rename from tests/resources/unit_types/unit_types_en.csv
rename to tests/resources/code_list/code_list_en.csv
diff --git a/tests/resources/unit_types/unit_types_nb.csv b/tests/resources/code_list/code_list_nb.csv
similarity index 100%
rename from tests/resources/unit_types/unit_types_nb.csv
rename to tests/resources/code_list/code_list_nb.csv
diff --git a/tests/resources/unit_types/unit_types_nn.csv b/tests/resources/code_list/code_list_nn.csv
similarity index 100%
rename from tests/resources/unit_types/unit_types_nn.csv
rename to tests/resources/code_list/code_list_nn.csv
diff --git a/tests/resources/unit_types/empty.csv b/tests/resources/code_list/empty.csv
similarity index 100%
rename from tests/resources/unit_types/empty.csv
rename to tests/resources/code_list/empty.csv
diff --git a/tests/resources/unit_types/no_code.csv b/tests/resources/code_list/no_code.csv
similarity index 100%
rename from tests/resources/unit_types/no_code.csv
rename to tests/resources/code_list/no_code.csv
diff --git a/tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json
index c3f9ff55..724976b0 100644
--- a/tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json
+++ b/tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json
@@ -20,7 +20,11 @@
   "subject_field": null,
   "spatial_coverage_description": null,
   "id": null,
-  "owner": null,
+  "owner": {
+    "en": "",
+    "nn": "",
+    "nb": "724"
+  },
   "data_source_path": "klargjorte_data/person_data_v1.parquet",
   "created_date": "2022-07-13T17:28:01.617657",
   "created_by": "default_user@ssb.no",
diff --git a/tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json
index f23018ac..6b062b2e 100644
--- a/tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json
+++ b/tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json
@@ -46,7 +46,11 @@
     "nb": "Norge"
   },
   "id": "143fca77-ef56-419c-a1e1-d69c4199f020",
-  "owner": null,
+  "owner": {
+    "en": "",
+    "nn": "",
+    "nb": "722"
+  },
   "data_source_path": "klargjorte_data/person_data_v1.parquet",
   "metadata_created_date": "2022-09-05T13:07:14.066023",
   "metadata_created_by": "default_user@ssb.no",
diff --git a/tests/resources/existing_metadata_file/compatibility/v2_1_0/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/compatibility/v2_1_0/person_data_v1__DOC.json
new file mode 100644
index 00000000..908e7752
--- /dev/null
+++ b/tests/resources/existing_metadata_file/compatibility/v2_1_0/person_data_v1__DOC.json
@@ -0,0 +1,257 @@
+{
+  "percentage_complete": 98,
+  "document_version": "2.1.0",
+  "dataset": {
+    "short_name": "person_data_v1",
+    "assessment": "SENSITIVE",
+    "dataset_status": "DRAFT",
+    "dataset_state": "PROCESSED_DATA",
+    "name": {
+      "en": "successfully_read_existing_file",
+      "nn": "",
+      "nb": "Persondata"
+    },
+    "description": {
+      "en": "",
+      "nn": "",
+      "nb": ""
+    },
+    "data_source": {
+      "en": "",
+      "nn": "",
+      "nb": "Skatteetaten"
+    },
+    "register_uri": null,
+    "population_description": {
+      "en": "",
+      "nn": "",
+      "nb": "Norsk befolkning"
+    },
+    "version": "1",
+    "version_description": null,
+    "unit_type": "PERSON",
+    "temporality_type": "STATUS",
+    "subject_field": {
+      "en": "",
+      "nn": "",
+      "nb": "Skatt"
+    },
+    "keyword": [
+      "Skatt",
+      "Person",
"Helsepenger" + ], + "spatial_coverage_description": { + "en": "", + "nn": "", + "nb": "Norge" + }, + "id": "143fca77-ef56-419c-a1e1-d69c4199f020", + "owner": { + "en": "", + "nn": "", + "nb": "723" + }, + "file_path": null, + "metadata_created_date": "2022-09-05T11:07:14Z", + "metadata_created_by": "default_user@ssb.no", + "metadata_last_updated_date": "2024-01-08T15:41:05.681664Z", + "metadata_last_updated_by": "default_user@ssb.no", + "contains_data_from": "2010-09-05", + "contains_data_until": "2022-09-05" + }, + "variables": [ + { + "short_name": "pers_id", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "STRING", + "variable_role": "IDENTIFIER", + "definition_uri": "https://www.ssb.no/a/metadata/conceptvariable/vardok/26/nb", + "direct_person_identifying": true, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + }, + { + "short_name": "tidspunkt", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "DATETIME", + "variable_role": "START_TIME", + "definition_uri": null, + "direct_person_identifying": false, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + }, + { + "short_name": "sivilstand", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "STRING", + "variable_role": "MEASURE", + "definition_uri": "https://www.ssb.no/a/metadata/conceptvariable/vardok/91/nb", + "direct_person_identifying": false, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + }, + { + "short_name": "alm_inntekt", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "INTEGER", + "variable_role": "MEASURE", + "definition_uri": "https://www.ssb.no/a/metadata/conceptvariable/vardok/1438/nb", + "direct_person_identifying": false, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + }, + { + "short_name": "sykepenger", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "INTEGER", + "variable_role": "MEASURE", + "definition_uri": "https://www.ssb.no/metadata/conceptvariable/vardok/3366/nb", + "direct_person_identifying": false, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + }, + { + "short_name": "ber_bruttoformue", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "INTEGER", + 
"variable_role": "MEASURE", + "definition_uri": "https://www.ssb.no/metadata/conceptvariable/vardok/3327/nb", + "direct_person_identifying": false, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + }, + { + "short_name": "fullf_utdanning", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "STRING", + "variable_role": "MEASURE", + "definition_uri": "https://www.ssb.no/metadata/conceptvariable/vardok/3242/nb", + "direct_person_identifying": false, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + }, + { + "short_name": "hoveddiagnose", + "name": { + "en": "", + "nn": "", + "nb": "g" + }, + "data_type": "STRING", + "variable_role": "MEASURE", + "definition_uri": "https://www.ssb.no/metadata/conceptvariable/vardok/2578/nb", + "direct_person_identifying": false, + "data_source": null, + "population_description": null, + "comment": null, + "temporality_type": null, + "measurement_unit": null, + "format": null, + "classification_uri": null, + "sentinel_value_uri": null, + "invalid_value_description": null, + "id": null, + "contains_data_from": null, + "contains_data_until": null + } + ] + } diff --git a/tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json index 653c2a9c..b7eccdcd 100644 --- a/tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json +++ b/tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json @@ -2,7 +2,7 @@ "document_version": "0.0.1", "datadoc": { "percentage_complete": 6, - "document_version": "2.1.0", + "document_version": "2.2.0", "dataset": { "short_name": "person_data_v1", "assessment": "OPEN", diff --git a/tests/resources/existing_metadata_file/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/person_data_v1__DOC.json index ed7dcdc3..71ade1b8 100644 --- a/tests/resources/existing_metadata_file/person_data_v1__DOC.json +++ b/tests/resources/existing_metadata_file/person_data_v1__DOC.json @@ -2,7 +2,7 @@ "document_version": "0.0.1", "datadoc": { "percentage_complete": 98, - "document_version": "2.1.0", + "document_version": "2.2.0", "dataset": { "short_name": "person_data_v1", "assessment": "SENSITIVE", @@ -49,7 +49,7 @@ "nb": "Norge" }, "id": "143fca77-ef56-419c-a1e1-d69c4199f020", - "owner": null, + "owner": "NNN", "file_path": null, "metadata_created_date": "2022-09-05T11:07:14Z", "metadata_created_by": "default_user@ssb.no", diff --git a/tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json index 90a178bd..0662d9bb 100644 --- a/tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json +++ b/tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json @@ -2,7 +2,7 @@ "document_version": "0.0.1", "datadoc": { "percentage_complete": 6, - "document_version": "2.1.0", + "document_version": "2.2.0", 
"dataset": { "short_name": "person_data_v1", "assessment": "OPEN", diff --git a/tests/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json b/tests/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json index e8e4bc98..54947ae6 100644 --- a/tests/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json +++ b/tests/resources/klargjorte_data/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json @@ -2,7 +2,7 @@ "document_version": "0.0.1", "datadoc": { "percentage_complete": 97, - "document_version": "2.1.0", + "document_version": "2.2.0", "dataset": { "short_name": "person_testdata_p2021-12-31_p2021-12-31", "assessment": "PROTECTED", @@ -52,11 +52,7 @@ "nb": "Norge" }, "id": "2f72477a-f051-43ee-bf8b-0d8f47b5e0a7", - "owner": { - "en": "", - "nn": "", - "nb": "Seksjon NNN" - }, + "owner": "722", "file_path": null, "metadata_created_date": "2022-10-07T07:35:01Z", "metadata_created_by": "default_user@ssb.no", diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 91f5300d..c3a1b51d 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -4,9 +4,9 @@ from datadoc.app import get_app -def test_get_app(subject_mapping_fake_statistical_structure, unit_types_fake_structure): +def test_get_app(subject_mapping_fake_statistical_structure, code_list_fake_structure): state.statistic_subject_mapping = subject_mapping_fake_statistical_structure - state.unit_types = unit_types_fake_structure + state.code_list = code_list_fake_structure app, _ = get_app() assert app.config["name"] == "Datadoc"