diff --git a/poetry.lock b/poetry.lock index 22686273..9acd1420 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5250,4 +5250,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "0e17893f396e1e140d092af6b293b4aa3d08214c6e02e40353e0a2e400cca0fb" +content-hash = "680594f088f20a75afe08bf2d6b3572326217db8c163887d417f15e774636857" diff --git a/src/datadoc/frontend/callbacks/register_callbacks.py b/src/datadoc/frontend/callbacks/register_callbacks.py index 0ada7de5..ca03ff99 100644 --- a/src/datadoc/frontend/callbacks/register_callbacks.py +++ b/src/datadoc/frontend/callbacks/register_callbacks.py @@ -22,9 +22,7 @@ from datadoc.frontend.callbacks.dataset import accept_dataset_metadata_input from datadoc.frontend.callbacks.dataset import open_dataset_handling from datadoc.frontend.callbacks.utils import render_tabs -from datadoc.frontend.callbacks.validation_utils import ( - save_metadata_and_generate_alerts, -) +from datadoc.frontend.callbacks.utils import save_metadata_and_generate_alerts from datadoc.frontend.callbacks.variables import accept_variable_metadata_date_input from datadoc.frontend.callbacks.variables import accept_variable_metadata_input from datadoc.frontend.callbacks.variables import populate_variables_workspace @@ -73,7 +71,7 @@ def callback_save_metadata_file( If none return no_update. """ if n_clicks and n_clicks > 0: - return save_metadata_and_generate_alerts() + return save_metadata_and_generate_alerts(state.metadata) return no_update diff --git a/src/datadoc/frontend/callbacks/utils.py b/src/datadoc/frontend/callbacks/utils.py index 044eaf51..3d2d53d2 100644 --- a/src/datadoc/frontend/callbacks/utils.py +++ b/src/datadoc/frontend/callbacks/utils.py @@ -4,16 +4,22 @@ import datetime import logging +import warnings from typing import TYPE_CHECKING from typing import TypeAlias import arrow import ssb_dash_components as ssb +from dapla_metadata.datasets import Datadoc +from dapla_metadata.datasets import ObligatoryDatasetWarning +from dapla_metadata.datasets import ObligatoryVariableWarning from dapla_metadata.datasets import model from dash import html from datadoc import config from datadoc import state +from datadoc.frontend.components.builders import AlertTypes +from datadoc.frontend.components.builders import build_ssb_alert from datadoc.frontend.components.identifiers import ACCORDION_WRAPPER_ID from datadoc.frontend.components.identifiers import SECTION_WRAPPER_ID from datadoc.frontend.components.identifiers import VARIABLES_INFORMATION_ID @@ -226,3 +232,42 @@ def render_tabs(tab: str) -> html.Article | None: ) return None + + +def save_metadata_and_generate_alerts(metadata: Datadoc) -> list: + """Save the metadata document to disk and check obligatory metadata. + + Returns: + List of alerts including obligatory metadata warnings if missing, + and success alert if metadata is saved correctly. + """ + from datadoc.frontend.callbacks.dataset import dataset_control + from datadoc.frontend.callbacks.variables import variables_control + + missing_obligatory_dataset = "" + missing_obligatory_variables = "" + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + metadata.write_metadata_document() + success_alert = build_ssb_alert( + AlertTypes.SUCCESS, + "Lagret metadata", + ) + + for warning in w: + if issubclass(warning.category, ObligatoryDatasetWarning): + missing_obligatory_dataset = str(warning.message) + elif issubclass(warning.category, ObligatoryVariableWarning): + missing_obligatory_variables = str(warning.message) + else: + logger.warning( + "An unexpected warning was caught: %s", + warning.message, + ) + + return [ + success_alert, + dataset_control(missing_obligatory_dataset), + variables_control(missing_obligatory_variables, metadata.variables), + ] diff --git a/src/datadoc/frontend/callbacks/validation_utils.py b/src/datadoc/frontend/callbacks/validation_utils.py deleted file mode 100644 index ff5bff4d..00000000 --- a/src/datadoc/frontend/callbacks/validation_utils.py +++ /dev/null @@ -1,51 +0,0 @@ -"""Functions which can't be placed in utils.py because of circular imports.""" - -import logging -import warnings - -from dapla_metadata.datasets import ObligatoryDatasetWarning -from dapla_metadata.datasets import ObligatoryVariableWarning - -from datadoc import state -from datadoc.frontend.callbacks.dataset import dataset_control -from datadoc.frontend.callbacks.variables import variables_control -from datadoc.frontend.components.builders import AlertTypes -from datadoc.frontend.components.builders import build_ssb_alert - -logger = logging.getLogger(__name__) - - -def save_metadata_and_generate_alerts() -> list: - """Save the metadata document to disk and check obligatory metadata. - - Returns: - List of alerts including obligatory metadata warnings if missing, - and success alert if metadata is saved correctly. - """ - missing_obligatory_dataset = "" - missing_obligatory_variables = "" - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - state.metadata.write_metadata_document() - success_alert = build_ssb_alert( - AlertTypes.SUCCESS, - "Lagret metadata", - ) - - for warning in w: - if issubclass(warning.category, ObligatoryDatasetWarning): - missing_obligatory_dataset = str(warning.message) - elif issubclass(warning.category, ObligatoryVariableWarning): - missing_obligatory_variables = str(warning.message) - else: - logger.warning( - "An unexpected warning was caught: %s", - warning.message, - ) - - return [ - success_alert, - dataset_control(missing_obligatory_dataset), - variables_control(missing_obligatory_variables), - ] diff --git a/src/datadoc/frontend/callbacks/variables.py b/src/datadoc/frontend/callbacks/variables.py index 858f89d6..f4258803 100644 --- a/src/datadoc/frontend/callbacks/variables.py +++ b/src/datadoc/frontend/callbacks/variables.py @@ -370,7 +370,7 @@ def _get_dict_by_key( return next((item for item in metadata_list if key in item), None) -def variables_control(error_message: str | None) -> dbc.Alert | None: +def variables_control(error_message: str | None, variables: list) -> dbc.Alert | None: """Check obligatory metadata for variables and return an alert if any metadata is missing. This function parses an error message to identify missing obligatory metadata @@ -379,13 +379,15 @@ def variables_control(error_message: str | None) -> dbc.Alert | None: Args: error_message: A message generated by ObligatoryVariableWarning containing the variable short name and a list of field names with missing values. + variables: list of datadoc variables Returns: An alert object if there are missing metadata fields, otherwise None. """ missing_metadata: list = [] error_message_parsed = _parse_error_message(str(error_message)) - for variable in state.metadata.variables: + # for variable in state.metadata.variables: + for variable in variables: if error_message_parsed: fields_by_variable = _get_dict_by_key( error_message_parsed, diff --git a/tests/conftest.py b/tests/conftest.py index 33464a5e..00a7bd01 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,9 @@ from datadoc import state +from .utils import TEST_EKS_PARQUET from .utils import TEST_EXISTING_METADATA_DIRECTORY +from .utils import TEST_EXISTING_METADATA_NAMING_STANDARD_FILEPATH_NO_MISSING from .utils import TEST_PARQUET_FILE_NAME from .utils import TEST_PARQUET_FILEPATH from .utils import TEST_RESOURCES_DIRECTORY @@ -89,6 +91,21 @@ def metadata( ) +@pytest.fixture() +def metadata_3( + _mock_timestamp: None, + _mock_user_info: None, + subject_mapping_fake_statistical_structure: StatisticSubjectMapping, + tmp_path: Path, +) -> Datadoc: + shutil.copy(TEST_PARQUET_FILEPATH, tmp_path / TEST_EKS_PARQUET) + return Datadoc( + None, + str(TEST_EXISTING_METADATA_NAMING_STANDARD_FILEPATH_NO_MISSING), + statistic_subject_mapping=subject_mapping_fake_statistical_structure, + ) + + @pytest.fixture() def existing_metadata_path() -> Path: return TEST_EXISTING_METADATA_DIRECTORY diff --git a/tests/frontend/callbacks/test_callbacks_validation_utils.py b/tests/frontend/callbacks/test_callbacks_validation_utils.py new file mode 100644 index 00000000..74cffd60 --- /dev/null +++ b/tests/frontend/callbacks/test_callbacks_validation_utils.py @@ -0,0 +1,64 @@ +# def test_validation_utils() + + +from unittest import mock + +import dash_bootstrap_components as dbc +from dapla_metadata.datasets import Datadoc + +from datadoc import state +from datadoc.frontend.callbacks.utils import save_metadata_and_generate_alerts +from datadoc.frontend.components.builders import AlertTypes +from datadoc.frontend.components.builders import build_ssb_alert + + +# if none metadata missing: only save alert +# if dataset missing -> +# if variables missing -> +# if another warning -> +# if not n_clicks ? +def test_save_and_validate(metadata: Datadoc, mocker): + # if n_clicks and n_clicks > 0 ? + + success_alert = build_ssb_alert( + AlertTypes.SUCCESS, + "Lagret metadata", + ) + state.metadata = metadata + + mocker.patch( + "datadoc.frontend.callbacks.utils.save_metadata_and_generate_alerts", + return_value=success_alert, + ) + output = save_metadata_and_generate_alerts(metadata) + assert isinstance(output, list) + num_list_of_alerts = 3 + assert len(output) == num_list_of_alerts + assert output[1] is not None + + +def test_with_mock_patch(metadata_3): + state.metadata = metadata_3 + result = save_metadata_and_generate_alerts(metadata_3) + num_list_of_alerts = 3 + assert len(result) == num_list_of_alerts + assert result[1] is None + assert result[2] is None + + +def test_1(): + mock_metadata = mock.Mock() + mock_metadata.variables = [ + "var1", + "var2", + ] + state.metadata = mock_metadata + + result = save_metadata_and_generate_alerts( + mock_metadata, + ) + + num_list_of_alerts = 3 + assert len(result) == num_list_of_alerts + assert result[2] is None + assert isinstance(result[0], dbc.Alert) diff --git a/tests/frontend/callbacks/test_validation_utils.py b/tests/frontend/callbacks/test_validation_utils.py deleted file mode 100644 index b8462768..00000000 --- a/tests/frontend/callbacks/test_validation_utils.py +++ /dev/null @@ -1 +0,0 @@ -# def test_validation_utils() diff --git a/tests/frontend/callbacks/test_variables_callbacks.py b/tests/frontend/callbacks/test_variables_callbacks.py index 3a589e81..656b7c62 100644 --- a/tests/frontend/callbacks/test_variables_callbacks.py +++ b/tests/frontend/callbacks/test_variables_callbacks.py @@ -591,7 +591,7 @@ def test_variables_metadata_control_return_alert(metadata: Datadoc): state.metadata.write_metadata_document() if issubclass(w[1].category, ObligatoryVariableWarning): missing_metadata = str(w[1].message) - result = variables_control(missing_metadata) + result = variables_control(missing_metadata, metadata.variables) assert isinstance(result, dbc.Alert) @@ -632,5 +632,5 @@ def test_variables_metadata_control_dont_return_alert(metadata: Datadoc): state.metadata.write_metadata_document() if issubclass(w[0].category, ObligatoryVariableWarning): missing_metadata = str(w[0].message) - result = variables_control(missing_metadata) + result = variables_control(missing_metadata, metadata.variables) assert result is None diff --git a/tests/resources/existing_metadata_file/test_p2021-12-31_p2021-12-31_v1__DOC.json b/tests/resources/existing_metadata_file/test_p2021-12-31_p2021-12-31_v1__DOC.json new file mode 100644 index 00000000..93351d7d --- /dev/null +++ b/tests/resources/existing_metadata_file/test_p2021-12-31_p2021-12-31_v1__DOC.json @@ -0,0 +1,402 @@ +{ + "document_version": "0.0.1", + "datadoc": { + "percentage_complete": 100, + "document_version": "4.0.0", + "dataset": { + "short_name": "person_testdata", + "assessment": "PROTECTED", + "dataset_status": "DRAFT", + "dataset_state": "PROCESSED_DATA", + "name": [ + { + "languageCode": "nb", + "languageText": "Test persondata" + }, + { + "languageCode": "nn", + "languageText": "Test persondata" + }, + { + "languageCode": "en", + "languageText": "Test personal data" + } + ], + "description": [ + { + "languageCode": "nb", + "languageText": "Data er kun for test formål" + }, + { + "languageCode": "nn", + "languageText": "Data er kun for test formål" + }, + { + "languageCode": "en", + "languageText": "For testing purposes only" + } + ], + "data_source": "23", + "population_description": [ + { + "languageCode": "nb", + "languageText": "Syntetisk" + }, + { + "languageCode": "nn", + "languageText": "Syntetisk" + }, + { + "languageCode": "en", + "languageText": "Synthetic" + } + ], + "version": "1", + "version_description": [ + { + "languageCode": "nb", + "languageText": "Initiell versjon" + }, + { + "languageCode": "nn", + "languageText": "Initiell versjon" + }, + { + "languageCode": "en", + "languageText": "Initial version" + } + ], + "unit_type": "17", + "temporality_type": "ACCUMULATED", + "subject_field": "al04", + "keyword": [ + "sykepenger", + "inntekt" + ], + "spatial_coverage_description": [ + { + "languageCode": "nb", + "languageText": "Norge" + }, + { + "languageCode": "nn", + "languageText": "Noreg" + }, + { + "languageCode": "en", + "languageText": "Norway" + } + ], + "contains_personal_data": true, + "use_restriction": "PROCESS_LIMITATIONS", + "use_restriction_date": "2024-12-31T23:59:59Z", + "custom_type": null, + "id": "77346986-a2b4-4211-8d74-817e8b1f99d4", + "owner": "320", + "file_path": "None", + "metadata_created_date": "2024-07-09T09:47:28.347248Z", + "metadata_created_by": "@cbi", + "metadata_last_updated_date": "2022-01-01T00:00:00Z", + "metadata_last_updated_by": "default_user@ssb.no", + "contains_data_from": "2021-12-31", + "contains_data_until": "2021-12-31" + }, + "variables": [ + { + "short_name": "fnr", + "data_element_path": null, + "name": [ + { + "languageCode": "nb", + "languageText": "Fødselsnummer" + }, + { + "languageCode": "nn", + "languageText": "Fødselsnummer" + }, + { + "languageCode": "en", + "languageText": "Personal number" + } + ], + "data_type": "STRING", + "variable_role": "MEASURE", + "definition_uri": null, + "is_personal_data": "PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA", + "data_source": "23", + "population_description": [ + { + "languageCode": "nb", + "languageText": "Syntetisk" + }, + { + "languageCode": "nn", + "languageText": "Syntetisk" + }, + { + "languageCode": "en", + "languageText": "Synthetic" + } + ], + "comment": null, + "temporality_type": "ACCUMULATED", + "measurement_unit": null, + "multiplication_factor": null, + "format": null, + "classification_uri": null, + "special_value": null, + "invalid_value_description": null, + "custom_type": null, + "id": "3ba32371-0900-4b3e-b67a-6907146f86fb", + "contains_data_from": "2021-12-31", + "contains_data_until": "2021-12-31" + }, + { + "short_name": "sivilstand", + "data_element_path": null, + "name": [ + { + "languageCode": "nb", + "languageText": "Sivilstand" + }, + { + "languageCode": "nn", + "languageText": "Sivilstand" + }, + { + "languageCode": "en", + "languageText": "Marital status" + } + ], + "data_type": "STRING", + "variable_role": "MEASURE", + "definition_uri": null, + "is_personal_data": "NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA", + "data_source": "23", + "population_description": [ + { + "languageCode": "nb", + "languageText": "Syntetisk" + }, + { + "languageCode": "nn", + "languageText": "Syntetisk" + }, + { + "languageCode": "en", + "languageText": "Synthetic" + } + ], + "comment": null, + "temporality_type": "ACCUMULATED", + "measurement_unit": null, + "multiplication_factor": null, + "format": null, + "classification_uri": null, + "special_value": null, + "invalid_value_description": null, + "custom_type": null, + "id": "9ac9002d-84de-4d10-8d08-7e1e3b592bcf", + "contains_data_from": "2021-12-31", + "contains_data_until": "2021-12-31" + }, + { + "short_name": "bostedskommune", + "data_element_path": null, + "name": [ + { + "languageCode": "nb", + "languageText": "Bostedskommune" + }, + { + "languageCode": "nn", + "languageText": "Bostedskommune" + }, + { + "languageCode": "en", + "languageText": "County of residence" + } + ], + "data_type": "STRING", + "variable_role": "MEASURE", + "definition_uri": null, + "is_personal_data": "NON_PSEUDONYMISED_ENCRYPTED_PERSONAL_DATA", + "data_source": "23", + "population_description": [ + { + "languageCode": "nb", + "languageText": "Syntetisk" + }, + { + "languageCode": "nn", + "languageText": "Syntetisk" + }, + { + "languageCode": "en", + "languageText": "Synthetic" + } + ], + "comment": null, + "temporality_type": "ACCUMULATED", + "measurement_unit": null, + "multiplication_factor": null, + "format": null, + "classification_uri": null, + "special_value": null, + "invalid_value_description": null, + "custom_type": null, + "id": "24788fd2-3891-4326-a051-d113d91ebe92", + "contains_data_from": "2021-12-31", + "contains_data_until": "2021-12-31" + }, + { + "short_name": "inntekt", + "data_element_path": null, + "name": [ + { + "languageCode": "nb", + "languageText": "Inntekt" + }, + { + "languageCode": "nn", + "languageText": "Inntekt" + }, + { + "languageCode": "en", + "languageText": "Income" + } + ], + "data_type": "INTEGER", + "variable_role": "MEASURE", + "definition_uri": null, + "is_personal_data": "NOT_PERSONAL_DATA", + "data_source": "23", + "population_description": [ + { + "languageCode": "nb", + "languageText": "Syntetisk" + }, + { + "languageCode": "nn", + "languageText": "Syntetisk" + }, + { + "languageCode": "en", + "languageText": "Synthetic" + } + ], + "comment": null, + "temporality_type": "ACCUMULATED", + "measurement_unit": "12.02", + "multiplication_factor": null, + "format": null, + "classification_uri": null, + "special_value": null, + "invalid_value_description": null, + "custom_type": null, + "id": "d6dfe816-9a86-4614-a064-569a73e9a7f9", + "contains_data_from": "2021-12-31", + "contains_data_until": "2021-12-31" + }, + { + "short_name": "bankinnskudd", + "data_element_path": null, + "name": [ + { + "languageCode": "nb", + "languageText": "Bankinnskudd" + }, + { + "languageCode": "nn", + "languageText": "Bankinnskudd" + }, + { + "languageCode": "en", + "languageText": "Bank deposits" + } + ], + "data_type": "INTEGER", + "variable_role": "MEASURE", + "definition_uri": null, + "is_personal_data": "NOT_PERSONAL_DATA", + "data_source": "23", + "population_description": [ + { + "languageCode": "nb", + "languageText": "Syntetisk" + }, + { + "languageCode": "nn", + "languageText": "Syntetisk" + }, + { + "languageCode": "en", + "languageText": "Synthetic" + } + ], + "comment": null, + "temporality_type": "ACCUMULATED", + "measurement_unit": "12.02", + "multiplication_factor": null, + "format": null, + "classification_uri": null, + "special_value": null, + "invalid_value_description": null, + "custom_type": null, + "id": "e445135a-74b2-4a6f-99bc-058be6458802", + "contains_data_from": "2021-12-31", + "contains_data_until": "2021-12-31" + }, + { + "short_name": "dato", + "data_element_path": null, + "name": [ + { + "languageCode": "nb", + "languageText": "Dato" + }, + { + "languageCode": "nn", + "languageText": "Dato" + }, + { + "languageCode": "en", + "languageText": "Date" + } + ], + "data_type": "STRING", + "variable_role": "MEASURE", + "definition_uri": null, + "is_personal_data": "NOT_PERSONAL_DATA", + "data_source": "23", + "population_description": [ + { + "languageCode": "nb", + "languageText": "Syntetisk" + }, + { + "languageCode": "nn", + "languageText": "Syntetisk" + }, + { + "languageCode": "en", + "languageText": "Synthetic" + } + ], + "comment": null, + "temporality_type": "ACCUMULATED", + "measurement_unit": null, + "multiplication_factor": null, + "format": null, + "classification_uri": null, + "special_value": null, + "invalid_value_description": null, + "custom_type": null, + "id": "1da549ab-fe39-4ead-859b-91d137a96ded", + "contains_data_from": "2021-12-31", + "contains_data_until": "2021-12-31" + } + ] + }, + "pseudonymization": null +} diff --git a/tests/utils.py b/tests/utils.py index 8ea1ec9a..a18b96a8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -36,3 +36,10 @@ TEST_COMPATIBILITY_DIRECTORY = TEST_EXISTING_METADATA_DIRECTORY / "compatibility" TEST_PROCESSED_DATA_POPULATION_DIRECTORY = TEST_RESOURCES_DIRECTORY / "klargjorte_data" + + +TEST_EKS_PARQUET = "test_p2021-12-31_p2021-12-31_v1.parquet" + +TEST_EXISTING_METADATA_NAMING_STANDARD_FILEPATH_NO_MISSING = ( + TEST_EXISTING_METADATA_DIRECTORY / "test_p2021-12-31_p2021-12-31_v1__DOC.json" +)