# ---------------------------------------------------------------------------
# abis_mapping/templates/survey_site_visit_data_v2/mapping.py
# Method added between apply_validation and extract_temporal_defaults. The
# enclosing class header is outside this hunk (the tests refer to it as
# mapping.SurveySiteVisitMapper).
# ---------------------------------------------------------------------------
def extract_site_visit_id_to_site_id_map(
    self,
    data: base.types.ReadableType,
) -> dict[str, str]:
    """Builds a lookup from each site visit id to its site id.

    Rows are streamed from the raw csv data using this template's schema.
    A row only contributes an entry when neither its siteVisitID cell nor
    its siteID cell is None. If a siteVisitID occurs on several rows, the
    last such row determines the stored siteID.

    Args:
        data: Raw data to be mapped.

    Returns:
        Dictionary keyed by site visit id, with the site id as the value.
    """
    # Resource over the raw csv, typed by the template schema
    resource = frictionless.Resource(
        source=data,
        format="csv",
        schema=frictionless.Schema.from_descriptor(self.schema()),
        encoding="utf-8",
    )

    # Lookup being accumulated
    site_by_visit: dict[str, str] = {}

    # Stream the rows rather than loading them all at once
    with resource.open() as opened:
        for record in opened.row_stream:
            # No site visit id, nothing to key the entry on
            visit_id = record["siteVisitID"]
            if visit_id is None:
                continue

            # No site id, nothing to point the entry at
            site_id = record["siteID"]
            if site_id is None:
                continue

            site_by_visit[visit_id] = site_id

    # Return
    return site_by_visit


# ---------------------------------------------------------------------------
# tests/templates/test_survey_site_visit_data_v2.py
# The class below was renamed from TestExtractTemporalDefaults. The hunk shows
# its existing test_extract_temporal_defaults method only as unchanged
# context, so that method is not reproduced here.
# ---------------------------------------------------------------------------
class TestMapExtractors:
    """Tests for the key value extraction methods."""

    def test_extract_site_visit_id_to_site_id_map(
        self,
        mapper: mapping.SurveySiteVisitMapper,
        mocker: pytest_mock.MockerFixture,
    ) -> None:
        """Checks that only rows with both ids end up in the extracted map.

        Args:
            mapper: Mapper instance fixture.
            mocker: The mocker fixture.
        """
        # Only these two fields matter for this test
        wanted_fields = ("siteID", "siteVisitID")

        # Trim the real descriptor down to the fields of interest
        full_descriptor = mapping.SurveySiteVisitMapper.schema()
        trimmed_fields = [
            field for field in full_descriptor["fields"] if field["name"] in wanted_fields
        ]
        trimmed_descriptor = {**full_descriptor, "fields": trimmed_fields}

        # Have the mapper hand back the trimmed descriptor
        schema_patch = mocker.patch.object(
            mapping.SurveySiteVisitMapper,
            "schema",
            return_value=trimmed_descriptor,
        )

        # (siteID, siteVisitID) pairs that should appear in the map
        kept_pairs: list[tuple[str | None, str | None]] = [
            ("S1", "SV1"),
            ("S1", "SV2"),
        ]
        # (siteID, siteVisitID) pairs that should be left out of the map
        dropped_pairs: list[tuple[str | None, str | None]] = [
            # no value for siteID
            ("", "SV3"),
            (None, "SV4"),
            # no value for siteVisitID
            ("S2", ""),
            ("S3", None),
        ]

        kept_rows: list[dict[str, str | None]] = [
            {"siteID": site, "siteVisitID": visit} for site, visit in kept_pairs
        ]
        dropped_rows: list[dict[str, str | None]] = [
            {"siteID": site, "siteVisitID": visit} for site, visit in dropped_pairs
        ]

        # Map the extractor is expected to produce
        expected = {row["siteVisitID"]: row["siteID"] for row in kept_rows}

        # Serialise every row, kept and dropped alike, into csv bytes
        with io.StringIO() as buffer:
            writer = csv.DictWriter(buffer, fieldnames=list(kept_rows[0]))
            writer.writeheader()
            writer.writerows([*kept_rows, *dropped_rows])
            raw_csv = buffer.getvalue().encode("utf-8")

        # Invoke
        actual = mapper.extract_site_visit_id_to_site_id_map(raw_csv)

        # Assert
        assert actual == expected
        schema_patch.assert_called_once()