Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add transformer functions and mappers #2

Merged
merged 9 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
working-directory: ./tests
run: |
source ../.venv/bin/activate
pytest
pytest --db-url=${{ secrets.DB_URL }}
- name: flake8 linter
run: |
source ./.venv/bin/activate
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,7 @@ pg_restore -d isb_sesar ./sesardb-schemaonly.dump
```

## Run the main script
`python main.py -d "postgresql+psycopg2://isb_writer:password@localhost/isb_sesar"`
`python main.py -d "postgresql+psycopg2://isb_writer:password@localhost/isb_sesar"`

## Run the test script
`pytest --db-url="postgresql+psycopg2://username:password@DB_HOST/DB_NAME"`
4 changes: 4 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
def pytest_addoption(parser):
    """Register the ``--db-url`` command-line option with pytest.

    Args:
        parser: The option parser object that pytest passes to this hook.
    """
    parser.addoption(
        "--db-url",
        action="store",
        # Shown by ``pytest --help`` so the option is discoverable.
        help="Database URL used by the tests, e.g. "
             "postgresql+psycopg2://username:password@DB_HOST/DB_NAME",
    )
4 changes: 0 additions & 4 deletions examples/EOI00002H.json
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to formalize these as tests. Will post a link to how we do this in iSamples…

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me know if you have any questions on these.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You would need to make some test files with the raw SESAR source records as well.

Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@
"name": "Evans_Leigh",
"role": "collector"
},
{
"name": "Andra Bobbitt",
"role": "sample owner"
},
{
"name": "TN300",
"role": "sponsor"
Expand Down
4 changes: 0 additions & 4 deletions examples/IEDUT103B.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,6 @@
"name": "Andrea Dutton",
"role": "collector"
},
{
"name": "Andrea Dutton",
"role": "sample owner"
},
{
"name": "UF Jamaica 2015 Fieldwork",
"role": "sponsor"
Expand Down
4 changes: 0 additions & 4 deletions examples/IEEJR000M.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,6 @@
{
"name": "Jade Star Lackey",
"role": "collector"
},
{
"name": "Evan Ramos",
"role": "sample owner"
}
],
"resultTime": "2021-01-19",
Expand Down
4 changes: 0 additions & 4 deletions examples/IEJEN0040.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@
{
"name": "Miriam Jones",
"role": "collector"
},
{
"name": "Jonathan Nichols",
"role": "sample owner"
}
],
"resultTime": "2017-09-01",
Expand Down
8 changes: 2 additions & 6 deletions examples/IERVTL1I7.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
"label": "Not Provided",
"responsibility": [
{
"contact_information": "[email protected]",
"name": "Watershed Function SFA Data Team",
"contact_information": "[email protected]",
"name": "SLAC SFA",
"role": "sample owner"
},
{
Expand Down Expand Up @@ -50,10 +50,6 @@
"name": "Zach Perzan",
"role": "collector"
},
{
"name": "SLAC SFA",
"role": "sample owner"
},
{
"name": "SLAC-SFA",
"role": "sponsor"
Expand Down
4 changes: 0 additions & 4 deletions examples/ODP02Q1IZ.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@
"hasFeatureOfInterest": "Not Provided",
"label": "ODP Leg 178",
"responsibility": [
{
"name": "Integrated Ocean Drilling Program (TAMU)",
"role": "sample owner"
},
{
"name": "ODP Leg 178",
"role": "sponsor"
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from .sample import Sample

from .Mapper import (
from .mapper import (
AbstractCategoryMapper,
StringPairedCategoryMapper,
StringOrderedCategoryMapper,
Expand Down Expand Up @@ -112,9 +112,7 @@ def has_specimen_categories(self) -> typing.List[str]:
return SpecimenCategoryMetaMapper.categories(sample_type)

def id_string(self) -> str:
return "https://data.isamples.org/digitalsample/{0}/{1}".format(
'igsn', self.sample.igsn
)
return f"https://data.isamples.org/digitalsample/igsn/{self.sample.igsn}"

def _material_type(self) -> str:
if self.sample.classification and self.sample.top_level_classification:
Expand Down Expand Up @@ -158,6 +156,7 @@ def informal_classification(self) -> typing.List[str]:
return [Transformer.NOT_PROVIDED]

def keywords(self) -> typing.List:
# TODO: add more keywords
keyword_arr = []
sample_type = self.sample.sample_type
if sample_type:
Expand Down Expand Up @@ -259,17 +258,17 @@ def produced_by_responsibilities(self) -> list[dict]:
}
responsibilities.append(collector)

if self.sample.orig_owner:
if self.sample.orig_owner.fname.lower() == 'curator':
sample_owner_name = self.sample.orig_owner.lname
else:
sample_owner_name = f"{self.sample.orig_owner.fname} {self.sample.orig_owner.lname}"
if sample_owner_name:
sample_owner = {
"role": "sample owner",
"name": sample_owner_name
}
responsibilities.append(sample_owner)
# if self.sample.orig_owner:
# if self.sample.orig_owner.fname.lower() == 'curator':
# sample_owner_name = self.sample.orig_owner.lname
# else:
# sample_owner_name = f"{self.sample.orig_owner.fname} {self.sample.orig_owner.lname}"
# if sample_owner_name:
# sample_owner = {
# "role": "sample owner",
# "name": sample_owner_name
# }
# responsibilities.append(sample_owner)

if self.sample.cruise_field_prgrm:
cruise_field_prgrm = {
Expand Down Expand Up @@ -601,7 +600,7 @@ class ContextCategoryMetaMapper(AbstractCategoryMetaMapper):
"Liquid>aqueous", "Vent", "Subsurface fluid reservoir"
)
_floodplainAquiferMapper = StringPairedCategoryMapper(
"Liquid>aqueous", "floodplain\, aquifer", "Subsurface fluid reservoir" # noqa: W605
"Liquid>aqueous", "floodplain, aquifer", "Subsurface fluid reservoir" # noqa: W605
)
_creekBankMapper = StringPairedCategoryMapper(
"Sedimentary>GlacialAndOrPaleosol>Rock",
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import click
from isamples_sesar.sqlmodel_database import SQLModelDAO, get_sample_with_igsn
from isamples_sesar.SesarTransformer import Transformer
from isamples_sesar.sesar_transformer import Transformer
import json


Expand Down
206 changes: 206 additions & 0 deletions tests/test_isamples_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import pytest
import json
from sqlmodel import Session, create_engine

from isamples_sesar.sesar_transformer import Transformer
from isamples_sesar.sqlmodel_database import (
get_sample_with_igsn
)


@pytest.fixture
def db_url(request):
    """Return the database URL supplied on the command line via ``--db-url``.

    Fails the requesting test with an explicit message when the option was
    not given (or is empty), instead of handing an empty value on to the
    fixtures that depend on it.
    """
    url = request.config.getoption("--db-url")
    if not url:
        pytest.fail('the "--db-url" option is required to run these tests')
    return url


@pytest.fixture(name="session")
def session_fixture(db_url):
    """Yield a ``Session`` bound to an engine created from ``db_url``.

    The session is closed by its own context manager once the test is done;
    the engine is disposed afterwards so that the engine built for each test
    does not outlive it.
    """
    engine = create_engine(
        db_url,
        echo=False
    )
    try:
        with Session(engine) as session:
            yield session
    finally:
        # Runs on normal teardown and when the test body raised.
        engine.dispose()


@pytest.mark.parametrize("igsn", ["10.58052/EOI00002H",
                                  "10.58052/IEDUT103B",
                                  "10.58052/IEEJR000M",
                                  "10.58052/IEJEN0040",
                                  "10.58052/IERVTL1I7",
                                  "10.60471/ODP02Q1IZ"])
def test_example(session: Session, igsn):
    """Transform the sample stored for ``igsn`` and compare it to its example JSON.

    The sample is looked up through ``session``, transformed with
    ``Transformer``, and each field is compared against the matching file in
    ``examples/`` by one of the ``check_*`` helpers.
    """
    # get sample and transform it
    sample = get_sample_with_igsn(session, igsn)
    assert sample is not None
    transformed_test_data = Transformer(sample).transform()

    # The example file is named after the part of the IGSN following the "/".
    igsn_suffix = igsn.split("/")[1]
    # Context manager guarantees the file is closed even if parsing fails.
    with open("examples/" + igsn_suffix + ".json", encoding="utf-8") as json_file:
        expected_data = json.load(json_file)

    check_id(transformed_test_data, expected_data)
    check_label(transformed_test_data, expected_data)
    check_sample_identifier(transformed_test_data, expected_data)
    check_description(transformed_test_data, expected_data)
    check_context_category(transformed_test_data, expected_data)
    check_material_category(transformed_test_data, expected_data)
    check_specimen_category(transformed_test_data, expected_data)
    check_informal_classification(transformed_test_data, expected_data)
    check_keywords(transformed_test_data, expected_data)
    check_produced_by_id(transformed_test_data, expected_data)
    check_produced_by_label(transformed_test_data, expected_data)
    check_produced_by_description(transformed_test_data, expected_data)
    check_produced_by_feature(transformed_test_data, expected_data)
    check_produced_by_responsibility(transformed_test_data, expected_data)
    check_produced_by_time(transformed_test_data, expected_data)
    check_sampling_site_description(transformed_test_data, expected_data)
    check_sampling_site_label(transformed_test_data, expected_data)
    check_sampling_site_place_name(transformed_test_data, expected_data)
    check_sampling_site_elevation(transformed_test_data, expected_data)
    check_sampling_site_latitude(transformed_test_data, expected_data)
    check_sampling_site_longitude(transformed_test_data, expected_data)
    check_registrant(transformed_test_data, expected_data)
    check_sampling_purpose(transformed_test_data, expected_data)
    check_curation_label(transformed_test_data, expected_data)
    check_curation_description(transformed_test_data, expected_data)
    check_curation_access_constraints(transformed_test_data, expected_data)
    check_curation_location(transformed_test_data, expected_data)
    check_curation_responsibility(transformed_test_data, expected_data)
    check_related_resource(transformed_test_data, expected_data)
    check_authorized_by(transformed_test_data, expected_data)
    check_complies_with(transformed_test_data, expected_data)


def check_id(test_data, expected_data):
    """Assert that both records carry the same ``@id``."""
    assert test_data["@id"] == expected_data["@id"]


def check_label(test_data, expected_data):
    """Assert that both records carry the same ``label``."""
    assert test_data["label"] == expected_data["label"]


def check_sample_identifier(test_data, expected_data):
    """Assert that both records carry the same ``sampleidentifier``."""
    assert test_data["sampleidentifier"] == expected_data["sampleidentifier"]


def check_description(test_data, expected_data):
    """Assert that both records carry the same ``description``."""
    assert test_data["description"] == expected_data["description"]


def check_context_category(test_data, expected_data):
    """Assert that both records carry the same ``hasContextCategory``."""
    assert test_data["hasContextCategory"] == expected_data["hasContextCategory"]
    # The confidence value is currently not compared:
    # assert test_data["hasContextCategoryConfidence"] == expected_data["hasContextCategoryConfidence"]


def check_material_category(test_data, expected_data):
    """Assert that both records carry the same ``hasMaterialCategory``."""
    assert test_data["hasMaterialCategory"] == expected_data["hasMaterialCategory"]
    # The confidence value is currently not compared:
    # assert test_data["hasMaterialCategoryConfidence"] == expected_data["hasMaterialCategoryConfidence"]


def check_specimen_category(test_data, expected_data):
    """Assert that both records carry the same ``hasSpecimenCategory``."""
    assert test_data["hasSpecimenCategory"] == expected_data["hasSpecimenCategory"]
    # The confidence value is currently not compared:
    # assert test_data["hasSpecimenCategoryConfidence"] == expected_data["hasSpecimenCategoryConfidence"]


def check_informal_classification(test_data, expected_data):
    """Assert that both records carry the same ``informalClassification``."""
    assert test_data["informalClassification"] == expected_data["informalClassification"]


def check_keywords(test_data, expected_data):
    """Assert that both records carry the same ``keywords``."""
    assert test_data["keywords"] == expected_data["keywords"]


def check_produced_by_id(test_data, expected_data):
    """Assert that ``producedBy.@id`` matches in both records."""
    assert test_data["producedBy"]["@id"] == expected_data["producedBy"]["@id"]


def check_produced_by_label(test_data, expected_data):
    """Assert that ``producedBy.label`` matches in both records."""
    assert test_data["producedBy"]["label"] == expected_data["producedBy"]["label"]


def check_produced_by_description(test_data, expected_data):
    """Assert that ``producedBy.description`` matches in both records."""
    assert test_data["producedBy"]["description"] == expected_data["producedBy"]["description"]


def check_produced_by_feature(test_data, expected_data):
    """Assert that ``producedBy.hasFeatureOfInterest`` matches in both records."""
    assert test_data["producedBy"]["hasFeatureOfInterest"] == expected_data["producedBy"]["hasFeatureOfInterest"]


def check_produced_by_responsibility(test_data, expected_data):
    """Assert that ``producedBy.responsibility`` matches in both records."""
    assert test_data["producedBy"]["responsibility"] == expected_data["producedBy"]["responsibility"]


def check_produced_by_time(test_data, expected_data):
    """Assert that ``producedBy.resultTime`` matches in both records."""
    assert test_data["producedBy"]["resultTime"] == expected_data["producedBy"]["resultTime"]


def check_sampling_site_description(test_data, expected_data):
    """Assert that ``producedBy.samplingSite.description`` matches in both records."""
    assert test_data["producedBy"]["samplingSite"]["description"] == \
        expected_data["producedBy"]["samplingSite"]["description"]


def check_sampling_site_label(test_data, expected_data):
    """Assert that ``producedBy.samplingSite.label`` matches in both records."""
    assert test_data["producedBy"]["samplingSite"]["label"] == \
        expected_data["producedBy"]["samplingSite"]["label"]


def check_sampling_site_place_name(test_data, expected_data):
    """Assert that ``producedBy.samplingSite.placeName`` matches in both records."""
    assert test_data["producedBy"]["samplingSite"]["placeName"] == \
        expected_data["producedBy"]["samplingSite"]["placeName"]


def check_sampling_site_elevation(test_data, expected_data):
    """Assert that ``producedBy.samplingSite.location.elevation`` matches in both records."""
    assert test_data["producedBy"]["samplingSite"]["location"]["elevation"] == \
        expected_data["producedBy"]["samplingSite"]["location"]["elevation"]


def check_sampling_site_latitude(test_data, expected_data):
    """Assert that ``producedBy.samplingSite.location.latitude`` matches in both records."""
    assert test_data["producedBy"]["samplingSite"]["location"]["latitude"] == \
        expected_data["producedBy"]["samplingSite"]["location"]["latitude"]


def check_sampling_site_longitude(test_data, expected_data):
    """Assert that ``producedBy.samplingSite.location.longitude`` matches in both records."""
    assert test_data["producedBy"]["samplingSite"]["location"]["longitude"] == \
        expected_data["producedBy"]["samplingSite"]["location"]["longitude"]


def check_registrant(test_data, expected_data):
    """Assert that both records carry the same ``registrant``."""
    assert test_data["registrant"] == expected_data["registrant"]


def check_sampling_purpose(test_data, expected_data):
    """Assert that both records carry the same ``samplingPurpose``."""
    assert test_data["samplingPurpose"] == expected_data["samplingPurpose"]


def check_curation_label(test_data, expected_data):
    """Assert that ``curation.label`` matches in both records."""
    assert test_data["curation"]["label"] == expected_data["curation"]["label"]


def check_curation_description(test_data, expected_data):
    """Assert that ``curation.description`` matches in both records."""
    assert test_data["curation"]["description"] == expected_data["curation"]["description"]


def check_curation_access_constraints(test_data, expected_data):
    """Assert that ``curation.accessConstraints`` matches in both records."""
    assert test_data["curation"]["accessConstraints"] == expected_data["curation"]["accessConstraints"]


def check_curation_location(test_data, expected_data):
    """Assert that ``curation.curationLocation`` matches in both records."""
    assert test_data["curation"]["curationLocation"] == expected_data["curation"]["curationLocation"]


def check_curation_responsibility(test_data, expected_data):
    """Assert that ``curation.responsibility`` matches in both records."""
    assert test_data["curation"]["responsibility"] == expected_data["curation"]["responsibility"]


def check_related_resource(test_data, expected_data):
    """Assert that both records carry the same ``relatedResource``."""
    assert test_data["relatedResource"] == expected_data["relatedResource"]


def check_authorized_by(test_data, expected_data):
    """Assert that both records carry the same ``authorizedBy``."""
    assert test_data["authorizedBy"] == expected_data["authorizedBy"]


def check_complies_with(test_data, expected_data):
    """Assert that both records carry the same ``compliesWith``."""
    assert test_data["compliesWith"] == expected_data["compliesWith"]
3 changes: 0 additions & 3 deletions tests/test_python.py

This file was deleted.

Loading
Loading