Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
* Rename '_get_control_field_general_info' -> '_get_control_field'
* Call private methods directly instead of assigning to temporary variables
* Assign 'crosswalk' variables as Marc class variables
* Rename tests
  • Loading branch information
jonavellecuerdo committed Jul 22, 2024
1 parent e0d7b44 commit 692c090
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 60 deletions.
28 changes: 14 additions & 14 deletions tests/sources/xml/test_marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

def create_marc_source_record_stub(
leader_field_insert: str = "<leader>03282nam 2200721Ki 4500</leader>",
control_field_general_info_insert: str = (
control_field_insert: str = (
'<controlfield tag="008">170906s2016 fr mun| o e zxx d</controlfield>'
),
datafield_insert: str = "",
Expand All @@ -22,7 +22,7 @@ def create_marc_source_record_stub(
Args:
leader_field_insert (str): A string representing a MARC fixed length 'leader'
XML element. Defaults to a dummy value.
control_field_general_info_insert (str): A string representing a MARC fixed length
control_field_insert (str): A string representing a MARC fixed length
'general info control field' (i.e., code 008) XML element.
Defaults to a dummy value.
datafield_insert (str): A string representing a MARC 'datafield' XML element.
Expand All @@ -34,7 +34,7 @@ def create_marc_source_record_stub(
<collection>
<record>
{leader_field_insert}
{control_field_general_info_insert}
{control_field_insert}
<controlfield tag="001">990027185640106761</controlfield>
{datafield_insert}
</record>
Expand All @@ -44,7 +44,7 @@ def create_marc_source_record_stub(
return BeautifulSoup(
xml_string.format(
leader_field_insert=leader_field_insert,
control_field_general_info_insert=control_field_general_info_insert,
control_field_insert=control_field_insert,
datafield_insert=datafield_insert,
),
"xml",
Expand Down Expand Up @@ -798,7 +798,7 @@ def test_get_leader_field_success():
assert Marc._get_leader_field(source_record) == "03282nam 2200721Ki 4500"


def test_get_leader_field_raises_error_if_field_blank():
def test_get_leader_field_raises_skipped_record_event_if_field_blank():
source_record = create_marc_source_record_stub(
leader_field_insert="<leader></leader>"
)
Expand All @@ -809,7 +809,7 @@ def test_get_leader_field_raises_error_if_field_blank():
Marc._get_leader_field(source_record)


def test_get_leader_data_raises_error_if_field_missing():
def test_get_leader_field_raises_skipped_record_event_if_field_missing():
source_record = create_marc_source_record_stub(leader_field_insert="")
with pytest.raises(
SkippedRecordEvent,
Expand All @@ -818,14 +818,14 @@ def test_get_leader_data_raises_error_if_field_missing():
Marc._get_leader_field(source_record)


def test_get_control_field_general_info_success():
def test_get_control_field_success():
source_record = create_marc_source_record_stub()
assert Marc._get_control_field_general_info(source_record) == (
assert Marc._get_control_field(source_record) == (
"170906s2016 fr mun| o e zxx d"
)


def test_get_control_field_general_info_if_field_blank():
def test_get_control_field_raises_skipped_record_event_if_field_blank():
source_record = create_marc_source_record_stub(
control_field_general_info_insert='<controlfield tag="008"></controlfield>'
)
Expand All @@ -835,18 +835,18 @@ def test_get_control_field_general_info_if_field_blank():
'Record skipped because key information is missing: <controlfield tag="008">.'
),
):
Marc._get_control_field_general_info(source_record)
Marc._get_control_field(source_record)


def test_get_control_field_general_info_if_field_missing():
def test_get_control_field_raises_skipped_record_event_if_field_missing():
source_record = create_marc_source_record_stub(control_field_general_info_insert="")
with pytest.raises(
SkippedRecordEvent,
match=(
'Record skipped because key information is missing: <controlfield tag="008">.'
),
):
Marc._get_control_field_general_info(source_record)
Marc._get_control_field(source_record)


def test_get_alternate_titles_success():
Expand Down Expand Up @@ -961,7 +961,7 @@ def test_get_call_numbers_transforms_correctly_if_fields_missing():
assert Marc.get_call_numbers(source_record) is None


def test_marc_record_missing_leader_logs_error(caplog):
def test_marc_record_missing_leader_skips_record(caplog):
marc_xml_records = Marc.parse_source_file(
"tests/fixtures/marc/marc_record_missing_leader.xml"
)
Expand All @@ -971,7 +971,7 @@ def test_marc_record_missing_leader_logs_error(caplog):
assert output_records.skipped_record_count == 1


def test_marc_record_missing_008_logs_error(caplog):
def test_marc_record_missing_008_skips_record(caplog):
marc_xml_records = Marc.parse_source_file(
"tests/fixtures/marc/marc_record_missing_008.xml"
)
Expand Down
90 changes: 44 additions & 46 deletions transmogrifier/sources/xml/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,24 @@
logger = logging.getLogger(__name__)


country_code_crosswalk = load_external_config("config/loc-countries.xml", "xml")

holdings_collection_crosswalk = load_external_config(
"config/holdings_collection_crosswalk.json", "json"
)
holdings_format_crosswalk = load_external_config(
"config/holdings_format_crosswalk.json", "json"
)
holdings_location_crosswalk = load_external_config(
"config/holdings_location_crosswalk.json", "json"
)

language_code_crosswalk = load_external_config("config/loc-languages.xml", "xml")

marc_content_type_crosswalk = load_external_config(
"config/marc_content_type_crosswalk.json", "json"
)


class Marc(XMLTransformer):
"""Marc transformer."""

country_code_crosswalk = load_external_config("config/loc-countries.xml", "xml")
holdings_collection_crosswalk = load_external_config(
"config/holdings_collection_crosswalk.json", "json"
)
holdings_format_crosswalk = load_external_config(
"config/holdings_format_crosswalk.json", "json"
)
holdings_location_crosswalk = load_external_config(
"config/holdings_location_crosswalk.json", "json"
)
language_code_crosswalk = load_external_config("config/loc-languages.xml", "xml")
marc_content_type_crosswalk = load_external_config(
"config/marc_content_type_crosswalk.json", "json"
)

def get_optional_fields(self, source_record: Tag) -> dict | None:
"""
Retrieve optional TIMDEX fields from a MARC XML record.
Expand All @@ -44,10 +40,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
"""
fields: dict = {}

source_record_id = Marc.get_source_record_id(source_record)

leader_field = Marc._get_leader_field(source_record)
control_field_general_info = Marc._get_control_field_general_info(source_record)
source_record_id = self.get_source_record_id(source_record)

# alternate titles
fields["alternate_titles"] = self.get_alternate_titles(source_record)
Expand All @@ -59,8 +52,8 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:

# content_type
if content_type := Marc.json_crosswalk_code_to_name(
leader_field[6:7],
marc_content_type_crosswalk,
self._get_leader_field(source_record)[6:7],
self.marc_content_type_crosswalk,
source_record_id,
"Leader/06",
):
Expand Down Expand Up @@ -138,7 +131,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
fields["contributors"] = contributor_values or None

# dates
publication_year = control_field_general_info[7:11].strip()
publication_year = self._get_control_field(source_record)[7:11].strip()
if validate_date(publication_year, source_record_id):
fields["dates"] = [
timdex.Date(kind="Publication date", value=publication_year)
Expand Down Expand Up @@ -167,19 +160,19 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
)
holding_collection_value = Marc.json_crosswalk_code_to_name(
self.create_subfield_value_string_from_datafield(datafield, ["aa"]),
holdings_collection_crosswalk,
self.holdings_collection_crosswalk,
source_record_id,
"985 $aa",
)
holding_format_value = Marc.json_crosswalk_code_to_name(
self.create_subfield_value_string_from_datafield(datafield, "t"),
holdings_format_crosswalk,
self.holdings_format_crosswalk,
source_record_id,
"985 $t",
)
holding_location_value = Marc.json_crosswalk_code_to_name(
self.create_subfield_value_string_from_datafield(datafield, "i"),
holdings_location_crosswalk,
self.holdings_location_crosswalk,
source_record_id,
"985 $i",
)
Expand Down Expand Up @@ -287,7 +280,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:

# Get language codes
language_codes = []
if fixed_language_value := control_field_general_info[35:38]:
if fixed_language_value := self._get_control_field(source_record)[35:38]:
language_codes.append(fixed_language_value)
for field_041 in source_record.find_all("datafield", tag="041"):
language_codes.extend(
Expand All @@ -297,7 +290,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
# Crosswalk codes to names
for language_code in list(dict.fromkeys(language_codes)):
if language_name := Marc.loc_crosswalk_code_to_name(
language_code, language_code_crosswalk, source_record_id, "language"
language_code, self.language_code_crosswalk, source_record_id, "language"
):
languages.append(language_name) # noqa: PERF401

Expand Down Expand Up @@ -337,22 +330,29 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
# by leader "Type of Record" position = "Language Material" or "Manuscript
# language material" and "Bibliographic level" position =
# "Monographic component part," "Collection," "Subunit," or "Monograph/Item."
if leader_field[6:7] in "at" and leader_field[7:8] in "acdm":
if control_field_general_info[33:34] in "0se":
if (
self._get_leader_field(source_record)[6:7] in "at"
and self._get_leader_field(source_record)[7:8] in "acdm"
):
if self._get_control_field(source_record)[33:34] in "0se":
fields["literary_form"] = "Nonfiction"
elif control_field_general_info[33:34]:
elif self._get_control_field(source_record)[33:34]:
fields["literary_form"] = "Fiction"

# locations

# Get place of publication from 008 field code
if fixed_location_code := control_field_general_info[15:17]: # noqa: SIM102
if location_name := Marc.loc_crosswalk_code_to_name(
fixed_location_code, country_code_crosswalk, source_record_id, "country"
):
fields.setdefault("locations", []).append(
timdex.Location(value=location_name, kind="Place of Publication")
)
if (fixed_location_code := self._get_control_field(source_record)[15:17]) and (
location_name := Marc.loc_crosswalk_code_to_name(
fixed_location_code,
self.country_code_crosswalk,
source_record_id,
"country",
)
):
fields.setdefault("locations", []).append(
timdex.Location(value=location_name, kind="Place of Publication")
)

# Get other locations
location_marc_fields = [
Expand Down Expand Up @@ -738,11 +738,9 @@ def _get_leader_field(cls, source_record: Tag) -> str:
raise SkippedRecordEvent(message)

@classmethod
def _get_control_field_general_info(cls, source_record: Tag) -> str:
if control_field_general_info := source_record.find(
"controlfield", tag="008", string=True
):
return str(control_field_general_info.string)
def _get_control_field(cls, source_record: Tag) -> str:
if control_field := source_record.find("controlfield", tag="008", string=True):
return str(control_field.string)
message = (
"Record skipped because key information is missing: "
'<controlfield tag="008">.'
Expand Down

0 comments on commit 692c090

Please sign in to comment.