Skip to content

Commit

Permalink
Address comments in ead.py
Browse files Browse the repository at this point in the history
* Add docstrings for private methods for important fields
* Use named args for 'create_list/string_from_mixed_value'
  • Loading branch information
jonavellecuerdo committed May 30, 2024
1 parent 81f60e0 commit de46532
Showing 1 changed file with 51 additions and 15 deletions.
66 changes: 51 additions & 15 deletions transmogrifier/sources/xml/ead.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,16 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
"arrangement", recursive=False
):
for arrangement_value in self.create_list_from_mixed_value(
arrangement_element, ["head"]
arrangement_element, skipped_elements=["head"]
):
fields.setdefault("contents", []).append(arrangement_value)

# contributors
for origination_element in collection_description_did.find_all("origination"):
for name_element in origination_element.find_all(name=True, recursive=False):
if name_value := self.create_string_from_mixed_value(name_element, " "):
if name_value := self.create_string_from_mixed_value(
name_element, separator=" "
):
fields.setdefault("contributors", []).append(
timdex.Contributor(
value=name_value,
Expand Down Expand Up @@ -91,7 +93,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
continue
if id_value := self.create_string_from_mixed_value(
id_element,
" ",
separator=" ",
):
fields.setdefault("identifiers", []).append(
timdex.Identifier(value=id_value, kind="Collection Identifier")
Expand All @@ -116,7 +118,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
for location_element in control_access_element.find_all("geogname"):
if location_value := self.create_string_from_mixed_value(
location_element,
" ",
separator=" ",
):
fields.setdefault("locations", []).append(
timdex.Location(value=location_value)
Expand All @@ -136,7 +138,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
for subelement in note_element.find_all(subelement_tag, recursive=False):
if subelement_value := self.create_string_from_mixed_value(
subelement,
" ",
separator=" ",
):
note_value.append(subelement_value) # noqa: PERF401
if note_value:
Expand All @@ -162,7 +164,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
"physdesc", recursive=False
):
if physical_description_value := self.create_string_from_mixed_value(
physical_description_element, " "
physical_description_element, separator=" "
):
physical_descriptions.append(physical_description_value) # noqa: PERF401
if physical_descriptions:
Expand All @@ -175,7 +177,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
"repository"
):
if publication_value := self.create_string_from_mixed_value(
publication_element, " "
publication_element, separator=" "
):
fields["publishers"] = [timdex.Publisher(name=publication_value)]

Expand All @@ -185,7 +187,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
recursive=False,
):
if related_item_value := self.create_string_from_mixed_value(
related_item_element, " ", ["head"]
related_item_element, separator=" ", skipped_elements=["head"]
):
fields.setdefault("related_items", []).append(
timdex.RelatedItem(
Expand All @@ -205,7 +207,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
subelements = related_item_element.find_all("p", recursive=False)
for subelement in subelements:
if related_item_value := self.create_string_from_mixed_value(
subelement, " ", ["head"]
subelement, separator=" ", skipped_elements=["head"]
):
fields.setdefault("related_items", []).append(
timdex.RelatedItem(
Expand All @@ -218,7 +220,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
["accessrestrict", "userestrict"], recursive=False
):
if rights_value := self.create_string_from_mixed_value(
rights_element, " ", ["head"]
rights_element, separator=" ", skipped_elements=["head"]
):
fields.setdefault("rights", []).append(
timdex.Rights(
Expand All @@ -235,7 +237,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
name=True, recursive=False
):
if subject_value := self.create_string_from_mixed_value(
subject_element, " "
subject_element, separator=" "
):
subject_source = subject_element.get("source")
fields.setdefault("subjects", []).append(
Expand All @@ -252,7 +254,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
"abstract", recursive=False
):
if abstract_value := self.create_string_from_mixed_value(
abstract_element, " "
abstract_element, separator=" "
):
abstract_values.append(abstract_value) # noqa: PERF401
fields["summary"] = abstract_values or None
Expand Down Expand Up @@ -328,6 +330,16 @@ def generate_name_identifier_url(cls, name_element: Tag) -> list | None:

@classmethod
def _get_collection_description(cls, source_record: Tag) -> Tag:
"""Get element with archival description for a collection.
For EAD-formatted XML, all of the information about a collection
that is relevant to the TIMDEX data model is
encapsulated within the <archdesc level="collection"> element.
If this element is missing, it suggests that there is a structural
error in the record.
This method is used by multiple field methods.
"""
if collection_description := source_record.metadata.find(
"archdesc", level="collection"
):
Expand All @@ -340,6 +352,16 @@ def _get_collection_description(cls, source_record: Tag) -> Tag:

@classmethod
def _get_collection_description_did(cls, source_record: Tag) -> Tag:
"""Get element with descriptive identification for a collection.
For EAD-formatted XML, information essential
for identifying the material being described is encapsulated within
the <did> element (nested within the <archdesc> element).
If this element is missing, the required TIMDEX field 'title'
cannot be derived.
This method is used by multiple field methods.
"""
collection_description = cls._get_collection_description(source_record)
if collection_description_did := collection_description.did:
return collection_description_did
Expand All @@ -348,6 +370,16 @@ def _get_collection_description_did(cls, source_record: Tag) -> Tag:

@classmethod
def _get_control_access(cls, source_record: Tag) -> list[Tag]:
"""Get elements with control access headings for a collection.
For EAD-formatted XML, information essential
for identifying the material being described is encapsulated within
the <did> element (nested within the <archdesc> element).
If this element is missing, the required TIMDEX field 'title'
cannot be derived.
This method is used by multiple field methods.
"""
collection_description = cls._get_collection_description(source_record)
return collection_description.find_all("controlaccess", recursive=False)

Expand All @@ -369,7 +401,7 @@ def get_citation(cls, source_record: Tag) -> str | None:
citation_element := collection_description.find("prefercite", recursive=False)
) and (
citation := cls.create_string_from_mixed_value(
citation_element, " ", ["head"]
citation_element, separator=" ", skipped_elements=["head"]
)
):
return citation
Expand All @@ -385,7 +417,7 @@ def get_content_type(cls, source_record: Tag) -> list[str] | None:
for content_type_element in control_access_element.find_all("genreform")
if (
content_type := cls.create_string_from_mixed_value(
content_type_element, " "
content_type_element, separator=" "
)
)
]
Expand All @@ -412,7 +444,11 @@ def get_main_titles(cls, source_record: Tag) -> list[str]:
return [
title
for unit_title in unit_titles
if (title := cls.create_string_from_mixed_value(unit_title, " ", ["num"]))
if (
title := cls.create_string_from_mixed_value(
unit_title, separator=" ", skipped_elements=["num"]
)
)
]

@classmethod
Expand Down

0 comments on commit de46532

Please sign in to comment.