Skip to content

Commit

Permalink
add mapping dictionary configurable by the associated yaml file
Browse files Browse the repository at this point in the history
  • Loading branch information
ladrians committed Nov 7, 2024
1 parent 2e5c641 commit 6cb1ffe
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 15 deletions.
17 changes: 3 additions & 14 deletions amazon_s3/s3reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,6 @@ def get_files_from_url(self) -> list[str]:
logging.getLogger().debug(f"{doc_num} with '{file_type}' extension discarded")
continue

- filepath = f"{temp_dir}/{doc_num}"
original_key = f"{self.prefix}/{doc_num}" if self.prefix else doc_num

if self.skip_existing_file:
Expand Down Expand Up @@ -436,10 +435,6 @@ def get_files(self) -> list[str]:
temp_name = next(tempfile._get_candidate_names())
temp_name = obj.key.split("/")[-1]

- filepath = (
- f"{temp_dir}/{temp_name}"
- )
-
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)

Expand Down Expand Up @@ -675,15 +670,9 @@ def augment_metadata(
# Combine with preference to the first dictionary
initial_metadata = dict(ChainMap(initial_metadata, external_metadata))

- mapping = {
- "docnum": "documentid",
- "docname": "filename",
- "stagedesc": "disclosureactivity",
- "language": "documentlanguage",
- "editors": "documentauthor",
- "approvaldate": timestamp_tag,
- }
- initial_metadata = self.update_with_mapping(initial_metadata, mapping)
+ mapping = self.alternative_document_service.get('metadata_mappings', {})
+ if mapping:
+ initial_metadata = self.update_with_mapping(initial_metadata, mapping)
date_string = initial_metadata.get(timestamp_tag, date_string)
date_string_format = "%Y-%m-%dT%H:%M:%S"
doc_url = initial_metadata.get('docurl', None)
Expand Down
3 changes: 2 additions & 1 deletion saia_ingest/profile_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,8 @@ def get_json_response_from_url(
break

except Exception as e:
- logging.getLogger().error(f"Error elements {e}")
+ logging.getLogger().info(f"URL: {url}")
+ logging.getLogger().error(f"Error getting elements {e}")
finally:
return (new_list, next_url_href)

Expand Down

0 comments on commit 6cb1ffe

Please sign in to comment.