Skip to content

Commit

Permalink
Make published_date timezone aware
Browse files (browse the repository at this point in the history)
  • Loading branch information
katybaulch committed Oct 21, 2024
1 parent b4e0c2d commit 55dac51
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion app/repository/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,19 @@ def parse_document_object(row: pd.Series) -> DocumentParserInput:
:return DocumentParserInput: A DocumentParserInput object
representing the family document record & its context.
"""
+ published_date = row.family_published_date
+ published_date = datetime(
+ published_date.year, published_date.month, published_date.day
+ ).astimezone(timezone.utc)
+
fallback_date = datetime(1900, 1, 1, tzinfo=timezone.utc)
return DocumentParserInput(
# All documents in a family indexed by title
name=cast(str, row.family_title),
document_title=cast(str, row.physical_document_title),
description=cast(str, row.family_description),
category=str(row.family_category),
- publication_ts=row.family_published_date or fallback_date,
+ publication_ts=published_date or fallback_date,
import_id=cast(str, row.family_document_import_id),
# This gets the most recently added document slug.
slug=cast(str, row.family_document_slug),
Expand Down

0 comments on commit 55dac51

Please sign in to comment.