diff --git a/singer_sdk/_singerlib/__init__.py b/singer_sdk/_singerlib/__init__.py
index 368ca191a..ae97070de 100644
--- a/singer_sdk/_singerlib/__init__.py
+++ b/singer_sdk/_singerlib/__init__.py
@@ -17,6 +17,7 @@
     write_message,
 )
 from singer_sdk._singerlib.schema import Schema, resolve_schema_references
+from singer_sdk._singerlib.utils import strftime, strptime_to_utc
 
 __all__ = [
     "Catalog",
@@ -35,4 +36,6 @@
     "write_message",
     "Schema",
     "resolve_schema_references",
+    "strftime",
+    "strptime_to_utc",
 ]
diff --git a/singer_sdk/_singerlib/utils.py b/singer_sdk/_singerlib/utils.py
new file mode 100644
index 000000000..e8f513aa7
--- /dev/null
+++ b/singer_sdk/_singerlib/utils.py
@@ -0,0 +1,67 @@
+"""Helpers for parsing datetime strings and formatting UTC datetimes."""
+
+from datetime import datetime, timedelta
+
+import dateutil.parser
+import pytz
+
+# Preferred output format: ``%04Y`` asks for a zero-padded, four-digit year.
+DATETIME_FMT = "%04Y-%m-%dT%H:%M:%S.%fZ"
+# Fallback used by ``strftime`` when ``%04Y`` is rejected or emitted literally.
+DATETIME_FMT_SAFE = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+
+class NonUTCDatetimeError(Exception):
+    """Raised when a non-UTC datetime is passed to a function expecting UTC."""
+
+    def __init__(self) -> None:
+        """Initialize the exception with a fixed, argument-free message."""
+        super().__init__("datetime must be pegged at UTC tzoneinfo")
+
+
+def strptime_to_utc(dtimestr: str) -> datetime:
+    """Parse a datetime string into a timezone-aware UTC datetime object.
+
+    Args:
+        dtimestr: a string representation of a datetime
+
+    Returns:
+        A UTC datetime.datetime object. Strings without any timezone information
+        are assumed to already be in UTC; all others are converted to UTC.
+    """
+    d_object: datetime = dateutil.parser.parse(dtimestr)
+    if d_object.tzinfo is None:
+        # Naive result: label it as UTC without shifting the wall-clock time.
+        return d_object.replace(tzinfo=pytz.UTC)
+    # Aware result: convert the instant to its UTC representation.
+    return d_object.astimezone(tz=pytz.UTC)
+
+
+def strftime(dtime: datetime, format_str: str = DATETIME_FMT) -> str:
+    """Format a UTC datetime object as a string.
+
+    Args:
+        dtime: a datetime with a UTC offset of exactly zero
+        format_str: output format specification
+
+    Returns:
+        A string in the specified format, or in ``DATETIME_FMT_SAFE`` when the
+        platform cannot render ``format_str``
+
+    Raises:
+        NonUTCDatetimeError: if the datetime is not UTC (if it has a nonzero time zone
+            offset) or is naive (``utcoffset()`` returns ``None``)
+    """
+    if dtime.utcoffset() != timedelta(0):
+        raise NonUTCDatetimeError()
+
+    try:
+        dt_str = dtime.strftime(format_str)
+    except ValueError:
+        # The format was rejected outright (e.g. ``%04Y`` is not accepted).
+        return dtime.strftime(DATETIME_FMT_SAFE)
+
+    if dt_str.startswith("4Y"):
+        # ``%04Y`` was not understood and came out literally as "4Y...".
+        return dtime.strftime(DATETIME_FMT_SAFE)
+    return dt_str
diff --git a/tests/_singerlib/test_utils.py b/tests/_singerlib/test_utils.py
new file mode 100644
index 000000000..d26ee4688
--- /dev/null
+++ b/tests/_singerlib/test_utils.py
@@ -0,0 +1,50 @@
+"""Tests for the datetime helpers in ``singer_sdk._singerlib.utils``."""
+
+from datetime import datetime
+
+import pytest
+import pytz
+
+from singer_sdk._singerlib import strftime, strptime_to_utc
+from singer_sdk._singerlib.utils import NonUTCDatetimeError
+
+
+def test_small_years():
+    """Years below 1000 are zero-padded to four digits."""
+    assert (
+        strftime(datetime(90, 1, 1, tzinfo=pytz.UTC)) == "0090-01-01T00:00:00.000000Z"
+    )
+
+
+def test_round_trip():
+    """Formatting, parsing and re-formatting yields the same string."""
+    # ``datetime.now(tz)`` replaces the deprecated naive ``datetime.utcnow()``.
+    now = datetime.now(pytz.UTC)
+    dtime = strftime(now)
+    parsed_datetime = strptime_to_utc(dtime)
+    formatted_datetime = strftime(parsed_datetime)
+    assert dtime == formatted_datetime
+
+
+@pytest.mark.parametrize(
+    "dtimestr",
+    [
+        "2021-01-01T00:00:00.000000Z",
+        "2021-01-01T00:00:00.000000+00:00",
+        "2021-01-01T00:00:00.000000+06:00",
+        "2021-01-01T00:00:00.000000-04:00",
+    ],
+    ids=["Z", "offset+0", "offset+6", "offset-4"],
+)
+def test_strptime_to_utc(dtimestr):
+    """Every parsed value is pegged at UTC, whatever offset the input carried."""
+    assert strptime_to_utc(dtimestr).tzinfo == pytz.UTC
+
+
+def test_stftime_non_utc():
+    """Formatting a datetime with a nonzero UTC offset is rejected."""
+    # pytz zones must not be attached with ``replace(tzinfo=...)`` (that picks the
+    # zone's historical LMT offset); ``datetime.now(tz)`` applies the real offset.
+    now = datetime.now(pytz.timezone("America/New_York"))
+    with pytest.raises(NonUTCDatetimeError):
+        strftime(now)