From 6e8799591a6c10d5178ae1f71d152b09a327aaf9 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Wed, 7 Sep 2022 11:49:21 +0300 Subject: [PATCH] msgpack: support tzoffset in datetime Support non-zero tzoffset in datetime extended type. If tzoffset and tzindex are not specified, return an object with timezone-naive pandas.Timestamp internals. If tzoffset is specified, return an object with timezone-aware pandas.Timestamp internals that use pytz.FixedOffset [1] timezone info. The pytz module is already a dependency of pandas, but this patch adds it as an explicit requirement in case that changes in the future. The pandas >= 1.0.0 restriction was added to ensure that the Timestamp.tz() setter is disabled. 1. https://pypi.org/project/pytz/ Part of #204 --- CHANGELOG.md | 1 + requirements.txt | 3 +- tarantool/msgpack_ext/types/datetime.py | 58 ++++++++++++++++++++++--- test/suites/test_msgpack_ext.py | 57 ++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c93543c..f4ee5538 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Decimal type support (#203). - UUID type support (#202). - Datetime type support and tarantool.Datetime type (#204). +- Offset in datetime type support (#204). ### Changed - Bump msgpack requirement to 1.0.4 (PR #223). 
diff --git a/requirements.txt b/requirements.txt index cdf505c7..0204cc0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ msgpack>=1.0.4 -pandas +pandas>=1.0.0 +pytz diff --git a/tarantool/msgpack_ext/types/datetime.py b/tarantool/msgpack_ext/types/datetime.py index 93fabe1c..00347a2d 100644 --- a/tarantool/msgpack_ext/types/datetime.py +++ b/tarantool/msgpack_ext/types/datetime.py @@ -1,4 +1,5 @@ import pandas +import pytz # https://www.tarantool.io/ru/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type # @@ -39,7 +40,16 @@ BYTEORDER = 'little' NSEC_IN_SEC = 1000000000 +SEC_IN_MIN = 60 +MIN_IN_DAY = 60 * 24 +def compute_offset(dt): + if dt.tz is None: + return 0 + + utc_offset = dt.tz.utcoffset(dt) + # There is no precision loss since pytz.FixedOffset is in minutes + return utc_offset.days * MIN_IN_DAY + utc_offset.seconds // SEC_IN_MIN def get_bytes_as_int(data, cursor, size): part = data[cursor:cursor + size] @@ -61,22 +71,35 @@ def msgpack_decode(data): tzoffset = 0 tzindex = 0 - if (tzoffset != 0) or (tzindex != 0): - raise NotImplementedError - total_nsec = seconds * NSEC_IN_SEC + nsec - dt = pandas.to_datetime(total_nsec, unit='ns') + if (tzindex != 0): + raise NotImplementedError + elif (tzoffset != 0): + tzinfo = pytz.FixedOffset(tzoffset) + dt = pandas.to_datetime(total_nsec, unit='ns').replace(tzinfo=pytz.utc).tz_convert(tzinfo) + else: + # return timezone-naive pandas.Timestamp + dt = pandas.to_datetime(total_nsec, unit='ns') + return dt, tzoffset, tzindex class Datetime(pandas.Timestamp): def __new__(cls, *args, **kwargs): - if len(args) > 0 and isinstance(args[0], bytes): - dt, tzoffset, tzindex = msgpack_decode(args[0]) - else: + dt = None + if len(args) > 0: + if isinstance(args[0], bytes): + dt, tzoffset, tzindex = msgpack_decode(args[0]) + elif isinstance(args[0], Datetime): + dt = pandas.Timestamp.__new__(cls, *args, **kwargs) + tzoffset = args[0].tarantool_tzoffset + + if dt is None: dt = 
super().__new__(cls, *args, **kwargs) + tzoffset = compute_offset(dt) dt.__class__ = cls + dt.tarantool_tzoffset = tzoffset return dt def msgpack_encode(self): @@ -85,6 +108,11 @@ def msgpack_encode(self): tzoffset = 0 tzindex = 0 + if isinstance(self, Datetime): + tzoffset = self.tarantool_tzoffset + else: + tzoffset = compute_offset(self) + buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES) if (nsec != 0) or (tzoffset != 0) or (tzindex != 0): @@ -93,3 +121,19 @@ def msgpack_encode(self): buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES) return buf + + def replace(self, *args, **kwargs): + dt = super().replace(*args, **kwargs) + return Datetime(dt) + + def astimezone(self, *args, **kwargs): + dt = super().astimezone(*args, **kwargs) + return Datetime(dt) + + def tz_convert(self, *args, **kwargs): + dt = super().tz_convert(*args, **kwargs) + return Datetime(dt) + + def tz_localize(self, *args, **kwargs): + dt = super().tz_localize(*args, **kwargs) + return Datetime(dt) diff --git a/test/suites/test_msgpack_ext.py b/test/suites/test_msgpack_ext.py index 7a6c8382..04ec3699 100644 --- a/test/suites/test_msgpack_ext.py +++ b/test/suites/test_msgpack_ext.py @@ -10,6 +10,7 @@ import warnings import tarantool import pandas +import pytz from tarantool.msgpack_ext.packer import default as packer_default from tarantool.msgpack_ext.unpacker import ext_hook as unpacker_ext_hook @@ -559,6 +560,62 @@ def test_UUID_tarantool_encode(self): 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + r"nsec=308543321})", }, + 'datetime_with_positive_offset': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543, nanosecond=321, + tzinfo=pytz.FixedOffset(180)), + 'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321, tzoffset=180})", + }, + 
'datetime_with_negative_offset': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543, nanosecond=321, + tzinfo=pytz.FixedOffset(-60)), + 'msgpack': (b'\x8a\xb1\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xc4\xff\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321, tzoffset=-60})", + }, + 'pandas_timestamp_with_positive_offset': { + 'python': pandas.Timestamp(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543, nanosecond=321, + tzinfo=pytz.FixedOffset(180)), + 'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321, tzoffset=180})", + }, + 'pandas_timestamp_with_negative_offset': { + 'python': pandas.Timestamp(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543, nanosecond=321, + tzinfo=pytz.FixedOffset(-60)), + 'msgpack': (b'\x8a\xb1\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xc4\xff\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321, tzoffset=-60})", + }, + 'datetime_offset_replace': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543, nanosecond=321, + ).replace(tzinfo=pytz.FixedOffset(180)), + 'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321, tzoffset=180})", + }, + 'datetime_offset_convert': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=16, minute=7, second=54, + microsecond=308543, nanosecond=321, + tzinfo=pytz.FixedOffset(60)).tz_convert(pytz.FixedOffset(180)), + 'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\x00\x00'), + 'tarantool': 
r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321, tzoffset=180})", + }, + 'datetime_offset_astimezone': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=16, minute=7, second=54, + microsecond=308543, nanosecond=321, + tzinfo=pytz.FixedOffset(60)).astimezone(pytz.FixedOffset(180)), + 'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321, tzoffset=180})", + }, } def test_datetime_msgpack_decode(self):