From 1f842558cda8451d80211e1388a19e497d9c6107 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Mon, 24 Oct 2022 16:06:34 +0300 Subject: [PATCH] api: extract datetime encode/decode from class Extract tarantool.Datetime encode and decode to external functions. This is a breaking change, but since there is no tagged release with Datetime yet and API was more internal rather than public, it shouldn't be an issue. Follows #204 --- CHANGELOG.md | 2 + tarantool/msgpack_ext/datetime.py | 146 +++++++++++++++++- tarantool/msgpack_ext/types/datetime.py | 191 +----------------------- test/suites/test_datetime.py | 8 +- 4 files changed, 153 insertions(+), 194 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1852f4d8..ff7b56d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -189,6 +189,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update API documentation strings (#67). - Update documentation index, quick start and guide pages (#67). - Use git version to set package version (#238). +- Extract tarantool.Datetime encode and decode to external + functions (PR #252). ### Fixed - Package build (#238). diff --git a/tarantool/msgpack_ext/datetime.py b/tarantool/msgpack_ext/datetime.py index e47f162e..fc1045d4 100644 --- a/tarantool/msgpack_ext/datetime.py +++ b/tarantool/msgpack_ext/datetime.py @@ -1,18 +1,83 @@ """ Tarantool `datetime`_ extension type support module. -Refer to :mod:`~tarantool.msgpack_ext.types.datetime`. +The datetime MessagePack representation looks like this: -.. _datetime: https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type +.. code-block:: text + + +---------+----------------+==========+-----------------+ + | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | + | = d7/d8 | = 4 | | tzindex; | + +---------+----------------+==========+-----------------+ + +MessagePack data contains: + +* Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the + little-endian order. +* The optional fields (8 bytes), if any of them have a non-zero value. + The fields include nsec (4 bytes), tzoffset (2 bytes), and + tzindex (2 bytes) packed in the little-endian order. + +``seconds`` is seconds since Epoch, where the epoch is the point where +the time starts, and is platform dependent. For Unix, the epoch is +January 1, 1970, 00:00:00 (UTC). Tarantool uses a ``double`` type, see a +structure definition in src/lib/core/datetime.h and reasons in +`datetime RFC`_. + +``nsec`` is nanoseconds, fractional part of seconds. Tarantool uses +``int32_t``, see a definition in src/lib/core/datetime.h. + +``tzoffset`` is timezone offset in minutes from UTC. Tarantool uses +``int16_t`` type, see a structure definition in src/lib/core/datetime.h. + +``tzindex`` is Olson timezone id. Tarantool uses ``int16_t`` type, see +a structure definition in src/lib/core/datetime.h. If both +``tzoffset`` and ``tzindex`` are specified, ``tzindex`` has the +preference and the ``tzoffset`` value is ignored. + +.. _datetime RFC: https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c """ -from tarantool.msgpack_ext.types.datetime import Datetime +from tarantool.msgpack_ext.types.datetime import ( + NSEC_IN_SEC, + SEC_IN_MIN, + Datetime, +) +import tarantool.msgpack_ext.types.timezones as tt_timezones + +from tarantool.error import MsgpackError EXT_ID = 4 """ `datetime`_ type id. """ +BYTEORDER = 'little' + +SECONDS_SIZE_BYTES = 8 +NSEC_SIZE_BYTES = 4 +TZOFFSET_SIZE_BYTES = 2 +TZINDEX_SIZE_BYTES = 2 + + +def get_int_as_bytes(data, size): + """ + Get binary representation of integer value. + + :param data: Integer value. + :type data: :obj:`int` + + :param size: Integer size, in bytes. + :type size: :obj:`int` + + :return: Encoded integer. + :rtype: :obj:`bytes` + + :meta private: + """ + + return data.to_bytes(size, byteorder=BYTEORDER, signed=True) + def encode(obj): """ Encode a datetime object. @@ -26,7 +91,48 @@ def encode(obj): :raise: :exc:`tarantool.Datetime.msgpack_encode` exceptions """ - return obj.msgpack_encode() + seconds = obj.value // NSEC_IN_SEC + nsec = obj.nsec + tzoffset = obj.tzoffset + + tz = obj.tz + if tz != '': + tzindex = tt_timezones.timezoneToIndex[tz] + else: + tzindex = 0 + + buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES) + + if (nsec != 0) or (tzoffset != 0) or (tzindex != 0): + buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES) + buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES) + buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES) + + return buf + + +def get_bytes_as_int(data, cursor, size): + """ + Get integer value from binary data. + + :param data: MessagePack binary data. + :type data: :obj:`bytes` + + :param cursor: Index after last parsed byte. + :type cursor: :obj:`int` + + :param size: Integer size, in bytes. + :type size: :obj:`int` + + :return: First value: parsed integer, second value: new cursor + position. + :rtype: first value: :obj:`int`, second value: :obj:`int` + + :meta private: + """ + + part = data[cursor:cursor + size] + return int.from_bytes(part, BYTEORDER, signed=True), cursor + size def decode(data): """ @@ -38,7 +144,35 @@ def decode(data): :return: Decoded datetime. :rtype: :class:`tarantool.Datetime` - :raise: :exc:`tarantool.Datetime` exceptions + :raise: :exc:`~tarantool.error.MsgpackError`, + :exc:`tarantool.Datetime` exceptions """ - return Datetime(data) + cursor = 0 + seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES) + + data_len = len(data) + if data_len == (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES + \ + TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES): + nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES) + tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES) + tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES) + elif data_len == SECONDS_SIZE_BYTES: + nsec = 0 + tzoffset = 0 + tzindex = 0 + else: + raise MsgpackError(f'Unexpected datetime payload length {data_len}') + + if tzindex != 0: + if tzindex not in tt_timezones.indexToTimezone: + raise MsgpackError(f'Failed to decode datetime with unknown tzindex "{tzindex}"') + tz = tt_timezones.indexToTimezone[tzindex] + return Datetime(timestamp=seconds, nsec=nsec, tz=tz, + timestamp_since_utc_epoch=True) + elif tzoffset != 0: + return Datetime(timestamp=seconds, nsec=nsec, tzoffset=tzoffset, + timestamp_since_utc_epoch=True) + else: + return Datetime(timestamp=seconds, nsec=nsec, + timestamp_since_utc_epoch=True) diff --git a/tarantool/msgpack_ext/types/datetime.py b/tarantool/msgpack_ext/types/datetime.py index b2dac8a9..f5912dda 100644 --- a/tarantool/msgpack_ext/types/datetime.py +++ b/tarantool/msgpack_ext/types/datetime.py @@ -1,41 +1,5 @@ """ -Tarantool `datetime`_ extension type support module. - -The datetime MessagePack representation looks like this: - -.. code-block:: text - - +---------+----------------+==========+-----------------+ - | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | - | = d7/d8 | = 4 | | tzindex; | - +---------+----------------+==========+-----------------+ - -MessagePack data contains: - -* Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the - little-endian order. -* The optional fields (8 bytes), if any of them have a non-zero value. - The fields include nsec (4 bytes), tzoffset (2 bytes), and - tzindex (2 bytes) packed in the little-endian order. - -``seconds`` is seconds since Epoch, where the epoch is the point where -the time starts, and is platform dependent. For Unix, the epoch is -January 1, 1970, 00:00:00 (UTC). Tarantool uses a ``double`` type, see a -structure definition in src/lib/core/datetime.h and reasons in -`datetime RFC`_. - -``nsec`` is nanoseconds, fractional part of seconds. Tarantool uses -``int32_t``, see a definition in src/lib/core/datetime.h. - -``tzoffset`` is timezone offset in minutes from UTC. Tarantool uses -``int16_t`` type, see a structure definition in src/lib/core/datetime.h. - -``tzindex`` is Olson timezone id. Tarantool uses ``int16_t`` type, see -a structure definition in src/lib/core/datetime.h. If both -``tzoffset`` and ``tzindex`` are specified, ``tzindex`` has the -preference and the ``tzoffset`` value is ignored. - -.. _datetime RFC: https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c +Tarantool `datetime`_ extension type implementation module. """ from copy import deepcopy @@ -44,63 +8,14 @@ import pytz import tarantool.msgpack_ext.types.timezones as tt_timezones -from tarantool.error import MsgpackError from tarantool.msgpack_ext.types.interval import Interval, Adjust -SECONDS_SIZE_BYTES = 8 -NSEC_SIZE_BYTES = 4 -TZOFFSET_SIZE_BYTES = 2 -TZINDEX_SIZE_BYTES = 2 - -BYTEORDER = 'little' - NSEC_IN_SEC = 1000000000 NSEC_IN_MKSEC = 1000 SEC_IN_MIN = 60 MONTH_IN_YEAR = 12 -def get_bytes_as_int(data, cursor, size): - """ - Get integer value from binary data. - - :param data: MessagePack binary data. - :type data: :obj:`bytes` - - :param cursor: Index after last parsed byte. - :type cursor: :obj:`int` - - :param size: Integer size, in bytes. - :type size: :obj:`int` - - :return: First value: parsed integer, second value: new cursor - position. - :rtype: first value: :obj:`int`, second value: :obj:`int` - - :meta private: - """ - - part = data[cursor:cursor + size] - return int.from_bytes(part, BYTEORDER, signed=True), cursor + size - -def get_int_as_bytes(data, size): - """ - Get binary representation of integer value. - - :param data: Integer value. - :type data: :obj:`int` - - :param size: Integer size, in bytes. - :type size: :obj:`int` - - :return: Encoded integer. - :rtype: :obj:`bytes` - - :meta private: - """ - - return data.to_bytes(size, byteorder=BYTEORDER, signed=True) - def compute_offset(timestamp): """ Compute timezone offset. Offset is computed each time and not stored @@ -126,7 +41,7 @@ def compute_offset(timestamp): # There is no precision loss since offset is in minutes return int(utc_offset.total_seconds()) // SEC_IN_MIN -def get_python_tzinfo(tz, error_class): +def get_python_tzinfo(tz): """ All non-abbreviated Tarantool timezones are represented as pytz timezones (from :func:`pytz.timezone`). All non-ambiguous @@ -138,9 +53,6 @@ def get_python_tzinfo(tz, error_class): :param tz: Tarantool timezone name. :type tz: :obj:`str` - :param error_class: Error class to raise in case of fail. - :type error_class: :obj:`Exception` - :return: Timezone object. :rtype: :func:`pytz.timezone` result or :class:`pytz.FixedOffset` @@ -155,66 +67,17 @@ def get_python_tzinfo(tz, error_class): # Checked with timezones/validate_timezones.py tt_tzinfo = tt_timezones.timezoneAbbrevInfo[tz] if (tt_tzinfo['category'] & tt_timezones.TZ_AMBIGUOUS) != 0: - raise error_class(f'Failed to create datetime with ambiguous timezone "{tz}"') + raise ValueError(f'Failed to create datetime with ambiguous timezone "{tz}"') return pytz.FixedOffset(tt_tzinfo['offset']) -def msgpack_decode(data): - """ - Decode MsgPack binary data to useful timestamp and timezone data. - For internal use of :class:`~tarantool.Datetime`. - - :param data: MessagePack binary data to decode. - :type data: :obj:`bytes` - - :return: First value: timestamp data with timezone info, second - value: Tarantool timezone name. - :rtype: first value: :class:`pandas.Timestamp`, second value: - :obj:`str` - - :raises: :exc:`~tarantool.error.MsgpackError` - - :meta private: - """ - - cursor = 0 - seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES) - - data_len = len(data) - if data_len == (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES + \ - TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES): - nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES) - tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES) - tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES) - elif data_len == SECONDS_SIZE_BYTES: - nsec = 0 - tzoffset = 0 - tzindex = 0 - else: - raise MsgpackError(f'Unexpected datetime payload length {data_len}') - - total_nsec = seconds * NSEC_IN_SEC + nsec - datetime = pandas.to_datetime(total_nsec, unit='ns') - - if tzindex != 0: - if tzindex not in tt_timezones.indexToTimezone: - raise MsgpackError(f'Failed to decode datetime with unknown tzindex "{tzindex}"') - tz = tt_timezones.indexToTimezone[tzindex] - tzinfo = get_python_tzinfo(tz, MsgpackError) - return datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo), tz - elif tzoffset != 0: - tzinfo = pytz.FixedOffset(tzoffset) - return datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo), '' - else: - return datetime, '' - class Datetime(): """ Class representing Tarantool `datetime`_ info. Internals are based on :class:`pandas.Timestamp`. - You can create :class:`~tarantool.Datetime` objects either from - MessagePack data or by using the same API as in Tarantool: + You can create :class:`~tarantool.Datetime` objects by using the + same API as in Tarantool: .. code-block:: python @@ -277,14 +140,10 @@ class Datetime(): .. _datetime: https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type """ - def __init__(self, data=None, *, timestamp=None, year=None, month=None, + def __init__(self, *, timestamp=None, year=None, month=None, day=None, hour=None, minute=None, sec=None, nsec=None, tzoffset=0, tz='', timestamp_since_utc_epoch=False): """ - :param data: MessagePack binary data to decode. If provided, - all other parameters are ignored. - :type data: :obj:`bytes`, optional - :param timestamp: Timestamp since epoch. Cannot be provided together with :paramref:`~tarantool.Datetime.params.year`, @@ -403,22 +262,12 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None, .. _datetime.new(): https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/ """ - if data is not None: - if not isinstance(data, bytes): - raise ValueError('data argument (first positional argument) ' + - 'expected to be a "bytes" instance') - - datetime, tz = msgpack_decode(data) - self._datetime = datetime - self._tz = tz - return - tzinfo = None if tz != '': if tz not in tt_timezones.timezoneToIndex: raise ValueError(f'Unknown Tarantool timezone "{tz}"') - tzinfo = get_python_tzinfo(tz, ValueError) + tzinfo = get_python_tzinfo(tz) elif tzoffset != 0: tzinfo = pytz.FixedOffset(tzoffset) self._tz = tz @@ -783,29 +632,3 @@ def value(self): """ return self._datetime.value - - def msgpack_encode(self): - """ - Encode a datetime object. - - :rtype: :obj:`bytes` - """ - - seconds = self.value // NSEC_IN_SEC - nsec = self.nsec - tzoffset = self.tzoffset - - tz = self.tz - if tz != '': - tzindex = tt_timezones.timezoneToIndex[tz] - else: - tzindex = 0 - - buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES) - - if (nsec != 0) or (tzoffset != 0) or (tzindex != 0): - buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES) - buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES) - buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES) - - return buf diff --git a/test/suites/test_datetime.py b/test/suites/test_datetime.py index 583f9058..fb2d2855 100644 --- a/test/suites/test_datetime.py +++ b/test/suites/test_datetime.py @@ -104,14 +104,14 @@ def test_Datetime_class_API_wth_tz(self): 'positional_year': { 'args': [2022], 'kwargs': {}, - 'type': ValueError, - 'msg': 'data argument (first positional argument) expected to be a "bytes" instance' + 'type': TypeError, + 'msg': '__init__() takes 1 positional argument but 2 were given' }, 'positional_date': { 'args': [2022, 8, 31], 'kwargs': {}, 'type': TypeError, - 'msg': '__init__() takes from 1 to 2 positional arguments but 4 were given' + 'msg': '__init__() takes 1 positional argument but 4 were given' }, 'mixing_date_and_timestamp': { 'args': [], @@ -338,7 +338,7 @@ def test_msgpack_decode_unknown_tzindex(self): def test_msgpack_decode_ambiguous_tzindex(self): case = b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x82\x00' self.assertRaisesRegex( - MsgpackError, 'Failed to create datetime with ambiguous timezone "AET"', + ValueError, 'Failed to create datetime with ambiguous timezone "AET"', lambda: unpacker_ext_hook(4, case))