From 32efcc9bb9616ddf7393b0ebcab759785e9a1985 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Tue, 6 Sep 2022 13:36:49 +0300 Subject: [PATCH] msgpack: support datetime extended type Tarantool supports datetime type since version 2.10.0 [1]. This patch introduces support for the Tarantool datetime type in msgpack decoders and encoders. Tarantool datetime objects are decoded to `tarantool.Datetime` type. `tarantool.Datetime` and `pandas.Timestamp` may be encoded to Tarantool datetime objects. `tarantool.Datetime` is basically a `pandas.Timestamp` wrapper. You can create `tarantool.Datetime` objects - from `pandas.Timestamp` object, - by using the same API as in `pandas.Timestamp()` [2], - from another `tarantool.Datetime` object. To work with datetime data as a `pandas.Timestamp`, convert `tarantool.Datetime` object to a `pandas.Timestamp` with `to_pd_timestamp()` method call. You can use this `pandas.Timestamp` object to build a `tarantool.Datetime` object before sending data to Tarantool. To work with data as `numpy.datetime64` or `datetime.datetime`, convert to a `pandas.Timestamp` and then use `to_datetime64()` or `to_datetime()` converter. pandas.Timestamp was chosen to store data because it could be used to store both nanoseconds and timezone information. The built-in Python datetime.datetime supports microseconds at most, and numpy.datetime64 does not support timezones. There are two reasons to use a custom type instead of a plain pandas.Timestamp: - tzindex may be lost on conversion to pandas.Timestamp - Tarantool datetime interval type is planned to be stored in a custom type tarantool.Interval and we'll need a way to support arithmetic between datetime and interval. This patch does not yet introduce support for timezones in datetime. 1. https://github.com/tarantool/tarantool/issues/5941 2. 
https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html Part of #204 --- CHANGELOG.md | 21 ++++ requirements.txt | 1 + tarantool/__init__.py | 6 +- tarantool/error.py | 10 ++ tarantool/msgpack_ext/datetime.py | 12 ++ tarantool/msgpack_ext/packer.py | 28 ++++- tarantool/msgpack_ext/types/datetime.py | 121 ++++++++++++++++++ tarantool/msgpack_ext/unpacker.py | 6 +- test/suites/__init__.py | 3 +- test/suites/lib/skip.py | 11 ++ test/suites/test_datetime.py | 158 ++++++++++++++++++++++++ 11 files changed, 370 insertions(+), 7 deletions(-) create mode 100644 tarantool/msgpack_ext/datetime.py create mode 100644 tarantool/msgpack_ext/types/datetime.py create mode 100644 test/suites/test_datetime.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 226909d8..19288f2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Decimal type support (#203). - UUID type support (#202). +- Datetime type support and tarantool.Datetime type (#204). + + Tarantool datetime objects are decoded to `tarantool.Datetime` + type. `tarantool.Datetime` and `pandas.Timestamp` may be encoded + to Tarantool datetime objects. + + `tarantool.Datetime` is basically a `pandas.Timestamp` wrapper. + You can create `tarantool.Datetime` objects + - from `pandas.Timestamp` object, + - by using the same API as in `pandas.Timestamp()`, + - from another `tarantool.Datetime` object. + + To work with datetime data as a `pandas.Timestamp`, convert + `tarantool.Datetime` object to a `pandas.Timestamp` with + `to_pd_timestamp()` method call. You can use this + `pandas.Timestamp` object to build a `tarantool.Datetime` + object before sending data to Tarantool. + + To work with data as `numpy.datetime64` or `datetime.datetime`, + convert to a `pandas.Timestamp` and then use `to_datetime64()` + or `to_datetime()` converter. ### Changed - Bump msgpack requirement to 1.0.4 (PR #223). 
diff --git a/requirements.txt b/requirements.txt index 46dff380..cdf505c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ msgpack>=1.0.4 +pandas diff --git a/tarantool/__init__.py b/tarantool/__init__.py index 3d4a19a8..6625b4eb 100644 --- a/tarantool/__init__.py +++ b/tarantool/__init__.py @@ -32,6 +32,10 @@ ENCODING_DEFAULT, ) +from tarantool.msgpack_ext.types.datetime import ( + Datetime, +) + __version__ = "0.9.0" @@ -91,7 +95,7 @@ def connectmesh(addrs=({'host': 'localhost', 'port': 3301},), user=None, __all__ = ['connect', 'Connection', 'connectmesh', 'MeshConnection', 'Schema', 'Error', 'DatabaseError', 'NetworkError', 'NetworkWarning', - 'SchemaError', 'dbapi'] + 'SchemaError', 'dbapi', 'Datetime'] # ConnectionPool is supported only for Python 3.7 or newer. if sys.version_info.major >= 3 and sys.version_info.minor >= 7: diff --git a/tarantool/error.py b/tarantool/error.py index 9ea49d71..93f4dc10 100644 --- a/tarantool/error.py +++ b/tarantool/error.py @@ -119,6 +119,16 @@ class MsgpackWarning(UserWarning): Warning with encoding or decoding of MP_EXT types ''' +class ExtTypeError(ValueError): + ''' + Error related to tarantool.Datetime type + ''' + +class ExtTypeWarning(UserWarning): + ''' + Warning related to tarantool.Datetime type + ''' + __all__ = ("Warning", "Error", "InterfaceError", "DatabaseError", "DataError", "OperationalError", "IntegrityError", "InternalError", "ProgrammingError", "NotSupportedError", "MsgpackError", diff --git a/tarantool/msgpack_ext/datetime.py b/tarantool/msgpack_ext/datetime.py new file mode 100644 index 00000000..f9c29126 --- /dev/null +++ b/tarantool/msgpack_ext/datetime.py @@ -0,0 +1,12 @@ +from tarantool.msgpack_ext.types.datetime import Datetime + +EXT_ID = 4 + +def encode(obj): + return obj.msgpack_encode() + +def encode_pd_timestamp(obj): + return Datetime(obj).msgpack_encode() + +def decode(data): + return Datetime(data) diff --git a/tarantool/msgpack_ext/packer.py b/tarantool/msgpack_ext/packer.py 
index e8dd74db..d16d9855 100644 --- a/tarantool/msgpack_ext/packer.py +++ b/tarantool/msgpack_ext/packer.py @@ -1,17 +1,39 @@ from decimal import Decimal from uuid import UUID from msgpack import ExtType +import pandas + +from tarantool.msgpack_ext.types.datetime import Datetime import tarantool.msgpack_ext.decimal as ext_decimal import tarantool.msgpack_ext.uuid as ext_uuid +import tarantool.msgpack_ext.datetime as ext_datetime encoders = [ - {'type': Decimal, 'ext': ext_decimal}, - {'type': UUID, 'ext': ext_uuid }, + { + 'type': Decimal, + 'ext_id': ext_decimal.EXT_ID, + 'encoder': ext_decimal.encode, + }, + { + 'type': UUID, + 'ext_id': ext_uuid.EXT_ID, + 'encoder': ext_uuid.encode, + }, + { + 'type': Datetime, + 'ext_id': ext_datetime.EXT_ID, + 'encoder': ext_datetime.encode, + }, + { + 'type': pandas.Timestamp, + 'ext_id': ext_datetime.EXT_ID, + 'encoder': ext_datetime.encode_pd_timestamp, + }, ] def default(obj): for encoder in encoders: if isinstance(obj, encoder['type']): - return ExtType(encoder['ext'].EXT_ID, encoder['ext'].encode(obj)) + return ExtType(encoder['ext_id'], encoder['encoder'](obj)) raise TypeError("Unknown type: %r" % (obj,)) diff --git a/tarantool/msgpack_ext/types/datetime.py b/tarantool/msgpack_ext/types/datetime.py new file mode 100644 index 00000000..1b7e8df1 --- /dev/null +++ b/tarantool/msgpack_ext/types/datetime.py @@ -0,0 +1,121 @@ +from copy import deepcopy + +import pandas + +# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type +# +# The datetime MessagePack representation looks like this: +# +---------+----------------+==========+-----------------+ +# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | +# | = d7/d8 | = 4 | | tzindex; | +# +---------+----------------+==========+-----------------+ +# MessagePack data contains: +# +# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the +# little-endian order. 
+# * The optional fields (8 bytes), if any of them have a non-zero value. +# The fields include nsec (4 bytes), tzoffset (2 bytes), and +# tzindex (2 bytes) packed in the little-endian order. +# +# seconds is seconds since Epoch, where the epoch is the point where the time +# starts, and is platform dependent. For Unix, the epoch is January 1, +# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure +# definition in src/lib/core/datetime.h and reasons in +# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c +# +# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see +# a definition in src/lib/core/datetime.h. +# +# tzoffset is timezone offset in minutes from UTC. Tarantool uses an int16_t type, +# see a structure definition in src/lib/core/datetime.h. +# +# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure +# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are +# specified, tzindex takes precedence and the tzoffset value is ignored. 
+ +SECONDS_SIZE_BYTES = 8 +NSEC_SIZE_BYTES = 4 +TZOFFSET_SIZE_BYTES = 2 +TZINDEX_SIZE_BYTES = 2 + +BYTEORDER = 'little' + +NSEC_IN_SEC = 1000000000 + + +def get_bytes_as_int(data, cursor, size): + part = data[cursor:cursor + size] + return int.from_bytes(part, BYTEORDER, signed=True), cursor + size + +def get_int_as_bytes(data, size): + return data.to_bytes(size, byteorder=BYTEORDER, signed=True) + +def msgpack_decode(data): + cursor = 0 + seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES) + + if len(data) > SECONDS_SIZE_BYTES: + nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES) + tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES) + tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES) + else: + nsec = 0 + tzoffset = 0 + tzindex = 0 + + if (tzoffset != 0) or (tzindex != 0): + raise NotImplementedError + + total_nsec = seconds * NSEC_IN_SEC + nsec + + timestamp = pandas.to_datetime(total_nsec, unit='ns') + return timestamp, tzoffset, tzindex + +class Datetime(): + def __init__(self, *args, **kwargs): + if len(args) > 0: + data = args[0] + if isinstance(data, bytes): + timestamp, tzoffset, tzindex = msgpack_decode(data) + elif isinstance(data, pandas.Timestamp): + timestamp = deepcopy(data) + elif isinstance(data, Datetime): + timestamp = deepcopy(data._timestamp) + else: + timestamp = pandas.Timestamp(*args, **kwargs) + + self._timestamp = timestamp + + def __eq__(self, other): + if isinstance(other, Datetime): + return self._timestamp == other._timestamp + elif isinstance(other, pandas.Timestamp): + return self._timestamp == other + else: + return False + + def to_pd_timestamp(self): + return deepcopy(self._timestamp) + + def __str__(self): + return f'tarantool.Datetime(timestamp={self._timestamp})' + + def __repr__(self): + return f'tarantool.Datetime(timestamp={self._timestamp})' + + def msgpack_encode(self): + ts_value = self._timestamp.value + + seconds = ts_value // NSEC_IN_SEC + nsec = ts_value 
% NSEC_IN_SEC + tzoffset = 0 + tzindex = 0 + + buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES) + + if (nsec != 0) or (tzoffset != 0) or (tzindex != 0): + buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES) + buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES) + buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES) + + return buf diff --git a/tarantool/msgpack_ext/unpacker.py b/tarantool/msgpack_ext/unpacker.py index 44bfdb63..b303e18d 100644 --- a/tarantool/msgpack_ext/unpacker.py +++ b/tarantool/msgpack_ext/unpacker.py @@ -1,9 +1,11 @@ import tarantool.msgpack_ext.decimal as ext_decimal import tarantool.msgpack_ext.uuid as ext_uuid +import tarantool.msgpack_ext.datetime as ext_datetime decoders = { - ext_decimal.EXT_ID: ext_decimal.decode, - ext_uuid.EXT_ID : ext_uuid.decode , + ext_decimal.EXT_ID : ext_decimal.decode , + ext_uuid.EXT_ID : ext_uuid.decode , + ext_datetime.EXT_ID: ext_datetime.decode, } def ext_hook(code, data): diff --git a/test/suites/__init__.py b/test/suites/__init__.py index 94357c8e..c5792bdd 100644 --- a/test/suites/__init__.py +++ b/test/suites/__init__.py @@ -17,13 +17,14 @@ from .test_ssl import TestSuite_Ssl from .test_decimal import TestSuite_Decimal from .test_uuid import TestSuite_UUID +from .test_datetime import TestSuite_Datetime test_cases = (TestSuite_Schema_UnicodeConnection, TestSuite_Schema_BinaryConnection, TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect, TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI, TestSuite_Encoding, TestSuite_Pool, TestSuite_Ssl, - TestSuite_Decimal, TestSuite_UUID) + TestSuite_Decimal, TestSuite_UUID, TestSuite_Datetime) def load_tests(loader, tests, pattern): suite = unittest.TestSuite() diff --git a/test/suites/lib/skip.py b/test/suites/lib/skip.py index 9ce76991..71bfce13 100644 --- a/test/suites/lib/skip.py +++ b/test/suites/lib/skip.py @@ -154,3 +154,14 @@ def skip_or_run_UUID_test(func): return skip_or_run_test_tarantool(func, '2.4.1', 'does not support UUID type') + 
+def skip_or_run_datetime_test(func): + """Decorator to skip or run datetime-related tests depending on + the tarantool version. + + Tarantool supports datetime type only since 2.10.0 version. + See https://github.com/tarantool/tarantool/issues/5941 + """ + + return skip_or_run_test_pcall_require(func, 'datetime', + 'does not support datetime type') diff --git a/test/suites/test_datetime.py b/test/suites/test_datetime.py new file mode 100644 index 00000000..6183cc39 --- /dev/null +++ b/test/suites/test_datetime.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import sys +import unittest +import msgpack +import warnings +import tarantool +import pandas + +from tarantool.msgpack_ext.packer import default as packer_default +from tarantool.msgpack_ext.unpacker import ext_hook as unpacker_ext_hook + +from .lib.tarantool_server import TarantoolServer +from .lib.skip import skip_or_run_datetime_test +from tarantool.error import MsgpackError, MsgpackWarning + +class TestSuite_Datetime(unittest.TestCase): + @classmethod + def setUpClass(self): + print(' DATETIME EXT TYPE '.center(70, '='), file=sys.stderr) + print('-' * 70, file=sys.stderr) + self.srv = TarantoolServer() + self.srv.script = 'test/suites/box.lua' + self.srv.start() + + self.adm = self.srv.admin + self.adm(r""" + _, datetime = pcall(require, 'datetime') + + box.schema.space.create('test') + box.space['test']:create_index('primary', { + type = 'tree', + parts = {1, 'string'}, + unique = true}) + + box.schema.user.create('test', {password = 'test', if_not_exists = true}) + box.schema.user.grant('test', 'read,write,execute', 'universe') + """) + + self.con = tarantool.Connection(self.srv.host, self.srv.args['primary'], + user='test', password='test') + + def setUp(self): + # prevent a remote tarantool from clean our session + if self.srv.is_started(): + self.srv.touch_lock() + + self.adm("box.space['test']:truncate()") + + + cases = { + 'date': { + 'python': 
tarantool.Datetime(year=2022, month=8, day=31), + 'msgpack': (b'\x80\xa4\x0e\x63\x00\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31})", + }, + 'date_unix_start': { + 'python': tarantool.Datetime(year=1970, month=1, day=1), + 'msgpack': (b'\x00\x00\x00\x00\x00\x00\x00\x00'), + 'tarantool': r"datetime.new({year=1970, month=1, day=1})", + }, + 'date_before_1970': { + 'python': tarantool.Datetime(year=1900, month=1, day=1), + 'msgpack': (b'\x80\x81\x55\x7c\xff\xff\xff\xff'), + 'tarantool': r"datetime.new({year=1900, month=1, day=1})", + }, + 'datetime_with_minutes': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7), + 'msgpack': (b'\x44\xa3\x0f\x63\x00\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7})", + }, + 'datetime_with_seconds': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54), + 'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54})", + }, + 'datetime_with_microseconds': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543), + 'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x18\xfe\x63\x12\x00\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543000})", + }, + 'datetime_with_nanoseconds': { + 'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543, nanosecond=321), + 'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321})", + }, + 'pandas_timestamp': { + 'python': pandas.Timestamp(year=2022, month=8, day=31, hour=18, minute=7, second=54, + microsecond=308543, nanosecond=321), + 'msgpack': 
(b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00'), + 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + + r"nsec=308543321})", + }, + } + + def test_msgpack_decode(self): + for name in self.cases.keys(): + with self.subTest(msg=name): + case = self.cases[name] + + self.assertEqual(unpacker_ext_hook(4, case['msgpack']), + case['python']) + + @skip_or_run_datetime_test + def test_tarantool_decode(self): + for name in self.cases.keys(): + with self.subTest(msg=name): + case = self.cases[name] + + self.adm(f"box.space['test']:replace{{'{name}', {case['tarantool']}}}") + + self.assertSequenceEqual(self.con.select('test', name), + [[name, case['python']]]) + + def test_msgpack_encode(self): + for name in self.cases.keys(): + with self.subTest(msg=name): + case = self.cases[name] + + self.assertEqual(packer_default(case['python']), + msgpack.ExtType(code=4, data=case['msgpack'])) + + @skip_or_run_datetime_test + def test_tarantool_encode(self): + for name in self.cases.keys(): + with self.subTest(msg=name): + case = self.cases[name] + + self.con.insert('test', [name, case['python']]) + + lua_eval = f""" + local dt = {case['tarantool']} + + local tuple = box.space['test']:get('{name}') + assert(tuple ~= nil) + + if tuple[2] == dt then + return true + else + return nil, ('%s is not equal to expected %s'):format( + tostring(tuple[2]), tostring(dt)) + end + """ + + self.assertSequenceEqual(self.adm(lua_eval), [True]) + + + @classmethod + def tearDownClass(self): + self.con.close() + self.srv.stop() + self.srv.clean()