-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
msgpack: support datetime extended type
Tarantool supports the datetime type since version 2.10.0 [1]. This patch introduces support for the Tarantool datetime type in msgpack decoders and encoders. The Tarantool datetime type is mapped to the new tarantool.Datetime type, which inherits from pandas.Timestamp [2]. pandas.Timestamp was chosen as the base class because it can store both nanoseconds and timezone information. The built-in Python datetime.datetime supports microseconds at most, and numpy.datetime64 does not support timezones. There are two reasons to use a custom type instead of plain pandas.Timestamp: - tzindex may be lost on conversion to pandas.Timestamp; - the Tarantool datetime interval type is planned to be stored in the custom type tarantool.Interval, and we'll need a way to support arithmetic between datetime and interval. The msgpack encoder supports encoding pandas.Timestamp with tarantool.Datetime tools, but the decoder always produces tarantool.Datetime. If you plan to work with Tarantool datetimes, please stick to the tarantool.Datetime object rather than pure pandas.Timestamp. You can create a tarantool.Datetime from a pandas.Timestamp or by using the same API as in plain pandas.Timestamp. If you used numpy.datetime64 or datetime.datetime in your logic, you can convert those values with the pandas.to_datetime() converter. This patch does not yet introduce support for timezones in datetime. 1. tarantool/tarantool#5941 2. https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html Part of #204
- Loading branch information
1 parent
c70dfa6
commit 22442a0
Showing
11 changed files
with
304 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
msgpack>=1.0.4 | ||
pandas |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from tarantool.msgpack_ext.types.datetime import Datetime | ||
|
||
# MessagePack extension type code of the Tarantool datetime type (MP_DATETIME).
EXT_ID = 4
|
||
def encode(obj):
    """
    Serialize a datetime object into the datetime extension payload.

    ``obj`` is passed to ``Datetime.msgpack_encode`` as the instance and
    the value that method returns is handed back unchanged.
    """
    serialize = Datetime.msgpack_encode
    return serialize(obj)
|
||
def decode(data):
    """
    Build a ``Datetime`` object from the payload of a datetime extension.
    """
    decoded = Datetime(data)
    return decoded
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
import pandas | ||
|
||
# https://www.tarantool.io/ru/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type | ||
# | ||
# The datetime MessagePack representation looks like this: | ||
# +---------+----------------+==========+-----------------+ | ||
# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | | ||
# | = d7/d8 | = 4 | | tzindex; | | ||
# +---------+----------------+==========+-----------------+ | ||
# MessagePack data contains: | ||
# | ||
# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the | ||
# little-endian order. | ||
# * The optional fields (8 bytes), if any of them have a non-zero value. | ||
# The fields include nsec (4 bytes), tzoffset (2 bytes), and | ||
# tzindex (2 bytes) packed in the little-endian order. | ||
# | ||
# seconds is seconds since Epoch, where the epoch is the point where the time | ||
# starts, and is platform dependent. For Unix, the epoch is January 1, | ||
# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure | ||
# definition in src/lib/core/datetime.h and reasons in | ||
# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c | ||
# | ||
# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see | ||
# a definition in src/lib/core/datetime.h. | ||
# | ||
# tzoffset is timezone offset in minutes from UTC. Tarantool uses an int16_t type, | ||
# see a structure definition in src/lib/core/datetime.h. | ||
# | ||
# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure | ||
# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are | ||
# specified, tzindex has the preference and the tzoffset value is ignored. | ||
|
||
# Widths, in bytes, of the fields of the datetime MessagePack payload.
SECONDS_SIZE_BYTES = 8
NSEC_SIZE_BYTES = 4
TZOFFSET_SIZE_BYTES = 2
TZINDEX_SIZE_BYTES = 2

# Byte order used when payload fields are converted to and from integers.
BYTEORDER = 'little'

# Number of nanoseconds in one second.
NSEC_IN_SEC = 10 ** 9
|
||
|
||
def get_bytes_as_int(data, cursor, size):
    """
    Read a signed integer stored in ``size`` bytes of ``data`` at ``cursor``.

    Returns a pair: the decoded integer and the position right after the
    bytes that were read, ready to be used as the next cursor.
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end
|
||
def get_int_as_bytes(data, size):
    """
    Pack the integer ``data`` as a signed value occupying ``size`` bytes.
    """
    packed = data.to_bytes(size, byteorder=BYTEORDER, signed=True)
    return packed
|
||
def msgpack_decode(data):
    """
    Decode the payload of a Tarantool datetime MessagePack extension.

    The payload is either the seconds field alone or the seconds field
    followed by the block of optional fields (nsec, tzoffset, tzindex).

    :param data: raw extension payload.
    :return: tuple ``(dt, tzoffset, tzindex)``, where ``dt`` is the value
        built by ``pandas.to_datetime`` from the total nanosecond count.
    :raise ValueError: the payload size matches neither valid layout.
    :raise NotImplementedError: tzoffset or tzindex is non-zero.
    """
    optional_size = NSEC_SIZE_BYTES + TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES
    full_size = SECONDS_SIZE_BYTES + optional_size

    # Slicing never fails on a short buffer, so a malformed payload would
    # otherwise be decoded into a wrong value without any error.
    if len(data) not in (SECONDS_SIZE_BYTES, full_size):
        raise ValueError(
            'Unexpected datetime payload size: got %d bytes, '
            'expected %d or %d' % (len(data), SECONDS_SIZE_BYTES, full_size))

    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    if len(data) > SECONDS_SIZE_BYTES:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)
    else:
        # The optional block is omitted, so every optional field is zero.
        nsec = 0
        tzoffset = 0
        tzindex = 0

    if (tzoffset != 0) or (tzindex != 0):
        raise NotImplementedError(
            'Decoding datetime with tzoffset or tzindex is not supported')

    total_nsec = seconds * NSEC_IN_SEC + nsec

    dt = pandas.to_datetime(total_nsec, unit='ns')
    return dt, tzoffset, tzindex
|
||
class Datetime(pandas.Timestamp):
    """
    ``pandas.Timestamp`` subclass that can be built from, and serialized to,
    the payload of the Tarantool datetime MessagePack extension.
    """

    def __new__(cls, *args, **kwargs):
        """
        Create a new object.

        When the first positional argument is a ``bytes`` object it is
        treated as an extension payload and decoded with ``msgpack_decode``.
        Any other arguments are forwarded to the parent constructor.
        """
        from_payload = bool(args) and isinstance(args[0], bytes)

        if from_payload:
            # The timezone fields are returned as well but are not used here.
            instance, _tzoffset, _tzindex = msgpack_decode(args[0])
        else:
            instance = super().__new__(cls, *args, **kwargs)

        # msgpack_decode returns what pandas.to_datetime produced, which is
        # not an instance of cls; rebinding the class makes both branches
        # hand back an object of the requested class.
        instance.__class__ = cls
        return instance

    def msgpack_encode(self):
        """
        Serialize the object into the datetime extension payload.

        Only ``self.value`` is read. The seconds field is always written;
        the nsec/tzoffset/tzindex block is appended only when at least one
        of those fields is non-zero. tzoffset and tzindex are always zero
        here.
        """
        seconds, nsec = divmod(self.value, NSEC_IN_SEC)
        tzoffset = 0
        tzindex = 0

        chunks = [get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)]

        if any((nsec, tzoffset, tzindex)):
            chunks.append(get_int_as_bytes(nsec, NSEC_SIZE_BYTES))
            chunks.append(get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES))
            chunks.append(get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES))

        return b''.join(chunks)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import print_function | ||
|
||
import sys | ||
import unittest | ||
import msgpack | ||
import warnings | ||
import tarantool | ||
import pandas | ||
|
||
from tarantool.msgpack_ext.packer import default as packer_default | ||
from tarantool.msgpack_ext.unpacker import ext_hook as unpacker_ext_hook | ||
|
||
from .lib.tarantool_server import TarantoolServer | ||
from .lib.skip import skip_or_run_datetime_test | ||
from tarantool.error import MsgpackError, MsgpackWarning | ||
|
||
class TestSuite_Datetime(unittest.TestCase):
    """
    Tests of the datetime extension type.

    Every entry of ``cases`` describes the same moment three ways: as a
    Python object, as a MessagePack extension payload and as a Lua
    expression. The tests check conversions between these forms, both
    locally and through a running Tarantool instance.
    """

    @classmethod
    def setUpClass(cls):
        """Start the server, prepare the schema and open a connection."""
        print(' DATETIME EXT TYPE '.center(70, '='), file=sys.stderr)
        print('-' * 70, file=sys.stderr)

        cls.srv = TarantoolServer()
        cls.srv.script = 'test/suites/box.lua'
        cls.srv.start()

        cls.adm = cls.srv.admin
        cls.adm(r"""
            _, datetime = pcall(require, 'datetime')
            box.schema.space.create('test')
            box.space['test']:create_index('primary', {
                type = 'tree',
                parts = {1, 'string'},
                unique = true})
            box.schema.user.create('test', {password = 'test', if_not_exists = true})
            box.schema.user.grant('test', 'read,write,execute', 'universe')
        """)

        cls.con = tarantool.Connection(
            cls.srv.host,
            cls.srv.args['primary'],
            user='test',
            password='test',
        )

    def setUp(self):
        """Keep the server session alive and empty the test space."""
        # prevent a remote tarantool from cleaning our session
        if self.srv.is_started():
            self.srv.touch_lock()

        self.adm("box.space['test']:truncate()")

    # Mapping: case name -> the same value as a Python object ('python'),
    # an extension payload ('msgpack') and a Lua expression ('tarantool').
    cases = {
        'date': {
            'python': tarantool.Datetime(year=2022, month=8, day=31),
            'msgpack': b'\x80\xa4\x0e\x63\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=2022, month=8, day=31})",
        },
        'date_unix_start': {
            'python': tarantool.Datetime(year=1970, month=1, day=1),
            'msgpack': b'\x00\x00\x00\x00\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=1970, month=1, day=1})",
        },
        'date_before_1970': {
            'python': tarantool.Datetime(year=1900, month=1, day=1),
            'msgpack': b'\x80\x81\x55\x7c\xff\xff\xff\xff',
            'tarantool': r"datetime.new({year=1900, month=1, day=1})",
        },
        'datetime_with_minutes': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7),
            'msgpack': b'\x44\xa3\x0f\x63\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7})",
        },
        'datetime_with_seconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7, second=54),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54})",
        },
        'datetime_with_microseconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7, second=54,
                                         microsecond=308543),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x18\xfe\x63\x12\x00\x00\x00\x00',
            'tarantool': (r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, "
                          r"nsec=308543000})"),
        },
        'datetime_with_nanoseconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7, second=54,
                                         microsecond=308543, nanosecond=321),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00',
            'tarantool': (r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, "
                          r"nsec=308543321})"),
        },
        'pandas_timestamp': {
            'python': pandas.Timestamp(year=2022, month=8, day=31,
                                       hour=18, minute=7, second=54,
                                       microsecond=308543, nanosecond=321),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00',
            'tarantool': (r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, "
                          r"nsec=308543321})"),
        },
    }

    def test_msgpack_decode(self):
        """The unpacker hook turns each payload into the expected object."""
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                decoded = unpacker_ext_hook(4, case['msgpack'])
                self.assertEqual(decoded, case['python'])

    @skip_or_run_datetime_test
    def test_tarantool_decode(self):
        """A value stored by Tarantool is selected as the expected object."""
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                self.adm(f"box.space['test']:replace{{'{name}', {case['tarantool']}}}")

                selected = self.con.select('test', name)
                self.assertSequenceEqual(selected, [[name, case['python']]])

    def test_msgpack_encode(self):
        """The packer hook turns each object into the expected extension."""
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                expected = msgpack.ExtType(code=4, data=case['msgpack'])
                self.assertEqual(packer_default(case['python']), expected)

    @skip_or_run_datetime_test
    def test_tarantool_encode(self):
        """An inserted object equals the value Tarantool builds itself."""
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                self.con.insert('test', [name, case['python']])

                lua_eval = f"""
                    local dt = {case['tarantool']}
                    local tuple = box.space['test']:get('{name}')
                    assert(tuple ~= nil)
                    if tuple[2] == dt then
                        return true
                    else
                        return nil, ('%s is not equal to expected %s'):format(
                            tostring(tuple[2]), tostring(dt))
                    end
                """

                self.assertSequenceEqual(self.adm(lua_eval), [True])

    @classmethod
    def tearDownClass(cls):
        """Close the connection, then stop and clean up the server."""
        cls.con.close()
        cls.srv.stop()
        cls.srv.clean()