Skip to content

Commit

Permalink
msgpack: support datetime extended type
Browse files Browse the repository at this point in the history
Tarantool supports the datetime type since version 2.10.0 [1]. This patch
introduces support for the Tarantool datetime type in msgpack decoders
and encoders.

The Tarantool datetime type is mapped to new tarantool.Datetime type
which inherits pandas.Timestamp [2]. pandas.Timestamp was chosen as a
base class because it could be used to store both nanoseconds and
timezone information. The built-in Python datetime.datetime supports
microseconds at most, and numpy.datetime64 does not support timezones. There
are two reasons to use custom type instead of plain pandas.Timestamp:
- tzindex may be lost on conversion to pandas.Timestamp
- Tarantool datetime interval type is planned to be stored in custom
  type tarantool.Interval and we'll need a way to support arithmetic
  between datetime and interval.

msgpack encoder supports encoding pandas.Timestamp with
tarantool.Datetime tools, but it always decodes to tarantool.Datetime.
If you plan to work with tarantool datetimes, please stick to the
tarantool.Datetime object rather than pure pandas.Timestamp. You can
create tarantool.Datetime from pandas.Timestamp or by using the same
API as in plain pandas.Timestamp. If you used numpy.datetime64 or
datetime.datetime in your logic, you can use pandas.to_datetime64() and
pandas.to_datetime() converters.

This patch does not yet introduce the support of timezones in datetime.

1. tarantool/tarantool#5941
2. https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html

Part of #204
  • Loading branch information
DifferentialOrange committed Sep 7, 2022
1 parent 51ece90 commit 1b6541f
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Decimal type support (#203).
- UUID type support (#202).
- Datetime type support and tarantool.Datetime type (#204).

### Changed
- Bump msgpack requirement to 1.0.4 (PR #223).
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
msgpack>=1.0.4
pandas
6 changes: 5 additions & 1 deletion tarantool/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
ENCODING_DEFAULT,
)

from tarantool.msgpack_ext.types.datetime import (
Datetime,
)

__version__ = "0.9.0"


Expand Down Expand Up @@ -91,7 +95,7 @@ def connectmesh(addrs=({'host': 'localhost', 'port': 3301},), user=None,

__all__ = ['connect', 'Connection', 'connectmesh', 'MeshConnection', 'Schema',
'Error', 'DatabaseError', 'NetworkError', 'NetworkWarning',
'SchemaError', 'dbapi']
'SchemaError', 'dbapi', 'Datetime']

# ConnectionPool is supported only for Python 3.7 or newer.
if sys.version_info.major >= 3 and sys.version_info.minor >= 7:
Expand Down
10 changes: 10 additions & 0 deletions tarantool/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ class MsgpackWarning(UserWarning):
Warning with encoding or decoding of MP_EXT types
'''

class ExtTypeError(ValueError):
    '''
    Raised on errors concerning the tarantool.Datetime type.
    '''

class ExtTypeWarning(UserWarning):
    '''
    Issued for warnings concerning the tarantool.Datetime type.
    '''

__all__ = ("Warning", "Error", "InterfaceError", "DatabaseError", "DataError",
"OperationalError", "IntegrityError", "InternalError",
"ProgrammingError", "NotSupportedError", "MsgpackError",
Expand Down
9 changes: 9 additions & 0 deletions tarantool/msgpack_ext/datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from tarantool.msgpack_ext.types.datetime import Datetime

EXT_ID = 4

def encode(obj):
    """
    Build the datetime MP_EXT payload for ``obj``.

    The work is delegated to ``Datetime.msgpack_encode``, which is
    called as a plain function with ``obj`` as its only argument.

    :param obj: object to serialize
    :return: the value produced by ``Datetime.msgpack_encode(obj)``
    """
    payload = Datetime.msgpack_encode(obj)
    return payload

def decode(data):
    """
    Turn a datetime MP_EXT payload into a ``Datetime`` object.

    :param data: payload passed straight to the ``Datetime`` constructor
    :return: the constructed ``Datetime`` instance
    """
    decoded = Datetime(data)
    return decoded
10 changes: 8 additions & 2 deletions tarantool/msgpack_ext/packer.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from decimal import Decimal
from uuid import UUID
from msgpack import ExtType
import pandas

from tarantool.msgpack_ext.types.datetime import Datetime

import tarantool.msgpack_ext.decimal as ext_decimal
import tarantool.msgpack_ext.uuid as ext_uuid
import tarantool.msgpack_ext.datetime as ext_datetime

encoders = [
{'type': Decimal, 'ext': ext_decimal},
{'type': UUID, 'ext': ext_uuid },
{'type': Decimal, 'ext': ext_decimal },
{'type': UUID, 'ext': ext_uuid },
{'type': Datetime, 'ext': ext_datetime},
{'type': pandas.Timestamp, 'ext': ext_datetime},
]

def default(obj):
Expand Down
95 changes: 95 additions & 0 deletions tarantool/msgpack_ext/types/datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import pandas

# https://www.tarantool.io/ru/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
#
# The datetime MessagePack representation looks like this:
# +---------+----------------+==========+-----------------+
# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; |
# | = d7/d8 | = 4 | | tzindex; |
# +---------+----------------+==========+-----------------+
# MessagePack data contains:
#
# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the
# little-endian order.
# * The optional fields (8 bytes), if any of them have a non-zero value.
# The fields include nsec (4 bytes), tzoffset (2 bytes), and
# tzindex (2 bytes) packed in the little-endian order.
#
# seconds is seconds since Epoch, where the epoch is the point where the time
# starts, and is platform dependent. For Unix, the epoch is January 1,
# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure
# definition in src/lib/core/datetime.h and reasons in
# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c
#
# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see
# a definition in src/lib/core/datetime.h.
#
# tzoffset is timezone offset in minutes from UTC. Tarantool uses an int16_t type,
# see a structure definition in src/lib/core/datetime.h.
#
# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure
# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are
# specified, tzindex has the preference and the tzoffset value is ignored.

# Widths, in bytes, of the individual fields of the datetime payload.
SECONDS_SIZE_BYTES = 8
NSEC_SIZE_BYTES = 4
TZOFFSET_SIZE_BYTES = 2
TZINDEX_SIZE_BYTES = 2

# Byte order used when converting payload fields to and from integers.
BYTEORDER = 'little'

# Number of nanoseconds in a single second.
NSEC_IN_SEC = 10 ** 9


def get_bytes_as_int(data, cursor, size):
    """
    Read one signed integer field out of ``data``.

    :param data: buffer to read from
    :param cursor: offset of the first byte of the field
    :param size: width of the field in bytes
    :return: tuple ``(value, next_cursor)`` where ``next_cursor`` is
        ``cursor + size``
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end

def get_int_as_bytes(data, size):
    """
    Serialize the integer ``data`` as a signed field of ``size`` bytes.

    :param data: integer to serialize
    :param size: width of the resulting field in bytes
    :return: ``size`` bytes in ``BYTEORDER`` order
    """
    encoded = data.to_bytes(size, byteorder=BYTEORDER, signed=True)
    return encoded

def msgpack_decode(data):
    """
    Decode the payload of a Tarantool datetime MP_EXT value.

    The payload is either the 8-byte seconds field alone or the seconds
    field followed by the 8 bytes of optional fields (nsec, tzoffset,
    tzindex). Any other length is rejected instead of being parsed from
    truncated slices or having trailing bytes silently ignored.

    :param data: raw extension payload
    :type data: bytes
    :return: tuple ``(dt, tzoffset, tzindex)`` where ``dt`` is the result
        of ``pandas.to_datetime`` for the decoded nanosecond count
    :raise ValueError: payload length is neither 8 nor 16 bytes
    :raise NotImplementedError: payload carries a non-zero tzoffset or
        tzindex (timezones are not supported yet)
    """
    short_size = SECONDS_SIZE_BYTES
    full_size = (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES +
                 TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES)

    if len(data) not in (short_size, full_size):
        raise ValueError(
            'Unexpected datetime payload length %d, expected %d or %d'
            % (len(data), short_size, full_size))

    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    if len(data) == full_size:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)
    else:
        # Optional fields are omitted when all of them are zero.
        nsec = 0
        tzoffset = 0
        tzindex = 0

    if (tzoffset != 0) or (tzindex != 0):
        raise NotImplementedError(
            'Decoding datetime with timezone information is not supported')

    total_nsec = seconds * NSEC_IN_SEC + nsec

    dt = pandas.to_datetime(total_nsec, unit='ns')
    return dt, tzoffset, tzindex

class Datetime(pandas.Timestamp):
    """
    ``pandas.Timestamp`` subclass that can be created from, and
    serialized to, the Tarantool datetime MP_EXT payload.
    """

    def __new__(cls, *args, **kwargs):
        """
        Create a ``Datetime``.

        When the first positional argument is ``bytes`` it is treated as
        an MP_EXT payload and decoded with ``msgpack_decode``; otherwise
        all arguments are forwarded to the parent constructor.
        """
        if args and isinstance(args[0], bytes):
            # Timezone fields returned by the decoder are not used here.
            dt, _, _ = msgpack_decode(args[0])
        else:
            dt = super().__new__(cls, *args, **kwargs)

        # Make sure the resulting object is reported as ``cls``.
        dt.__class__ = cls
        return dt

    def msgpack_encode(self):
        """
        Serialize this timestamp into the datetime MP_EXT payload.

        Only ``self.value`` is read. tzoffset and tzindex are always
        written as zero.

        :return: 8 bytes (seconds only) when nsec, tzoffset and tzindex
            are all zero, otherwise 16 bytes
        :rtype: bytes
        """
        seconds, nsec = divmod(self.value, NSEC_IN_SEC)
        tzoffset = 0
        tzindex = 0

        parts = [get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)]

        # The optional tail is emitted only if any of its fields is non-zero.
        if nsec or tzoffset or tzindex:
            parts.append(get_int_as_bytes(nsec, NSEC_SIZE_BYTES))
            parts.append(get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES))
            parts.append(get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES))

        return b''.join(parts)
6 changes: 4 additions & 2 deletions tarantool/msgpack_ext/unpacker.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import tarantool.msgpack_ext.decimal as ext_decimal
import tarantool.msgpack_ext.uuid as ext_uuid
import tarantool.msgpack_ext.datetime as ext_datetime

decoders = {
ext_decimal.EXT_ID: ext_decimal.decode,
ext_uuid.EXT_ID : ext_uuid.decode ,
ext_decimal.EXT_ID : ext_decimal.decode ,
ext_uuid.EXT_ID : ext_uuid.decode ,
ext_datetime.EXT_ID: ext_datetime.decode,
}

def ext_hook(code, data):
Expand Down
3 changes: 2 additions & 1 deletion test/suites/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
from .test_ssl import TestSuite_Ssl
from .test_decimal import TestSuite_Decimal
from .test_uuid import TestSuite_UUID
from .test_datetime import TestSuite_Datetime

test_cases = (TestSuite_Schema_UnicodeConnection,
TestSuite_Schema_BinaryConnection,
TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect,
TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI,
TestSuite_Encoding, TestSuite_Pool, TestSuite_Ssl,
TestSuite_Decimal, TestSuite_UUID)
TestSuite_Decimal, TestSuite_UUID, TestSuite_Datetime)

def load_tests(loader, tests, pattern):
suite = unittest.TestSuite()
Expand Down
11 changes: 11 additions & 0 deletions test/suites/lib/skip.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,14 @@ def skip_or_run_UUID_test(func):

return skip_or_run_test_tarantool(func, '2.4.1',
'does not support UUID type')

def skip_or_run_datetime_test(func):
    """Decorator deciding whether a datetime-related test is run or
    skipped, depending on the tarantool version.

    The datetime type is supported by Tarantool only since version 2.10.0.
    See https://github.com/tarantool/tarantool/issues/5941
    """

    required_module = 'datetime'
    skip_reason = 'does not support datetime type'
    return skip_or_run_test_pcall_require(func, required_module, skip_reason)
158 changes: 158 additions & 0 deletions test/suites/test_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-

from __future__ import print_function

import sys
import unittest
import msgpack
import warnings
import tarantool
import pandas

from tarantool.msgpack_ext.packer import default as packer_default
from tarantool.msgpack_ext.unpacker import ext_hook as unpacker_ext_hook

from .lib.tarantool_server import TarantoolServer
from .lib.skip import skip_or_run_datetime_test
from tarantool.error import MsgpackError, MsgpackWarning

class TestSuite_Datetime(unittest.TestCase):
    # Encoding/decoding tests for the datetime extension type, both at
    # the msgpack level and against a running Tarantool instance.

    @classmethod
    def setUpClass(cls):
        print(' DATETIME EXT TYPE '.center(70, '='), file=sys.stderr)
        print('-' * 70, file=sys.stderr)
        cls.srv = TarantoolServer()
        cls.srv.script = 'test/suites/box.lua'
        cls.srv.start()

        # Prepare the space and the user through the admin console.
        cls.adm = cls.srv.admin
        cls.adm(r"""
_, datetime = pcall(require, 'datetime')
box.schema.space.create('test')
box.space['test']:create_index('primary', {
type = 'tree',
parts = {1, 'string'},
unique = true})
box.schema.user.create('test', {password = 'test', if_not_exists = true})
box.schema.user.grant('test', 'read,write,execute', 'universe')
""")

        cls.con = tarantool.Connection(cls.srv.host, cls.srv.args['primary'],
                                       user='test', password='test')

    def setUp(self):
        # prevent a remote tarantool from cleaning our session
        if self.srv.is_started():
            self.srv.touch_lock()

        self.adm("box.space['test']:truncate()")


    # Each case couples a Python object with its expected MP_EXT payload
    # and the Lua expression that builds the same value in Tarantool.
    cases = {
        'date': {
            'python': tarantool.Datetime(year=2022, month=8, day=31),
            'msgpack': (b'\x80\xa4\x0e\x63\x00\x00\x00\x00'),
            'tarantool': r"datetime.new({year=2022, month=8, day=31})",
        },
        'date_unix_start': {
            'python': tarantool.Datetime(year=1970, month=1, day=1),
            'msgpack': (b'\x00\x00\x00\x00\x00\x00\x00\x00'),
            'tarantool': r"datetime.new({year=1970, month=1, day=1})",
        },
        'date_before_1970': {
            'python': tarantool.Datetime(year=1900, month=1, day=1),
            'msgpack': (b'\x80\x81\x55\x7c\xff\xff\xff\xff'),
            'tarantool': r"datetime.new({year=1900, month=1, day=1})",
        },
        'datetime_with_minutes': {
            'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7),
            'msgpack': (b'\x44\xa3\x0f\x63\x00\x00\x00\x00'),
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7})",
        },
        'datetime_with_seconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54),
            'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00'),
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54})",
        },
        'datetime_with_microseconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54,
                                         microsecond=308543),
            'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x18\xfe\x63\x12\x00\x00\x00\x00'),
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
                         r"nsec=308543000})",
        },
        'datetime_with_nanoseconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54,
                                         microsecond=308543, nanosecond=321),
            'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00'),
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
                         r"nsec=308543321})",
        },
        'pandas_timestamp': {
            'python': pandas.Timestamp(year=2022, month=8, day=31, hour=18, minute=7, second=54,
                                       microsecond=308543, nanosecond=321),
            'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00'),
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
                         r"nsec=308543321})",
        },
    }

    def test_msgpack_decode(self):
        # Raw payload -> Python object through the unpacker ext hook.
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                decoded = unpacker_ext_hook(4, case['msgpack'])
                self.assertEqual(decoded, case['python'])

    @skip_or_run_datetime_test
    def test_tarantool_decode(self):
        # Value stored by Tarantool -> Python object through a select.
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                self.adm(f"box.space['test']:replace{{'{name}', {case['tarantool']}}}")

                expected = [[name, case['python']]]
                self.assertSequenceEqual(self.con.select('test', name),
                                         expected)

    def test_msgpack_encode(self):
        # Python object -> msgpack ExtType through the packer default hook.
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                expected = msgpack.ExtType(code=4, data=case['msgpack'])
                self.assertEqual(packer_default(case['python']), expected)

    @skip_or_run_datetime_test
    def test_tarantool_encode(self):
        # Python object inserted by the connector is compared on the
        # Tarantool side with a value built by datetime.new().
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                self.con.insert('test', [name, case['python']])

                lua_eval = f"""
local dt = {case['tarantool']}
local tuple = box.space['test']:get('{name}')
assert(tuple ~= nil)
if tuple[2] == dt then
return true
else
return nil, ('%s is not equal to expected %s'):format(
tostring(tuple[2]), tostring(dt))
end
"""

                self.assertSequenceEqual(self.adm(lua_eval), [True])


    @classmethod
    def tearDownClass(cls):
        cls.con.close()
        cls.srv.stop()
        cls.srv.clean()

0 comments on commit 1b6541f

Please sign in to comment.