Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#139 use append for more types #161

Merged
merged 9 commits into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include monetdbe/_cffi/embed.h.j2
include monetdbe/_cffi/embed.h.j2
include monetdbe/_cffi/native_utilities.c
16 changes: 11 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ venv/:
python3 -m venv venv
venv/bin/pip install --upgrade pip wheel build setuptools

venv/installed: venv/
venv/bin/pytest: venv/
venv/bin/pip install -e ".[test]"
touch venv/installed
touch venv/bin/pytest

setup: venv/installed
setup: venv/bin/pytest

build: venv/installed
build: venv/bin/pytest
venv/bin/pyproject-build

test: setup
test: venv/bin/pytest
venv/bin/pytest

docker-wheels:
Expand Down Expand Up @@ -91,6 +91,12 @@ twine: venv/bin/twine
info: setup
venv/bin/python -c "from monetdbe._cffi.util import print_info; print_info()"

venv/bin/ipython: venv/
venv/bin/pip install ipython

ipython: venv/bin/ipython
venv/bin/ipython

venv38/:
/opt/homebrew/Cellar/[email protected]/3.8.*/bin/python3.8 -m venv venv38

Expand Down
9 changes: 3 additions & 6 deletions monetdbe/_cffi/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,12 @@
default = monetdb_branch.lower() in ("default", "jul2021", "jan2022")
win32 = platform == 'win32'


source = """
#include "monetdb/monetdbe.h"
"""

with open(Path(__file__).resolve().parent / "native_utilities.c") as f:
source = f.read()

# the ffibuilder object needs to exist and be configured in the module namespace so setup.py can reach it
ffibuilder = FFI()
ffibuilder.set_source("monetdbe._lowlevel", source, libraries=['monetdbe'])
ffibuilder.set_source("monetdbe._lowlevel", source=source, libraries=['monetdbe'])
embed_path = str(Path(__file__).resolve().parent / 'embed.h.j2')


Expand Down
54 changes: 44 additions & 10 deletions monetdbe/_cffi/convert/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from decimal import Decimal
from typing import List, Optional, Callable, Union, Any, Mapping
from typing import NamedTuple
from typing import List, Optional, Callable, Union, Any, Mapping, NamedTuple
import logging

import numpy as np

Expand All @@ -9,6 +9,11 @@
from monetdbe.converters import converters
from monetdbe.exceptions import ProgrammingError
from monetdbe.pythonize import py_date, py_time, py_timestamp
from monetdbe._cffi.branch import newer_then_jul2021

from monetdbe.types import supported_numpy_types

_logger = logging.getLogger()


def make_string(blob: char_p) -> str:
Expand Down Expand Up @@ -46,34 +51,63 @@ class MonetdbTypeInfo(NamedTuple):
null_value: Optional[Union[int, float]]


# monetdb C type, SQL type, numpy type, Cstringtype, pyconverter, null value, comment
type_infos: List[MonetdbTypeInfo] = [
inversable_type_infos: List[MonetdbTypeInfo] = [
MonetdbTypeInfo(lib.monetdbe_bool, "boolean", np.dtype(np.bool_), "bool", None, None),
MonetdbTypeInfo(lib.monetdbe_int8_t, "tinyint", np.dtype(np.int8), "int8_t", None, np.iinfo(np.int8).min), # type: ignore
MonetdbTypeInfo(lib.monetdbe_int16_t, "smallint", np.dtype(np.int16), "int16_t", None, np.iinfo(np.int16).min), # type: ignore
MonetdbTypeInfo(lib.monetdbe_int32_t, "int", np.dtype(np.int32), "int32_t", None, np.iinfo(np.int32).min), # type: ignore
MonetdbTypeInfo(lib.monetdbe_int64_t, "bigint", np.dtype(np.int64), "int64_t", None, np.iinfo(np.int64).min), # type: ignore
MonetdbTypeInfo(lib.monetdbe_size_t, None, np.dtype(np.uint64), "size_t", None, None), # used by monetdb internally
MonetdbTypeInfo(lib.monetdbe_float, "real", np.dtype(np.float32), "float", py_float, np.finfo(np.float32).min),
MonetdbTypeInfo(lib.monetdbe_double, "float", np.dtype(np.float64), "double", py_float, np.finfo(np.float64).min),
]

# things that can have a mapping from numpy to monetdb but not back
numpy_to_monetdb_type_infos: List[MonetdbTypeInfo] = [
MonetdbTypeInfo(lib.monetdbe_int8_t, "tinyint", np.dtype(np.uint8), "int8_t", None, None),
MonetdbTypeInfo(lib.monetdbe_int16_t, "smallint", np.dtype(np.uint16), "int16_t", None, None),
MonetdbTypeInfo(lib.monetdbe_int32_t, "int", np.dtype(np.uint32), "int32_t", None, None),
MonetdbTypeInfo(lib.monetdbe_int64_t, "bigint", np.dtype(np.uint64), "int64_t", None, None),
]

# things that can have a mapping from monetdb to numpy but not back
monetdb_to_numpy_type_infos: List[MonetdbTypeInfo] = [
MonetdbTypeInfo(lib.monetdbe_str, "string", np.dtype('=O'), "str", make_string, None),
MonetdbTypeInfo(lib.monetdbe_blob, "blob", np.dtype('=O'), "blob", make_blob, None),
MonetdbTypeInfo(lib.monetdbe_date, "date", np.dtype('=O'), "date", py_date, None),
MonetdbTypeInfo(lib.monetdbe_time, "time", np.dtype('=O'), "time", py_time, None),
MonetdbTypeInfo(lib.monetdbe_timestamp, "timestamp", np.dtype('=O'), "timestamp", py_timestamp, None),
]

numpy_type_map: Mapping[np.dtype, MonetdbTypeInfo] = {i.numpy_type: i for i in type_infos}
monet_c_type_map: Mapping[int, MonetdbTypeInfo] = {i.c_type: i for i in type_infos}
numpy_type_map: Mapping[np.dtype, MonetdbTypeInfo] = {i.numpy_type: i for i in
inversable_type_infos + numpy_to_monetdb_type_infos}
monet_c_type_map: Mapping[int, MonetdbTypeInfo] = {i.c_type: i for i in
inversable_type_infos + monetdb_to_numpy_type_infos}


def precision_warning(from_: int, to: int):
if from_ == lib.monetdbe_int64_t and to in (lib.monetdbe_int32_t, lib.monetdbe_int16_t, lib.monetdbe_int8_t):
_logger.warning("appending 64-bit data to lower bit column, potential loss of precision")
elif from_ == lib.monetdbe_int32_t and to in (lib.monetdbe_int16_t, lib.monetdbe_int8_t):
_logger.warning("appending 32-bit data to lower bit column, potential loss of precision")
elif from_ == lib.monetdbe_int16_t and to == lib.monetdbe_int8_t:
_logger.warning("appending 16-bit data to 8-bit column, potential loss of precision")
elif from_ in (lib.monetdbe_float, lib.monetdbe_double) and \
to in (lib.monetdbe_int64_t, lib.monetdbe_int32_t, lib.monetdbe_int16_t, lib.monetdbe_int8_t):
_logger.warning("appending float values to int column")


def numpy_monetdb_map(numpy_type: np.dtype):
if numpy_type.kind in ('i', 'f'): # type: ignore
if numpy_type.kind == 'U':
# this is an odd one, the numpy type string includes the width. Also, we don't format
# monetdb string columns as fixed width numpy columns yet, so technically this type is
# non-reversable for now.
return MonetdbTypeInfo(lib.monetdbe_str, "string", numpy_type, "char *", None, None)

if numpy_type.kind in supported_numpy_types: # type: ignore
return numpy_type_map[numpy_type]
raise ProgrammingError("append() only support int and float family types")
raise ProgrammingError(f"append() called with unsupported type {numpy_type}")


from monetdbe._cffi.branch import newer_then_jul2021
if newer_then_jul2021:
def extract(rcol: monetdbe_column, r: int, text_factory: Optional[Callable[[str], Any]] = None):
"""
Expand Down
2 changes: 2 additions & 0 deletions monetdbe/_cffi/embed.h.j2
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,5 @@ extern char* monetdbe_get_columns(monetdbe_database dbhdl, const char* schema_na

extern char* monetdbe_dump_database(monetdbe_database dbhdl, const char *backupfile);
extern char* monetdbe_dump_table(monetdbe_database dbhdl, const char *schema_name, const char *table_name, const char *backupfile);

extern void initialize_string_array_from_numpy(char** restrict output, size_t size, char* restrict numpy_string_input, size_t stride_length);
60 changes: 41 additions & 19 deletions monetdbe/_cffi/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@

from monetdbe._lowlevel import ffi, lib
from monetdbe import exceptions
from monetdbe._cffi.convert import make_string, monet_c_type_map, extract, numpy_monetdb_map
from monetdbe._cffi.convert.bind import prepare_bind
from monetdbe._cffi.convert.bind import monetdbe_decimal_to_bte, monetdbe_decimal_to_sht, monetdbe_decimal_to_int, monetdbe_decimal_to_lng

from monetdbe._cffi.convert import make_string, monet_c_type_map, extract, numpy_monetdb_map, precision_warning
from monetdbe._cffi.convert.bind import monetdbe_decimal_to_bte, monetdbe_decimal_to_sht, monetdbe_decimal_to_int, monetdbe_decimal_to_lng, prepare_bind
from monetdbe._cffi.errors import check_error
from monetdbe._cffi.types_ import monetdbe_result, monetdbe_database, monetdbe_column, monetdbe_statement

Expand Down Expand Up @@ -74,15 +72,15 @@ def get_autocommit() -> bool:
def bind(statement: monetdbe_statement, data: Any, parameter_nr: int, type_info=None) -> None:
try:
_type_info = type_info[parameter_nr]
if (_type_info.sql_type == 'decimal'):
if _type_info.sql_type == 'decimal':
d = int(Decimal(data) * (Decimal(10) ** _type_info.scale))
if (_type_info.impl_type == 'bte'):
if _type_info.impl_type == 'bte':
prepared = monetdbe_decimal_to_bte(d)
elif (_type_info.impl_type == 'sht'):
elif _type_info.impl_type == 'sht':
prepared = monetdbe_decimal_to_sht(d)
elif (_type_info.impl_type == 'int'):
elif _type_info.impl_type == 'int':
prepared = monetdbe_decimal_to_int(d)
elif (_type_info.impl_type == 'lng'):
elif _type_info.impl_type == 'lng':
prepared = monetdbe_decimal_to_lng(d)
else:
raise NotImplementedError("Unknown decimal implementation type")
Expand Down Expand Up @@ -250,6 +248,7 @@ def append(self, table: str, data: Mapping[str, np.ndarray], schema: str = 'sys'
"""
Directly append an array structure
"""

self._switch()
n_columns = len(data)
existing_columns = list(self.get_columns(schema=schema, table=table))
Expand All @@ -260,24 +259,47 @@ def append(self, table: str, data: Mapping[str, np.ndarray], schema: str = 'sys'
raise exceptions.ProgrammingError(error)

work_columns = ffi.new(f'monetdbe_column * [{n_columns}]')
work_objs = []
# cffi_objects assists to keep all in-memory native data structure alive during the execution of this call
cffi_objects = []
for column_num, (column_name, existing_type) in enumerate(existing_columns):
column_values = data[column_name]
work_column = ffi.new('monetdbe_column *')
type_info = numpy_monetdb_map(column_values.dtype)
if not type_info.c_type == existing_type:
existing_type_string = monet_c_type_map[existing_type].c_string_type
error = f"Type '{type_info.c_string_type}' for appended column '{column_name}' " \
f"does not match table type '{existing_type_string}'"
raise exceptions.ProgrammingError(error)

# try to convert the values if types don't match
if type_info.c_type != existing_type:
precision_warning(type_info.c_type, existing_type)
to_numpy_type = monet_c_type_map[existing_type].numpy_type
try:
column_values = column_values.astype(to_numpy_type)
type_info = numpy_monetdb_map(column_values.dtype)
except Exception as e:
existing_type_string = monet_c_type_map[existing_type].c_string_type
error = f"Can't convert '{type_info.c_string_type}' " \
f"to type '{existing_type_string}' for column '{column_name}': {e} "
raise ValueError(error)

work_column.type = type_info.c_type
work_column.count = column_values.shape[0]
work_column.name = ffi.new('char[]', column_name.encode())
work_column.data = ffi.cast(f"{type_info.c_string_type} *", ffi.from_buffer(column_values))
if type_info.numpy_type.kind == 'U':
# first massage the numpy array of unicode into a matrix of null terminated rows of bytes.
v = np.char.encode(column_values).view('b').reshape((work_column.count, -1))
v = np.c_[v, np.zeros(work_column.count, dtype=np.int8)]
stride_length = v.shape[1]
cffi_objects.append(v)
t = ffi.new('char*[]', work_column.count)
cffi_objects.append(t)
p = ffi.from_buffer("char*", v)
cffi_objects.append(p)
lib.initialize_string_array_from_numpy(t, work_column.count, p, stride_length)
work_column.data = t
else:
work_column.data = ffi.from_buffer(f"{type_info.c_string_type}*", column_values)
work_columns[column_num] = work_column
work_objs.append(work_column)
check_error(
lib.monetdbe_append(self._monetdbe_database, schema.encode(), table.encode(), work_columns, n_columns))
cffi_objects.append(work_column)
check_error(lib.monetdbe_append(self._monetdbe_database, schema.encode(),
table.encode(), work_columns, n_columns))

def prepare(self, query: str) -> monetdbe_statement:
self._switch()
Expand Down
8 changes: 8 additions & 0 deletions monetdbe/_cffi/native_utilities.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include "monetdb/monetdbe.h"

#include <stddef.h>
void initialize_string_array_from_numpy(char** restrict output, size_t size, char* restrict numpy_string_input, size_t stride_length) {
for (size_t i = 0; i < size; i++) {
output[i] = numpy_string_input + i*stride_length;
}
}
7 changes: 4 additions & 3 deletions monetdbe/cursors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from monetdbe.connection import Connection, Description
from monetdbe.exceptions import ProgrammingError, InterfaceError
from monetdbe.formatting import format_query, strip_split_and_clean, parameters_type
from monetdbe.monetize import monet_identifier_escape, convert
from monetdbe.monetize import monet_identifier_escape
from monetdbe.types import supported_numpy_types

if TYPE_CHECKING:
from monetdbe.row import Row
Expand Down Expand Up @@ -285,7 +286,7 @@ def _insert_slow(self, table: str, data: Dict[str, np.ndarray], schema: str = 's
query = f"insert into {schema}.{table} ({columns}) values ({qmarks})"
return self.executemany(query, rows_zipped)

def insert(self, table: str, values: Union[pd.DataFrame, Dict[str, np.ndarray]], schema: str = 'sys'):
def insert(self, table: str, values: Union[pd.DataFrame, Mapping[str, np.ndarray]], schema: str = 'sys'):
"""
Inserts a set of values into the specified table.

Expand All @@ -303,7 +304,7 @@ def insert(self, table: str, values: Union[pd.DataFrame, Dict[str, np.ndarray]],
if not isinstance(value, (np.ma.core.MaskedArray, np.ndarray)): # type: ignore
prepared[key] = np.array(value)

if sum(i.dtype.kind not in 'if' for i in prepared.values()): # type: ignore
if sum(i.dtype.kind not in supported_numpy_types for i in prepared.values()): # type: ignore
warn(
"One of the columns you are inserting is not of type int or float which fast append doesn't support. Falling back to regular insert.")
return self._insert_slow(table, prepared, schema)
Expand Down
1 change: 1 addition & 0 deletions monetdbe/monetize.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def monet_datetime(data: Any) -> str:
(datetime.date, monet_escape),
(datetime.timedelta, monet_escape),
(bool, monet_bool),
(np.bool_, monet_bool),
(type(None), monet_none),
(np.int64, int),
(np.int32, int),
Expand Down
14 changes: 14 additions & 0 deletions monetdbe/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,17 @@
BINARY_LARGE_OBJECT = BLOB
NUMERIC = DECIMAL
DOUBLE_PRECISION = DOUBLE

supported_numpy_types: str = (
'b' # boolean
'i' # signed integer
'u' # unsigned integer
'f' # floating-point
# 'M' # datetime
'U' # Unicode
# c complex floating-point
# m timedelta
# O object
# S (byte-)string
# V void
)
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[build-system]
requires = ["setuptools >= 40.6.0", "wheel", 'cffi']
build-backend = "setuptools.build_meta"

[mypy]
plugins = "numpy.typing.mypy_plugin"
34 changes: 25 additions & 9 deletions tests/test_cffi.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,34 +29,50 @@ def test_append_too_little_columns(self):
con._internal.append(table='test', data=data)

def test_append_wrong_type(self):
"""
we now convert this automatically, so should not raise error
"""
with connect() as con:
con.execute("CREATE TABLE test (i int)")
data = {'i': np.array([0.1, 0.2, 0.3], dtype=np.float32)}
with self.assertRaises(ProgrammingError):
con._internal.append(table='test', data=data)
con._internal.append(table='test', data=data)

def test_append_wrong_size(self):
"""
we now convert this automatically, so should not raise error
"""
with connect() as con:

con.execute("CREATE TABLE test (i int)") # SQL int is 32 bit
data = {'i': np.array([1, 2, 3], dtype=np.int64)}
with self.assertRaises(ProgrammingError):
con._internal.append(table='test', data=data)
con._internal.append(table='test', data=data)

def test_append_supported_types(self):
with connect() as con:
con.execute("CREATE TABLE test (t tinyint, s smallint, i int, b bigint, r real, f float)")
con.execute("CREATE TABLE test (t tinyint, s smallint, i int, h bigint, r real, f float, b bool)")
con.execute(
"""
INSERT INTO test VALUES (2^8, 2^16, 2^32, 2^64, 0.12345, 0.123456789),
(NULL, NULL, NULL, NULL, NULL, NULL),
(0, 0, 0, 0, 0.0, 0.0),
(-2^8, -2^16, -2^32, -2^64, -0.12345, -0.123456789)
INSERT INTO test VALUES (2^8, 2^16, 2^32, 2^64, 0.12345, 0.123456789, true),
(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
(0, 0, 0, 0, 0.0, 0.0, false),
(-2^8, -2^16, -2^32, -2^64, -0.12345, -0.123456789, false)
"""
)
data = con.execute("select * from test").fetchnumpy()
con._internal.append(schema='sys', table='test', data=data)
con.cursor().insert(table='test', values=data)

def test_append_numpy_only_types(self):
"""
test numpy types don't have have a direct 1-on-1 sql mapping
"""
with connect() as con:
table = 'i'
con.execute(f"CREATE TABLE {table} (i int)")
data = {'i': np.ndarray([0], dtype=np.uint32)}
con._internal.append(schema='sys', table=table, data=data)
con.cursor().insert(table=table, values=data)

def test_append_unsupported_types(self):
with connect() as con:
con.execute("CREATE TABLE test (s string, b blob, d date, t time, ts timestamp)")
Expand Down
Loading