Added JSON, JSONB, and UUID data types.
xmnlab committed Sep 28, 2019
1 parent 79bab94 commit 298efc2
Showing 6 changed files with 171 additions and 13 deletions.
89 changes: 84 additions & 5 deletions ibis/expr/datatypes.py
@@ -129,9 +129,6 @@ def _literal_value_hash_key(self, value) -> int:


class Any(DataType):
scalar = ir.AnyScalar
column = ir.AnyColumn

__slots__ = ()


@@ -639,6 +636,23 @@ def _literal_value_hash_key(self, value):
return self, _tuplize(value.items())


class JSON(String):
"""JSON (JavaScript Object Notation) text format."""

scalar = ir.JSONScalar
column = ir.JSONColumn


class JSONB(Binary):
"""JSON (JavaScript Object Notation) data stored as a binary
representation, which eliminates whitespace, duplicate keys,
and key ordering.
"""

scalar = ir.JSONBScalar
column = ir.JSONBColumn


class GeoSpatial(DataType):
__slots__ = 'geotype', 'srid'

@@ -782,6 +796,17 @@ class MultiPolygon(GeoSpatial):
__slots__ = ()


class UUID(String):
"""A universally unique identifier (UUID) is a 128-bit number used to
identify information in computer systems.
"""

scalar = ir.UUIDScalar
column = ir.UUIDColumn

__slots__ = ()


# ---------------------------------------------------------------------
any = Any()
null = Null()
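`UUID` gets the same treatment as a specialized `String` subtype, matching its canonical 36-character textual form. A short sketch, using the literal syntax that the tests added later in this commit exercise:

```python
import ibis
import ibis.expr.datatypes as dt

# UUID behaves as a specialized string type.
assert isinstance(dt.UUID(), dt.String)

# Literals can be tagged with the new type name.
lit = ibis.literal('123e4567-e89b-12d3-a456-426655440000', type='uuid')
```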
@@ -818,7 +843,11 @@ class MultiPolygon(GeoSpatial):
multilinestring = MultiLineString()
multipoint = MultiPoint()
multipolygon = MultiPolygon()

# json
json = JSON()
jsonb = JSONB()
# special string-based data type
uuid = UUID()

_primitive_types = [
('any', any),
@@ -884,6 +913,9 @@ class Tokens:
MULTIPOINT = 28
MULTIPOLYGON = 29
SEMICOLON = 30
JSON = 31
JSONB = 32
UUID = 33

@staticmethod
def name(value):
@@ -894,7 +926,6 @@ def name(value):
(getattr(Tokens, n), n) for n in dir(Tokens) if n.isalpha() and n.isupper()
)


Token = collections.namedtuple('Token', ('type', 'value'))


@@ -1008,6 +1039,22 @@ def name(value):
),
)
]
+ [
# json data type
(
'(?P<{}>{})'.format(token.upper(), token),
lambda token, toktype=toktype: Token(toktype, token),
)
for token, toktype in zip(
# note: `jsonb` must come before `json`, or the alternation matches only the `json` prefix
('jsonb', 'json'),
(Tokens.JSONB, Tokens.JSON),
)
]
+ [
# special string-based data types
('(?P<UUID>uuid)', lambda token: Token(Tokens.UUID, token))
]
+ [
# integers, for decimal spec
(r'(?P<INTEGER>\d+)', lambda token: Token(Tokens.INTEGER, int(token))),
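The ordering note above is load-bearing: the lexer joins all of these patterns into a single alternation, and Python's `re` engine commits to the first alternative that matches at a position. A standalone illustration of the failure mode the comment guards against:

```python
import re

# If 'json' came first, lexing 'jsonb' would stop at the prefix:
m = re.match('(?P<JSON>json)|(?P<JSONB>jsonb)', 'jsonb')
assert m.lastgroup == 'JSON' and m.group() == 'json'

# With 'jsonb' first, the full token is consumed:
m = re.match('(?P<JSONB>jsonb)|(?P<JSON>json)', 'jsonb')
assert m.lastgroup == 'JSONB' and m.group() == 'jsonb'
```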
@@ -1212,6 +1259,12 @@ def type(self) -> DataType:
| "multipolygon" ":" geotype
| "multipolygon" ";" srid ":" geotype
json : "json"
jsonb : "jsonb"
uuid : "uuid"
"""
if self._accept(Tokens.PRIMITIVE):
assert self.tok is not None
@@ -1325,6 +1378,13 @@ def type(self) -> DataType:
self._expect(Tokens.RBRACKET)
return Struct(names, types)

# json data types
elif self._accept(Tokens.JSON):
return JSON()

elif self._accept(Tokens.JSONB):
return JSONB()

# geo spatial data type
elif self._accept(Tokens.GEOMETRY):
return Geometry()
@@ -1434,6 +1494,10 @@ def type(self) -> DataType:

return MultiPolygon(geotype=geotype, srid=srid)

# special string-based data types
elif self._accept(Tokens.UUID):
return UUID()

else:
raise SyntaxError('Type cannot be parsed: {}'.format(self.text))
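With the tokens and grammar branches wired up, the type parser round-trips the new names. Since `dt.dtype` (the same dispatcher the PostgreSQL rules below register against) also parses type strings, a quick sanity check might look like:

```python
import ibis.expr.datatypes as dt

# The parser maps each new name to its type instance.
assert dt.dtype('json') == dt.JSON()
assert dt.dtype('jsonb') == dt.JSONB()
assert dt.dtype('uuid') == dt.UUID()
```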

@@ -1766,6 +1830,16 @@ def can_cast_variadic(
return castable(source.value_type, target.value_type)


@castable.register(JSON, JSON)
def can_cast_json(source, target, **kwargs):
return True


@castable.register(JSONB, JSONB)
def can_cast_jsonb(source, target, **kwargs):
return True


# geo spatial data type
# cast between same type, used to cast from/to geometry and geography
GEO_TYPES = (
@@ -1785,6 +1859,11 @@ def can_cast_geospatial(source, target, **kwargs):
return True


@castable.register(UUID, UUID)
def can_cast_special_string(source, target, **kwargs):
return True


# @castable.register(Map, Map)
# def can_cast_maps(source, target):
# return (source.equals(target) or
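The cast rules are deliberately narrow: each new type is declared castable only to itself, mirroring the geospatial rule above. A sketch of how the registrations surface through the `castable` dispatcher:

```python
import ibis.expr.datatypes as dt

# Same-type casts succeed under the new rules.
assert dt.castable(dt.JSON(), dt.JSON())
assert dt.castable(dt.JSONB(), dt.JSONB())
assert dt.castable(dt.UUID(), dt.UUID())
```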
36 changes: 36 additions & 0 deletions ibis/expr/types.py
@@ -794,6 +794,30 @@ class MapColumn(AnyColumn, MapValue):
pass # noqa: E701,E302


class JSONValue(StringValue):
pass # noqa: E701,E302


class JSONScalar(StringScalar, JSONValue):
pass # noqa: E701,E302


class JSONColumn(StringColumn, JSONValue):
pass # noqa: E701,E302


class JSONBValue(BinaryValue):
pass # noqa: E701,E302


class JSONBScalar(BinaryScalar, JSONBValue):
pass # noqa: E701,E302


class JSONBColumn(BinaryColumn, JSONBValue):
pass # noqa: E701,E302


class StructValue(AnyValue):
def __dir__(self):
return sorted(
@@ -909,6 +933,18 @@ class MultiPolygonColumn(GeoSpatialColumn, MultiPolygonValue):  # noqa: E302
pass # noqa: E701


class UUIDValue(StringValue):
pass # noqa: E701,E302


class UUIDScalar(StringScalar, UUIDValue):
pass # noqa: E701,E302


class UUIDColumn(StringColumn, UUIDValue):
pass # noqa: E701,E302


class ListExpr(ColumnExpr, AnyValue):
@property
def values(self):
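Because the new expression classes multiply-inherit from the `String` and `Binary` expression hierarchies, columns of the new types pick up their parents' operations without further work. A sketch under that assumption; the table and column names are illustrative, not part of this commit:

```python
import ibis

# Hypothetical unbound table using the new type names.
t = ibis.table([('doc', 'json'), ('id', 'uuid')], name='t')

# UUIDColumn inherits StringColumn, so string operations type-check.
expr = t.id.length()
```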
6 changes: 3 additions & 3 deletions ibis/sql/alchemy.py
@@ -138,17 +138,17 @@ def sa_double(_, satype, nullable=True):

@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.UUID)
def sa_uuid(_, satype, nullable=True):
return dt.Any(nullable=nullable)
return dt.UUID(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.JSON)
def sa_json(_, satype, nullable=True):
return dt.Any(nullable=nullable)
return dt.JSON(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.JSONB)
def sa_jsonb(_, satype, nullable=True):
return dt.Any(nullable=nullable)
return dt.JSONB(nullable=nullable)


if geospatial_supported:
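Previously all three PostgreSQL column types collapsed to `dt.Any` during schema reflection; with these rules they map to precise ibis types, which is what the renamed test below verifies end to end. A direct-call sketch, assuming `PostgreSQLDialect` is importable from `ibis.sql.alchemy` and instantiable with no arguments:

```python
import sqlalchemy as sa
import ibis.expr.datatypes as dt
from ibis.sql.alchemy import PostgreSQLDialect  # assumed import path

# Reflection now yields a precise type instead of dt.Any.
assert dt.dtype(PostgreSQLDialect(), sa.dialects.postgresql.JSONB()) == dt.JSONB()
```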
8 changes: 4 additions & 4 deletions ibis/sql/postgres/tests/test_client.py
@@ -137,12 +137,12 @@ def test_schema_table():
assert isinstance(schema['tables'], ir.TableExpr)


def test_schema_unsupported_type_conversion():
def test_schema_type_conversion():
typespec = [
# name, type, nullable
('json', sa.dialects.postgresql.JSON, True, dt.any),
('jsonb', sa.dialects.postgresql.JSONB, True, dt.any),
('uuid', sa.dialects.postgresql.UUID, True, dt.any),
('json', sa.dialects.postgresql.JSON, True, dt.JSON),
('jsonb', sa.dialects.postgresql.JSONB, True, dt.JSONB),
('uuid', sa.dialects.postgresql.UUID, True, dt.UUID),
]

sqla_types = []
31 changes: 31 additions & 0 deletions ibis/tests/all/test_json.py
@@ -0,0 +1,31 @@
""" Tests for json data types"""
import json

import pytest
from pytest import param

import ibis
from ibis.tests.backends import PostgreSQL

# backends that support JSON types
all_db_json_supported = [PostgreSQL]


@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
@pytest.mark.only_on_backends(all_db_json_supported)
def test_json(backend, con, data, alltypes):
json_value = json.dumps(data)
lit = ibis.literal(json_value, type='json').name('tmp')
expr = alltypes[[alltypes.id, lit]].head(1)
df = expr.execute()
assert df['tmp'].iloc[0] == json_value


@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
@pytest.mark.only_on_backends(all_db_json_supported)
def test_jsonb(backend, con, data, alltypes):
jsonb_value = json.dumps(data).encode('utf8')
lit = ibis.literal(jsonb_value, type='jsonb').name('tmp')
expr = alltypes[[alltypes.id, lit]].head(1)
df = expr.execute()
assert df['tmp'].iloc[0] == jsonb_value
14 changes: 13 additions & 1 deletion ibis/tests/all/test_string.py
@@ -3,7 +3,7 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.tests.backends import Clickhouse, Impala, PySpark, Spark
from ibis.tests.backends import Clickhouse, Impala, PostgreSQL, PySpark, Spark


def test_string_col_is_unicode(backend, alltypes, df):
@@ -233,3 +233,15 @@ def test_string(backend, alltypes, df, result_func, expected_func):

expected = backend.default_series_rename(expected_func(df))
backend.assert_series_equal(result, expected)


@pytest.mark.parametrize(
'data, data_type',
[param('123e4567-e89b-12d3-a456-426655440000', 'uuid', id='uuid')],
)
@pytest.mark.only_on_backends([PostgreSQL])
def test_special_strings(backend, con, alltypes, data, data_type):
lit = ibis.literal(data, type=data_type).name('tmp')
expr = alltypes[[alltypes.id, lit]].head(1)
df = expr.execute()
assert df['tmp'].iloc[0] == data
