Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Json UUID any #1962

Merged
merged 5 commits into from
Oct 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 84 additions & 2 deletions ibis/expr/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,23 @@ def _literal_value_hash_key(self, value):
return self, _tuplize(value.items())


class JSON(String):
    """JSON (JavaScript Object Notation) text format."""

    scalar = ir.JSONScalar
    column = ir.JSONColumn

    # Keep instances dict-free, consistent with the other DataType
    # subclasses in this module (e.g. UUID, MultiPolygon).
    __slots__ = ()


class JSONB(Binary):
    """JSON (JavaScript Object Notation) data stored as a binary
    representation, which eliminates whitespace, duplicate keys,
    and key ordering.
    """

    scalar = ir.JSONBScalar
    column = ir.JSONBColumn

    # Keep instances dict-free, consistent with the other DataType
    # subclasses in this module (e.g. UUID, MultiPolygon).
    __slots__ = ()


class GeoSpatial(DataType):
__slots__ = 'geotype', 'srid'

Expand Down Expand Up @@ -779,6 +796,17 @@ class MultiPolygon(GeoSpatial):
__slots__ = ()


class UUID(String):
    """A universally unique identifier (UUID).

    A 128-bit number used to identify information in computer systems,
    represented here as a string-based ibis data type.
    """

    __slots__ = ()

    scalar = ir.UUIDScalar
    column = ir.UUIDColumn


# ---------------------------------------------------------------------
any = Any()
null = Null()
Expand Down Expand Up @@ -815,7 +843,11 @@ class MultiPolygon(GeoSpatial):
multilinestring = MultiLineString()
multipoint = MultiPoint()
multipolygon = MultiPolygon()

# json
json = JSON()
jsonb = JSONB()
# special string based data type
uuid = UUID()

_primitive_types = [
('any', any),
Expand Down Expand Up @@ -881,6 +913,9 @@ class Tokens:
MULTIPOINT = 28
MULTIPOLYGON = 29
SEMICOLON = 30
JSON = 31
JSONB = 32
UUID = 33

@staticmethod
def name(value):
Expand All @@ -891,7 +926,6 @@ def name(value):
(getattr(Tokens, n), n) for n in dir(Tokens) if n.isalpha() and n.isupper()
)


Token = collections.namedtuple('Token', ('type', 'value'))


Expand Down Expand Up @@ -1005,6 +1039,22 @@ def name(value):
),
)
]
+ [
# json data type
(
'(?P<{}>{})'.format(token.upper(), token),
lambda token, toktype=toktype: Token(toktype, token),
)
for token, toktype in zip(
# note: `jsonb` should be first to avoid conflict with `json`
('jsonb', 'json'),
(Tokens.JSONB, Tokens.JSON),
)
]
+ [
# special string based data types
('(?P<UUID>uuid)', lambda token: Token(Tokens.UUID, token))
]
+ [
# integers, for decimal spec
(r'(?P<INTEGER>\d+)', lambda token: Token(Tokens.INTEGER, int(token))),
Expand Down Expand Up @@ -1209,6 +1259,12 @@ def type(self) -> DataType:
| "multipolygon" ":" geotype
| "multipolygon" ";" srid ":" geotype

json : "json"

jsonb : "jsonb"

uuid : "uuid"

"""
if self._accept(Tokens.PRIMITIVE):
assert self.tok is not None
Expand Down Expand Up @@ -1322,6 +1378,13 @@ def type(self) -> DataType:
self._expect(Tokens.RBRACKET)
return Struct(names, types)

# json data types
elif self._accept(Tokens.JSON):
return JSON()

elif self._accept(Tokens.JSONB):
return JSONB()

# geo spatial data type
elif self._accept(Tokens.GEOMETRY):
return Geometry()
Expand Down Expand Up @@ -1431,6 +1494,10 @@ def type(self) -> DataType:

return MultiPolygon(geotype=geotype, srid=srid)

# special string based data types
elif self._accept(Tokens.UUID):
return UUID()

else:
raise SyntaxError('Type cannot be parsed: {}'.format(self.text))

Expand Down Expand Up @@ -1763,6 +1830,16 @@ def can_cast_variadic(
return castable(source.value_type, target.value_type)


@castable.register(JSON, JSON)
def can_cast_json(source, target, **kwargs):
    """Any JSON type is castable to any other JSON type."""
    return True


@castable.register(JSONB, JSONB)
def can_cast_jsonb(source, target, **kwargs):
    """Any JSONB type is castable to any other JSONB type."""
    return True


# geo spatial data type
# cast between same type, used to cast from/to geometry and geography
GEO_TYPES = (
Expand All @@ -1782,6 +1859,11 @@ def can_cast_geospatial(source, target, **kwargs):
return True


@castable.register(UUID, UUID)
def can_cast_special_string(source, target, **kwargs):
    """Any UUID type is castable to any other UUID type."""
    return True


# @castable.register(Map, Map)
# def can_cast_maps(source, target):
# return (source.equals(target) or
Expand Down
36 changes: 36 additions & 0 deletions ibis/expr/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,30 @@ class MapColumn(AnyColumn, MapValue):
pass # noqa: E701,E302


class JSONValue(StringValue):
    """String-backed expression whose values are JSON text."""


class JSONScalar(StringScalar, JSONValue):
    """Scalar expression of JSON type."""


class JSONColumn(StringColumn, JSONValue):
    """Column expression of JSON type."""


class JSONBValue(BinaryValue):
    """Binary-backed expression whose values are JSONB data."""


class JSONBScalar(BinaryScalar, JSONBValue):
    """Scalar expression of JSONB type."""


class JSONBColumn(BinaryColumn, JSONBValue):
    """Column expression of JSONB type."""


class StructValue(AnyValue):
def __dir__(self):
return sorted(
Expand Down Expand Up @@ -909,6 +933,18 @@ class MultiPolygonColumn(GeoSpatialColumn, MultiPolygonValue): # noqa: E302
pass # noqa: E701


class UUIDValue(StringValue):
    """String-backed expression whose values are UUIDs."""


class UUIDScalar(StringScalar, UUIDValue):
    """Scalar expression of UUID type."""


class UUIDColumn(StringColumn, UUIDValue):
    """Column expression of UUID type."""


class ListExpr(ColumnExpr, AnyValue):
@property
def values(self):
Expand Down
15 changes: 15 additions & 0 deletions ibis/sql/alchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,21 @@ def sa_double(_, satype, nullable=True):
return dt.Double(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.UUID)
def sa_uuid(_, satype, nullable=True):
    """Map a SQLAlchemy PostgreSQL UUID column type to ibis UUID."""
    return dt.UUID(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.JSON)
def sa_json(_, satype, nullable=True):
    """Map a SQLAlchemy PostgreSQL JSON column type to ibis JSON."""
    return dt.JSON(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.JSONB)
def sa_jsonb(_, satype, nullable=True):
    """Map a SQLAlchemy PostgreSQL JSONB column type to ibis JSONB."""
    return dt.JSONB(nullable=nullable)


if geospatial_supported:

@dt.dtype.register(SQLAlchemyDialect, (ga.Geometry, ga.types._GISType))
Expand Down
30 changes: 29 additions & 1 deletion ibis/sql/postgres/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
import ibis
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.sql.alchemy as alch # noqa: E402
from ibis.tests.util import assert_equal

pytest.importorskip('sqlalchemy')
sa = pytest.importorskip('sqlalchemy')
pytest.importorskip('psycopg2')

pytestmark = pytest.mark.postgresql
Expand Down Expand Up @@ -136,6 +137,33 @@ def test_schema_table():
assert isinstance(schema['tables'], ir.TableExpr)


def test_schema_type_conversion():
    """PostgreSQL JSON/JSONB/UUID column types convert to the matching
    ibis data types when deriving a schema from a SQLAlchemy table.
    """
    typespec = [
        # column name, SQLAlchemy type, nullable, expected ibis dtype
        ('json', sa.dialects.postgresql.JSON, True, dt.JSON),
        ('jsonb', sa.dialects.postgresql.JSONB, True, dt.JSONB),
        ('uuid', sa.dialects.postgresql.UUID, True, dt.UUID),
    ]

    sqla_types = []
    ibis_types = []
    for name, t, nullable, ibis_type in typespec:
        sqla_type = sa.Column(name, t, nullable=nullable)
        sqla_types.append(sqla_type)
        ibis_types.append((name, ibis_type(nullable=nullable)))

    # Build a SQLAlchemy table containing the PostgreSQL-specific
    # JSON, JSONB, and UUID column types.
    engine = sa.create_engine('postgresql://')
    table = sa.Table('tname', sa.MetaData(bind=engine), *sqla_types)

    # The schema derived from the table should map each column to the
    # corresponding ibis data type declared in ``typespec``.
    schema = alch.schema_from_table(table)
    expected = ibis.schema(ibis_types)

    assert_equal(schema, expected)


def test_interval_films_schema(con):
t = con.table("films")
assert t.len.type() == dt.Interval(unit="m")
Expand Down
31 changes: 31 additions & 0 deletions ibis/tests/all/test_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
""" Tests for json data types"""
import json

import pytest
from pytest import param

import ibis
from ibis.tests.backends import PostgreSQL

# add here backends that support json types
all_db_geo_supported = [PostgreSQL]


@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
@pytest.mark.only_on_backends(all_db_geo_supported)
def test_json(backend, con, data, alltypes):
json_value = json.dumps(data)
lit = ibis.literal(json_value, type='json').name('tmp')
expr = alltypes[[alltypes.id, lit]].head(1)
df = expr.execute()
assert df['tmp'].iloc[0] == json_value


@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
@pytest.mark.only_on_backends(all_db_geo_supported)
def test_jsonb(backend, con, data, alltypes):
jsonb_value = json.dumps(data).encode('utf8')
lit = ibis.literal(jsonb_value, type='jsonb').name('tmp')
expr = alltypes[[alltypes.id, lit]].head(1)
df = expr.execute()
assert df['tmp'].iloc[0] == jsonb_value
14 changes: 13 additions & 1 deletion ibis/tests/all/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.tests.backends import Clickhouse, Impala, PySpark, Spark
from ibis.tests.backends import Clickhouse, Impala, PostgreSQL, PySpark, Spark


def test_string_col_is_unicode(backend, alltypes, df):
Expand Down Expand Up @@ -233,3 +233,15 @@ def test_string(backend, alltypes, df, result_func, expected_func):

expected = backend.default_series_rename(expected_func(df))
backend.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    'data, data_type',
    [param('123e4567-e89b-12d3-a456-426655440000', 'uuid', id='uuid')],
)
@pytest.mark.only_on_backends([PostgreSQL])
def test_special_strings(backend, con, alltypes, data, data_type):
    """A special string-based literal (e.g. UUID) executes back to the
    exact input value.
    """
    tmp = ibis.literal(data, type=data_type).name('tmp')
    projection = alltypes[[alltypes.id, tmp]].head(1)
    result = projection.execute()
    assert result['tmp'].iloc[0] == data