From 654bcdac24a3ed807a2f03e9ce19a980531c1406 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Fri, 15 Mar 2024 11:15:14 -0800 Subject: [PATCH] feat: support type kwarg in array() and map() fixes https://github.com/ibis-project/ibis/issues/8289 This makes a lot of changes. It was hard for me to separate them out as I implemented them. But now that it's all hashed out, I can try to split this up into separate commits if you want. But that might be somewhat hard in some cases. Several of the backends were always broken here; they just weren't getting caught. I marked them as broken, and we can fix them in a follow-up. You can test this locally with e.g. `pytest -m duckdb -k factory ibis/backends/tests/test_array.py ibis/backends/tests/test_map.py ibis/backends/tests/test_struct.py` Also, fix a typing bug: map() can accept ArrayValues, not just ArrayColumns. Also, support passing in None. Also, raise an error when the value type can't be inferred from empty Python literals (e.g. what is the value type for the elements of []?). Also, make the type argument for struct() always have an effect, not just when passing in Python literals. So basically it can act like a cast. Also, make these constructors idempotent. --- ibis/backends/tests/test_array.py | 31 ++++++++++++++++++ ibis/backends/tests/test_map.py | 27 ++++++++++++++++ ibis/backends/tests/test_struct.py | 29 +++++++++++++++++ ibis/expr/types/arrays.py | 51 +++++++++++++++++++++-------- ibis/expr/types/maps.py | 52 +++++++++++++++++++++--------- ibis/expr/types/structs.py | 49 +++++++++++++++++----------- 6 files changed, 191 insertions(+), 48 deletions(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 156c31bda8d5f..6a1d0469df2bb 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -70,6 +70,37 @@ # list. 
+def test_array_factory(con): + a = ibis.array([1, 2, 3]) + assert con.execute(a) == [1, 2, 3] + a2 = ibis.array(a) + assert con.execute(a2) == [1, 2, 3] + typed = ibis.array([1, 2, 3], type="array") + assert con.execute(typed) == [1.0, 2.0, 3.0] + typed2 = ibis.array(a, type="array") + assert con.execute(typed2) == [1.0, 2.0, 3.0] + + +@pytest.mark.notimpl("postgres", raises=PsycoPg2IndeterminateDatatype) +def test_array_factory_empty(con): + with pytest.raises(TypeError): + ibis.array([]) + + empty_typed = ibis.array([], type="array") + assert str(empty_typed.type()) == "array" + assert con.execute(empty_typed) == [] + + +@pytest.mark.broken("polars", raises=AssertionError) +@pytest.mark.broken("pandas", raises=TypeError) +def test_array_factory_null(con): + with pytest.raises(TypeError): + ibis.array(None) + none_typed = ibis.array(None, type="array") + assert str(none_typed.type()) == "array" + assert con.execute(none_typed) is None + + def test_array_column(backend, alltypes, df): expr = ibis.array( [alltypes["double_col"], alltypes["double_col"], 5.0, ibis.literal(6.0)] diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index a80945653325f..3de2ffb6900a7 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -26,6 +26,33 @@ ] +def test_map_factory(con): + m = ibis.map({"a": 1, "b": 2}) + assert con.execute(m) == {"a": 1, "b": 2} + m2 = ibis.map(m) + assert con.execute(m2) == {"a": 1, "b": 2} + typed = ibis.map({"a": 1, "b": 2}, type="map") + assert con.execute(typed) == {"a": 1.0, "b": 2.0} + typed2 = ibis.map(m, type="map") + assert con.execute(typed2) == {"a": 1.0, "b": 2.0} + + +def test_map_factory_empty(con): + with pytest.raises(TypeError): + ibis.map({}) + empty_typed = ibis.map({}, type="map") + assert str(empty_typed.type()) == "map" + assert con.execute(empty_typed) == {} + + +def test_map_factory_null(con): + with pytest.raises(TypeError): + ibis.map(None) + null_typed = ibis.map(None, 
type="map") + assert str(null_typed.type()) == "map" + assert con.execute(null_typed) is None + + @pytest.mark.notimpl(["pandas", "dask"]) def test_map_table(backend): table = backend.map diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 682da63faf273..dab0ca83ca9d1 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -23,6 +23,35 @@ ] +@pytest.mark.notimpl(["postgres"]) +@pytest.mark.broken(["pandas", "dask"], reason="casting is broken") +def test_struct_factory(con): + s = ibis.struct({"a": 1, "b": 2}) + assert con.execute(s) == {"a": 1, "b": 2} + s2 = ibis.struct(s) + assert con.execute(s2) == {"a": 1, "b": 2} + typed = ibis.struct({"a": 1, "b": 2}, type="struct") + assert con.execute(typed) == {"a": 1.0, "b": 2.0} + typed2 = ibis.struct(s, type="struct") + assert con.execute(typed2) == {"a": 1.0, "b": 2.0} + + +def test_struct_factory_empty(con): + with pytest.raises(TypeError): + ibis.struct({}) + with pytest.raises(TypeError): + ibis.struct({}, type="struct") + + +@pytest.mark.broken("polars", raises=AttributeError) +def test_struct_factory_null(con): + with pytest.raises(TypeError): + ibis.struct(None) + none_typed = ibis.struct(None, type="struct") + assert str(none_typed.type()) == "struct" + assert con.execute(none_typed) is None + + @pytest.mark.notimpl(["dask"]) @pytest.mark.parametrize( ("field", "expected"), diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index e7c54135b089b..1a4912981b12e 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -5,14 +5,16 @@ from public import public +import ibis import ibis.expr.operations as ops +import ibis.expr.types as ir from ibis.common.deferred import Deferred, deferrable from ibis.expr.types.generic import Column, Scalar, Value if TYPE_CHECKING: from collections.abc import Iterable - import ibis.expr.types as ir + from ibis.expr.types import dt from ibis.expr.types.typing import V import 
ibis.common.exceptions as com @@ -1081,7 +1083,10 @@ def __getitem__(self, index: int | ir.IntegerValue | slice) -> ir.Column: @public @deferrable -def array(values: Iterable[V]) -> ArrayValue: +def array( + values: ArrayValue | Iterable[V] | None, + type: str | dt.DataType | None = None, +) -> ArrayValue: """Create an array expression. If any values are [column expressions](../concepts/datatypes.qmd) the @@ -1092,6 +1097,9 @@ def array(values: Iterable[V]) -> ArrayValue: ---------- values An iterable of Ibis expressions or Python literals + type + An instance of `ibis.expr.datatypes.DataType` or a string indicating + the Ibis type of `value`. eg `array`. Returns ------- @@ -1120,15 +1128,32 @@ def array(values: Iterable[V]) -> ArrayValue: │ [3, 42, ... +1] │ └──────────────────────┘ - >>> ibis.array([t.a, 42 + ibis.literal(5)]) - ┏━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ Array() ┃ - ┡━━━━━━━━━━━━━━━━━━━━━━┩ - │ array │ - ├──────────────────────┤ - │ [1, 47] │ - │ [2, 47] │ - │ [3, 47] │ - └──────────────────────┘ + >>> ibis.array([t.a, 42 + ibis.literal(5)], type="array") + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ Cast(Array(), array) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ array │ + ├───────────────────────────────┤ + │ [1.0, 47.0] │ + │ [2.0, 47.0] │ + │ [3.0, 47.0] │ + └───────────────────────────────┘ """ - return ops.Array(tuple(values)).to_expr() + if values is None: + if type is None: + raise TypeError("type must be specified when values is None") + return ibis.literal(None, type=type) + + if isinstance(values, ir.ArrayValue): + result = values + else: + values = tuple(values) + if len(values) == 0: + if type is None: + raise TypeError("type must be specified when values empty") + result = ibis.literal([], type=type) + else: + result = ops.Array(values).to_expr() + if type is not None: + result = result.cast(type) + return result diff --git a/ibis/expr/types/maps.py b/ibis/expr/types/maps.py index b61f7caceedd8..1151de1c47109 100644 --- a/ibis/expr/types/maps.py +++ 
b/ibis/expr/types/maps.py @@ -4,15 +4,16 @@ from public import public +import ibis import ibis.expr.operations as ops +import ibis.expr.types as ir from ibis.common.deferred import deferrable from ibis.expr.types.generic import Column, Scalar, Value if TYPE_CHECKING: from collections.abc import Iterable, Mapping - import ibis.expr.types as ir - from ibis.expr.types.arrays import ArrayColumn + from ibis.expr.types import dt @public @@ -435,8 +436,10 @@ def __getitem__(self, key: ir.Value) -> ir.Column: @public @deferrable def map( - keys: Iterable[Any] | Mapping[Any, Any] | ArrayColumn, - values: Iterable[Any] | ArrayColumn | None = None, + keys: Iterable[Any] | Mapping[Any, Any] | ir.ArrayValue | MapValue | None, + values: Iterable[Any] | ir.ArrayValue | None = None, + *, + type: str | dt.DataType | None = None, ) -> MapValue: """Create a MapValue. @@ -449,6 +452,9 @@ def map( Keys of the map or `Mapping`. If `keys` is a `Mapping`, `values` must be `None`. values Values of the map or `None`. If `None`, the `keys` argument must be a `Mapping`. + type + An instance of `ibis.expr.datatypes.DataType` or a string indicating + the Ibis type of `value`. eg `map`. 
Returns ------- @@ -476,16 +482,30 @@ def map( │ ['a', 'b'] │ [1, 2] │ │ ['b'] │ [3] │ └──────────────────────┴──────────────────────┘ - >>> ibis.map(t.keys, t.values) - ┏━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ Map(keys, values) ┃ - ┡━━━━━━━━━━━━━━━━━━━━━━┩ - │ map │ - ├──────────────────────┤ - │ {'a': 1, 'b': 2} │ - │ {'b': 3} │ - └──────────────────────┘ + >>> ibis.map(t.keys, t.values, type="map") + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ Cast(Map(keys, values), map) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ map │ + ├───────────────────────────────────────────────┤ + │ {'a': 1.0, 'b': 2.0} │ + │ {'b': 3.0} │ + └───────────────────────────────────────────────┘ """ - if values is None: - keys, values = tuple(keys.keys()), tuple(keys.values()) - return ops.Map(keys, values).to_expr() + if keys is None: + if type is None: + raise TypeError("Must specify a type when keys is None") + return ibis.literal(None, type=type) + + if isinstance(keys, MapValue): + result = keys + else: + if values is None: + keys, values = tuple(keys.keys()), tuple(keys.values()) + if len(keys) == 0 and type is None: + raise TypeError("Must specify a type when keys is empty") + result = ops.Map(keys, values).to_expr() + + if type is not None: + result = result.cast(type) + return result diff --git a/ibis/expr/types/structs.py b/ibis/expr/types/structs.py index 65a16700318a8..af4c7c86e7a0c 100644 --- a/ibis/expr/types/structs.py +++ b/ibis/expr/types/structs.py @@ -22,7 +22,7 @@ @public @deferrable def struct( - value: Iterable[tuple[str, V]] | Mapping[str, V], + value: Iterable[tuple[str, V]] | Mapping[str, V] | StructValue | None, type: str | dt.DataType | None = None, ) -> StructValue: """Create a struct expression. @@ -37,8 +37,7 @@ def struct( `(str, Value)`. type An instance of `ibis.expr.datatypes.DataType` or a string indicating - the Ibis type of `value`. This is only used if all of the input values - are Python literals. eg `struct`. + the Ibis type of `value`. 
eg `struct`. Returns ------- @@ -62,26 +61,38 @@ def struct( Create a struct column from a column and a scalar literal >>> t = ibis.memtable({"a": [1, 2, 3]}) - >>> ibis.struct([("a", t.a), ("b", "foo")]) - ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ StructColumn() ┃ - ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ - │ struct │ - ├─────────────────────────────┤ - │ {'a': 1, 'b': 'foo'} │ - │ {'a': 2, 'b': 'foo'} │ - │ {'a': 3, 'b': 'foo'} │ - └─────────────────────────────┘ + >>> ibis.struct([("a", t.a), ("b", "foo")], type="struct") + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ Cast(StructColumn(), struct) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ struct │ + ├─────────────────────────────────────────────────────┤ + │ {'a': 1.0, 'b': 'foo'} │ + │ {'a': 2.0, 'b': 'foo'} │ + │ {'a': 3.0, 'b': 'foo'} │ + └─────────────────────────────────────────────────────┘ """ import ibis.expr.operations as ops - fields = dict(value) - if any(isinstance(value, Value) for value in fields.values()): - names = tuple(fields.keys()) - values = tuple(fields.values()) - return ops.StructColumn(names=names, values=values).to_expr() + if value is None: + if type is None: + raise TypeError("Must specify type if value is None") + return literal(None, type=type) + + if isinstance(value, StructValue): + result = value else: - return literal(collections.OrderedDict(fields), type=type) + fields = dict(value) + if any(isinstance(value, Value) for value in fields.values()): + names = tuple(fields.keys()) + values = tuple(fields.values()) + result = ops.StructColumn(names=names, values=values).to_expr() + else: + result = literal(collections.OrderedDict(fields), type=type) + + if type is not None: + result = result.cast(type) + return result @public