From 6a618c88ae754bf545275ded74bcc047c6f483e2 Mon Sep 17 00:00:00 2001 From: Chloe He Date: Wed, 27 Mar 2024 15:57:30 -0700 Subject: [PATCH] chore(flink): update pytest markers and run linting --- ibis/backends/flink/tests/test_memtable.py | 3 +- ibis/backends/sql/dialects.py | 3 +- ibis/backends/tests/test_array.py | 46 +++++++++++++++++----- ibis/formats/pandas.py | 17 ++++++-- 4 files changed, 53 insertions(+), 16 deletions(-) diff --git a/ibis/backends/flink/tests/test_memtable.py b/ibis/backends/flink/tests/test_memtable.py index bfeb06d2b4e66..29ad04096f531 100644 --- a/ibis/backends/flink/tests/test_memtable.py +++ b/ibis/backends/flink/tests/test_memtable.py @@ -33,7 +33,8 @@ def test_create_memtable(con, data, schema, expected): t = ibis.memtable(data, schema=ibis.schema(schema)) # cannot use con.execute(t) directly because of some behavioral discrepancy between - # `TableEnvironment.execute_sql()` and `TableEnvironment.sql_query()` + # `TableEnvironment.execute_sql()` and `TableEnvironment.sql_query()`; this doesn't + # seem to be an issue if we don't execute memtable directly result = con.raw_sql(con.compile(t)) # raw_sql() returns a `TableResult` object and doesn't natively convert to pandas assert list(result.collect()) == expected diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py index 9c615d6eb617b..f9c7a7e405733 100644 --- a/ibis/backends/sql/dialects.py +++ b/ibis/backends/sql/dialects.py @@ -200,7 +200,8 @@ class Generator(Hive.Generator): sge.ArrayConcat: rename_func("array_concat"), sge.ArraySize: rename_func("cardinality"), sge.Length: rename_func("char_length"), - sge.TryCast: lambda self, e: f"TRY_CAST({e.this.sql(self.dialect)} AS {e.to.sql(self.dialect)})", + sge.TryCast: lambda self, + e: f"TRY_CAST({e.this.sql(self.dialect)} AS {e.to.sql(self.dialect)})", sge.DayOfYear: rename_func("dayofyear"), sge.DayOfWeek: rename_func("dayofweek"), sge.DayOfMonth: rename_func("dayofmonth"), diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 5fe451d5b3aa1..973566ca1b1e5 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -807,12 +807,6 @@ def test_array_intersect(con, data): @pytest.mark.broken( ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError ) -@pytest.mark.broken( - ["flink"], - raises=Py4JJavaError, - reason="flink throws exception on named struct with a single field", - # also cannot do array> in flink; needs to be written as row>> -) def test_unnest_struct(con): data = {"value": [[{"a": 1}, {"a": 2}], [{"a": 3}, {"a": 4}]]} t = ibis.memtable(data, schema=ibis.schema({"value": "!array>"})) @@ -824,6 +818,39 @@ def test_unnest_struct(con): tm.assert_series_equal(result, expected) +@builtin_array +@pytest.mark.notimpl( + ["clickhouse"], + raises=ClickHouseDatabaseError, + reason="ClickHouse won't accept dicts for struct type values", +) +@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError) +@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError) +@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.broken( + ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError +) +@pytest.mark.broken( + ["flink"], reason="flink unnests a and b as separate columns", raises=Py4JJavaError +) +def test_unnest_struct_with_multiple_fields(con): + data = { + "value": [ + [{"a": 1, "b": "banana"}, {"a": 2, "b": "apple"}], + [{"a": 3, "b": "coconut"}, {"a": 4, "b": "orange"}], + ] + } + t = ibis.memtable( + data, schema=ibis.schema({"value": "!array>"}) + ) + expr = t.value.unnest() + + result = con.execute(expr) + + expected = pd.DataFrame(data).explode("value").iloc[:, 0].reset_index(drop=True) + tm.assert_series_equal(result, expected) + + array_zip_notimpl = pytest.mark.notimpl( [ "dask", @@ -909,11 +936,10 @@ def test_zip_null(con, fn): @pytest.mark.broken( ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError ) -@pytest.mark.broken( +@pytest.mark.notyet( ["flink"], raises=Py4JJavaError, - reason="flink throws exception on named struct with a single field", - # also cannot do array> in flink; needs to be written as row>> + reason="does not seem to support field selection on unnest", ) def test_array_of_struct_unnest(con): jobs = ibis.memtable( @@ -1095,7 +1121,7 @@ def test_range_start_stop_step_zero(con, start, stop): raises=com.OperationNotDefinedError, reason="backend doesn't support unnest", ) -@pytest.mark.broken( +@pytest.mark.notyet( ["flink"], raises=Py4JJavaError, reason="SQL validation failed; Flink does not support ARRAY[]", # https://issues.apache.org/jira/browse/FLINK-20578 diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index e6619030b3bea..70ba4ffa68449 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -14,6 +14,7 @@ import ibis.expr.datatypes as dt import ibis.expr.schema as sch +from ibis import util from ibis.common.numeric import normalize_decimal from ibis.common.temporal import normalize_timezone from ibis.formats import DataMapper, SchemaMapper, TableProxy @@ -178,9 +179,13 @@ def convert_GeoSpatial(cls, s, dtype, pandas_type): return gpd.GeoSeries(s) return gpd.GeoSeries.from_wkb(s) - convert_Point = convert_LineString = convert_Polygon = convert_MultiLineString = ( - convert_MultiPoint - ) = convert_MultiPolygon = convert_GeoSpatial + convert_Point = ( + convert_LineString + ) = ( + convert_Polygon + ) = ( + convert_MultiLineString + ) = convert_MultiPoint = convert_MultiPolygon = convert_GeoSpatial @classmethod def convert_default(cls, s, dtype, pandas_type): @@ -305,7 +310,11 @@ def convert(values, names=dtype.names, converters=converters): if values is None: return values - items = values.items() if isinstance(values, dict) else zip(names, values) + items = ( + values.items() + if isinstance(values, dict) + else zip(names, util.promote_list(values)) + ) return { k: converter(v) if v is not None else v for converter, (k, v) in zip(converters, items)