From 6a618c88ae754bf545275ded74bcc047c6f483e2 Mon Sep 17 00:00:00 2001
From: Chloe He <chloehe47@gmail.com>
Date: Wed, 27 Mar 2024 15:57:30 -0700
Subject: [PATCH] chore(flink): update pytest markers and run linting

---
 ibis/backends/flink/tests/test_memtable.py |  3 +-
 ibis/backends/sql/dialects.py              |  3 +-
 ibis/backends/tests/test_array.py          | 46 +++++++++++++++++-----
 ibis/formats/pandas.py                     | 17 ++++++--
 4 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/ibis/backends/flink/tests/test_memtable.py b/ibis/backends/flink/tests/test_memtable.py
index bfeb06d2b4e66..29ad04096f531 100644
--- a/ibis/backends/flink/tests/test_memtable.py
+++ b/ibis/backends/flink/tests/test_memtable.py
@@ -33,7 +33,8 @@
 def test_create_memtable(con, data, schema, expected):
     t = ibis.memtable(data, schema=ibis.schema(schema))
     # cannot use con.execute(t) directly because of some behavioral discrepancy between
-    # `TableEnvironment.execute_sql()` and `TableEnvironment.sql_query()`
+    # `TableEnvironment.execute_sql()` and `TableEnvironment.sql_query()`; this doesn't
+    # seem to be an issue if we don't execute memtable directly
     result = con.raw_sql(con.compile(t))
     # raw_sql() returns a `TableResult` object and doesn't natively convert to pandas
     assert list(result.collect()) == expected
diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py
index 9c615d6eb617b..f9c7a7e405733 100644
--- a/ibis/backends/sql/dialects.py
+++ b/ibis/backends/sql/dialects.py
@@ -200,7 +200,8 @@ class Generator(Hive.Generator):
             sge.ArrayConcat: rename_func("array_concat"),
             sge.ArraySize: rename_func("cardinality"),
             sge.Length: rename_func("char_length"),
-            sge.TryCast: lambda self, e: f"TRY_CAST({e.this.sql(self.dialect)} AS {e.to.sql(self.dialect)})",
+            sge.TryCast: lambda self,
+            e: f"TRY_CAST({e.this.sql(self.dialect)} AS {e.to.sql(self.dialect)})",
             sge.DayOfYear: rename_func("dayofyear"),
             sge.DayOfWeek: rename_func("dayofweek"),
             sge.DayOfMonth: rename_func("dayofmonth"),
diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py
index 5fe451d5b3aa1..973566ca1b1e5 100644
--- a/ibis/backends/tests/test_array.py
+++ b/ibis/backends/tests/test_array.py
@@ -807,12 +807,6 @@ def test_array_intersect(con, data):
 @pytest.mark.broken(
     ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
 )
-@pytest.mark.broken(
-    ["flink"],
-    raises=Py4JJavaError,
-    reason="flink throws exception on named struct with a single field",
-    # also cannot do array<struct<>> in flink; needs to be written as row<row<struct<>>>
-)
 def test_unnest_struct(con):
     data = {"value": [[{"a": 1}, {"a": 2}], [{"a": 3}, {"a": 4}]]}
     t = ibis.memtable(data, schema=ibis.schema({"value": "!array<!struct<a: !int>>"}))
@@ -824,6 +818,39 @@ def test_unnest_struct(con):
     tm.assert_series_equal(result, expected)
 
 
+@builtin_array
+@pytest.mark.notimpl(
+    ["clickhouse"],
+    raises=ClickHouseDatabaseError,
+    reason="ClickHouse won't accept dicts for struct type values",
+)
+@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
+@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError)
+@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError)
+@pytest.mark.broken(
+    ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
+)
+@pytest.mark.broken(
+    ["flink"], reason="flink unnests a and b as separate columns", raises=Py4JJavaError
+)
+def test_unnest_struct_with_multiple_fields(con):
+    data = {
+        "value": [
+            [{"a": 1, "b": "banana"}, {"a": 2, "b": "apple"}],
+            [{"a": 3, "b": "coconut"}, {"a": 4, "b": "orange"}],
+        ]
+    }
+    t = ibis.memtable(
+        data, schema=ibis.schema({"value": "!array<!struct<a: !int, b: !string>>"})
+    )
+    expr = t.value.unnest()
+
+    result = con.execute(expr)
+
+    expected = pd.DataFrame(data).explode("value").iloc[:, 0].reset_index(drop=True)
+    tm.assert_series_equal(result, expected)
+
+
 array_zip_notimpl = pytest.mark.notimpl(
     [
         "dask",
@@ -909,11 +936,10 @@ def test_zip_null(con, fn):
 @pytest.mark.broken(
     ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
 )
-@pytest.mark.broken(
+@pytest.mark.notyet(
     ["flink"],
     raises=Py4JJavaError,
-    reason="flink throws exception on named struct with a single field",
-    # also cannot do array<struct<>> in flink; needs to be written as row<row<struct<>>>
+    reason="does not seem to support field selection on unnest",
 )
 def test_array_of_struct_unnest(con):
     jobs = ibis.memtable(
@@ -1095,7 +1121,7 @@ def test_range_start_stop_step_zero(con, start, stop):
     raises=com.OperationNotDefinedError,
     reason="backend doesn't support unnest",
 )
-@pytest.mark.broken(
+@pytest.mark.notyet(
     ["flink"],
     raises=Py4JJavaError,
     reason="SQL validation failed; Flink does not support ARRAY[]",  # https://issues.apache.org/jira/browse/FLINK-20578
diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py
index e6619030b3bea..70ba4ffa68449 100644
--- a/ibis/formats/pandas.py
+++ b/ibis/formats/pandas.py
@@ -14,6 +14,7 @@
 
 import ibis.expr.datatypes as dt
 import ibis.expr.schema as sch
+from ibis import util
 from ibis.common.numeric import normalize_decimal
 from ibis.common.temporal import normalize_timezone
 from ibis.formats import DataMapper, SchemaMapper, TableProxy
@@ -178,9 +179,13 @@ def convert_GeoSpatial(cls, s, dtype, pandas_type):
             return gpd.GeoSeries(s)
         return gpd.GeoSeries.from_wkb(s)
 
-    convert_Point = convert_LineString = convert_Polygon = convert_MultiLineString = (
-        convert_MultiPoint
-    ) = convert_MultiPolygon = convert_GeoSpatial
+    convert_Point = (
+        convert_LineString
+    ) = (
+        convert_Polygon
+    ) = (
+        convert_MultiLineString
+    ) = convert_MultiPoint = convert_MultiPolygon = convert_GeoSpatial
 
     @classmethod
     def convert_default(cls, s, dtype, pandas_type):
@@ -305,7 +310,11 @@ def convert(values, names=dtype.names, converters=converters):
             if values is None:
                 return values
 
-            items = values.items() if isinstance(values, dict) else zip(names, values)
+            items = (
+                values.items()
+                if isinstance(values, dict)
+                else zip(names, util.promote_list(values))
+            )
             return {
                 k: converter(v) if v is not None else v
                 for converter, (k, v) in zip(converters, items)