From 844018b8d3a3398d746fdc04c966c7e19d311998 Mon Sep 17 00:00:00 2001 From: tobymao Date: Thu, 8 Feb 2024 19:32:12 -0800 Subject: [PATCH] fix: explode_outer to unnest closes #2941 --- sqlglot/transforms.py | 10 ++++++++++ tests/dialects/test_bigquery.py | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/sqlglot/transforms.py b/sqlglot/transforms.py index f13569f2a3..23b5d309f2 100644 --- a/sqlglot/transforms.py +++ b/sqlglot/transforms.py @@ -213,6 +213,16 @@ def new_name(names: t.Set[str], name: str) -> str: is_posexplode = isinstance(explode, exp.Posexplode) explode_arg = explode.this + if isinstance(explode, exp.ExplodeOuter): + explode_arg = exp.func( + "IF", + exp.func( + "ARRAY_SIZE", exp.func("COALESCE", explode_arg, exp.Array()) + ).eq(0), + exp.Array(expressions=[exp.null()]), + explode_arg, + ) + # This ensures that we won't use [POS]EXPLODE's argument as a new selection if isinstance(explode_arg, exp.Column): taken_select_names.add(explode_arg.output_name) diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 5cc5480013..f532e4acf5 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -620,6 +620,10 @@ def test_bigquery(self): "spark": "WITH cte AS (SELECT ARRAY(1, 2, 3) AS arr) SELECT EXPLODE(arr) FROM cte" }, ) + self.validate_all( + "SELECT IF(pos = pos_2, col, NULL) AS col FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [NULL], []))) - 1)) AS pos CROSS JOIN UNNEST(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [NULL], [])) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [NULL], [])) - 1) AND pos_2 = (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [NULL], [])) - 1))", + read={"spark": "select explode_outer([])"}, + ) self.validate_all( "SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z", write={