From 413df3bcee21faadade61549ca4e778e4b60fb7d Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 5 Sep 2024 14:10:21 -0400 Subject: [PATCH] fix(impala): fix lstrip, rstrip, strip --- .../test_string_builtins/lstrip/out.sql | 2 +- .../test_string_builtins/rstrip/out.sql | 2 +- .../test_string_builtins/strip/out.sql | 2 +- ibis/backends/sql/compilers/impala.py | 9 ++++++--- ibis/backends/tests/test_string.py | 14 ++------------ 5 files changed, 11 insertions(+), 18 deletions(-) diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql index f4ce746edd1d..361d6f5fb2e9 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/lstrip/out.sql @@ -1,3 +1,3 @@ SELECT - LTRIM(`t0`.`string_col`) AS `LStrip(string_col)` + LTRIM(`t0`.`string_col`, ' \t\n\r\v\f') AS `LStrip(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql index 825a36325c6c..4506448e7e27 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rstrip/out.sql @@ -1,3 +1,3 @@ SELECT - RTRIM(`t0`.`string_col`) AS `RStrip(string_col)` + RTRIM(`t0`.`string_col`, ' \t\n\r\v\f') AS `RStrip(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql index a3e2c1476733..bb26534ade86 100644 --- a/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/strip/out.sql @@ -1,3 +1,3 @@ SELECT - TRIM(`t0`.`string_col`) AS `Strip(string_col)` + RTRIM(LTRIM(`t0`.`string_col`, ' \t\n\r\v\f'), ' \t\n\r\v\f') AS `Strip(string_col)` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index e7cff88a9a8e..969d27f6714a 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -60,10 +60,7 @@ class ImpalaCompiler(SQLGlotCompiler): ops.DayOfWeekName: "dayname", ops.ExtractEpochSeconds: "unix_timestamp", ops.Hash: "fnv_hash", - ops.LStrip: "ltrim", ops.Ln: "ln", - ops.RStrip: "rtrim", - ops.Strip: "trim", ops.TypeOf: "typeof", } @@ -324,5 +321,11 @@ def visit_DateDelta(self, op, *, left, right, part): ) return self.f.datediff(left, right) + def visit_Strip(self, op, *, arg): + # Impala's `TRIM` doesn't allow specifying characters to trim off, unlike + # Impala's `RTRIM` and `LTRIM` which accept a set of characters to + # remove. + return self.visit_RStrip(op, arg=self.visit_LStrip(op, arg=arg)) + compiler = ImpalaCompiler() diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index cfaab851e56a..915a9f6f1ede 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1265,7 +1265,7 @@ def string_temp_table(backend, con): id="lstrip", marks=[ pytest.mark.notimpl( - ["impala", "pyspark"], + ["pyspark"], raises=AssertionError, reason="doesn't strip newline or tabs", ), @@ -1282,7 +1282,7 @@ def string_temp_table(backend, con): id="rstrip", marks=[ pytest.mark.notimpl( - ["impala", "pyspark"], + ["pyspark"], raises=AssertionError, reason="doesn't strip newline or tabs", ), @@ -1298,16 +1298,6 @@ def string_temp_table(backend, con): lambda t: t.str.strip(), id="strip", marks=[ - pytest.mark.notimpl( - ["impala"], - raises=AssertionError, - reason=""" - not stripping anything but space - can use - TRIM(TRAILING '\t\n\r ' FROM string_col) - TRIM(LEADING '\t\n\r ' FROM string_col) - """, - ), pytest.mark.notimpl( ["flink"], raises=AssertionError,