Skip to content

Commit

Permalink
refactor(padding): follow python string padding conventions (ibis-pro…
Browse files Browse the repository at this point in the history
…ject#10096)

BREAKING CHANGE: String padding operations now follow Python semantics: a string that is already longer than the requested padding length is left untouched instead of being truncated.
  • Loading branch information
gforsyth authored and ncclementi committed Sep 24, 2024
1 parent f910cef commit 3a83aa4
Show file tree
Hide file tree
Showing 16 changed files with 206 additions and 38 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
LPAD(`t0`.`string_col`, 1, 'a') AS `LPad(string_col, 1, 'a')`
LPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 1), 'a') AS `LPad(string_col, 1, 'a')`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
LPAD(`t0`.`string_col`, 25, ' ') AS `LPad(string_col, 25, ' ')`
LPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 25), ' ') AS `LPad(string_col, 25, ' ')`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
RPAD(`t0`.`string_col`, 1, 'a') AS `RPad(string_col, 1, 'a')`
RPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 1), 'a') AS `RPad(string_col, 1, 'a')`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
RPAD(`t0`.`string_col`, 25, ' ') AS `RPad(string_col, 25, ' ')`
RPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 25), ' ') AS `RPad(string_col, 25, ' ')`
FROM `functional_alltypes` AS `t0`
4 changes: 2 additions & 2 deletions ibis/backends/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,9 @@ def test_decimal_builtins_2(con, func, expected):
(L("0123").translate("012", "abc"), "abc3"),
(L("abcd").find("a"), 0),
(L("baaaab").find("b", 2), 5),
(L("abcd").lpad(1, "-"), "a"),
(L("abcd").lpad(1, "-"), "abcd"),
(L("abcd").lpad(5), " abcd"),
(L("abcd").rpad(1, "-"), "a"),
(L("abcd").rpad(1, "-"), "abcd"),
(L("abcd").rpad(5), "abcd "),
(L("abcd").find_in_set(["a", "b", "abcd"]), 2),
(L(", ").join(["a", "b"]), "a, b"),
Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/sql/compilers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,6 @@ class SQLGlotCompiler(abc.ABC):
ops.IsInf: "isinf",
ops.IsNan: "isnan",
ops.JSONGetItem: "json_extract",
ops.LPad: "lpad",
LastValue: "last_value",
ops.Levenshtein: "levenshtein",
ops.Ln: "ln",
Expand All @@ -347,7 +346,6 @@ class SQLGlotCompiler(abc.ABC):
ops.PercentRank: "percent_rank",
ops.Pi: "pi",
ops.Power: "pow",
ops.RPad: "rpad",
ops.Radians: "radians",
ops.RegexSearch: "regexp_like",
ops.RegexSplit: "regexp_split",
Expand Down Expand Up @@ -985,6 +983,12 @@ def visit_RStrip(self, op, *, arg):
def visit_LStrip(self, op, *, arg):
    """Compile a left-strip as ``ltrim(arg, <whitespace characters>)``."""
    strip_chars = string.whitespace
    return self.f.ltrim(arg, strip_chars)

def visit_LPad(self, op, *, arg, length, pad):
    """Compile a left-pad that never shortens ``arg``.

    The width handed to ``lpad`` is ``greatest(length(arg), length)``, so a
    value already longer than ``length`` keeps its own length.
    """
    target_width = self.f.greatest(self.f.length(arg), length)
    return self.f.lpad(arg, target_width, pad)

def visit_RPad(self, op, *, arg, length, pad):
    """Compile a right-pad that never shortens ``arg``.

    The width handed to ``rpad`` is ``greatest(length(arg), length)``, so a
    value already longer than ``length`` keeps its own length.
    """
    target_width = self.f.greatest(self.f.length(arg), length)
    return self.f.rpad(arg, target_width, pad)

def visit_Substring(self, op, *, arg, start, length):
if isinstance(op.length, ops.Literal) and (value := op.length.value) < 0:
raise com.IbisInputError(
Expand Down
2 changes: 0 additions & 2 deletions ibis/backends/sql/compilers/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,6 @@ class BigQueryCompiler(SQLGlotCompiler):
ops.IsInf: "is_inf",
ops.IsNan: "is_nan",
ops.Log10: "log10",
ops.LPad: "lpad",
ops.RPad: "rpad",
ops.Levenshtein: "edit_distance",
ops.Modulus: "mod",
ops.RegexReplace: "regexp_replace",
Expand Down
10 changes: 10 additions & 0 deletions ibis/backends/sql/compilers/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,16 @@ def visit_Strip(self, op, *, arg):
this=arg, position="BOTH", expression=sge.Literal.string(whitespace)
)

def visit_LPad(self, op, *, arg, length, pad):
    """Compile a left-pad with ``leftPadUTF8`` that never shortens ``arg``.

    The target width is ``greatest(lengthUTF8(arg), length)``, so it is never
    smaller than the value's own ``lengthUTF8``.
    """
    current_width = self.f.lengthUTF8(arg)
    target_width = self.f.greatest(current_width, length)
    return self.f.leftPadUTF8(arg, target_width, pad)

def visit_RPad(self, op, *, arg, length, pad):
    """Compile a right-pad with ``rightPadUTF8`` that never shortens ``arg``.

    The target width is ``greatest(lengthUTF8(arg), length)``, so it is never
    smaller than the value's own ``lengthUTF8``.
    """
    current_width = self.f.lengthUTF8(arg)
    target_width = self.f.greatest(current_width, length)
    return self.f.rightPadUTF8(arg, target_width, pad)

def visit_DayOfWeekIndex(self, op, *, arg):
    """Compile a day-of-week index from ``toDayOfWeek(arg)``.

    One is subtracted from ``toDayOfWeek(arg)`` and the result is wrapped
    into the range of ``len(calendar.day_name)`` with a double modulo.
    """
    days_per_week = len(calendar.day_name)
    shifted = self.f.toDayOfWeek(arg) - 1
    # Double modulo, exactly as written in the original expression.
    return ((shifted % days_per_week) + days_per_week) % days_per_week
Expand Down
14 changes: 14 additions & 0 deletions ibis/backends/sql/compilers/datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,20 @@ def visit_RegexSearch(self, op, *, arg, pattern):
def visit_StringContains(self, op, *, haystack, needle):
    """Compile a containment test as ``strpos(haystack, needle) > 0``."""
    position = self.f.strpos(haystack, needle)
    zero = sg.exp.convert(0)
    return position > zero

def visit_LPad(self, op, *, arg, length, pad):
    """Compile a left-pad that leaves long-enough values untouched.

    When ``length <= length(arg)`` the value is returned as is; otherwise
    ``pad`` repeated ``length - length(arg)`` times is concatenated in front.
    """
    wide_enough = length <= self.f.length(arg)
    filler = self.f.repeat(pad, length - self.f.length(arg))
    return self.if_(wide_enough, arg, self.f.concat(filler, arg))

def visit_RPad(self, op, *, arg, length, pad):
    """Compile a right-pad that leaves long-enough values untouched.

    When ``length <= length(arg)`` the value is returned as is; otherwise
    ``pad`` repeated ``length - length(arg)`` times is concatenated behind.
    """
    wide_enough = length <= self.f.length(arg)
    filler = self.f.repeat(pad, length - self.f.length(arg))
    return self.if_(wide_enough, arg, self.f.concat(arg, filler))

def visit_ExtractFragment(self, op, *, arg):
    """Compile fragment extraction as ``extract_url_field(arg, "fragment")``."""
    field = "fragment"
    return self.f.extract_url_field(arg, field)

Expand Down
14 changes: 14 additions & 0 deletions ibis/backends/sql/compilers/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,20 @@ def visit_Hash(self, op, *, arg):
def visit_StringConcat(self, op, *, arg):
    """Fold the operands in ``arg`` left to right into nested ``sge.DPipe`` nodes."""

    def pipe(left, right):
        # One DPipe node per adjacent pair, accumulating on the left.
        return sge.DPipe(this=left, expression=right)

    return reduce(pipe, arg)

def visit_LPad(self, op, *, arg, length, pad):
    """Compile a left-pad that leaves long-enough values untouched.

    When ``length <= length(arg)`` the value is returned as is; otherwise
    ``pad`` repeated ``length - length(arg)`` times is concatenated in front.
    """
    no_padding_needed = length <= self.f.length(arg)
    missing = length - self.f.length(arg)
    padded = self.f.concat(self.f.repeat(pad, missing), arg)
    return self.if_(no_padding_needed, arg, padded)

def visit_RPad(self, op, *, arg, length, pad):
    """Compile a right-pad that leaves long-enough values untouched.

    When ``length <= length(arg)`` the value is returned as is; otherwise
    ``pad`` repeated ``length - length(arg)`` times is concatenated behind.
    """
    no_padding_needed = length <= self.f.length(arg)
    missing = length - self.f.length(arg)
    padded = self.f.concat(arg, self.f.repeat(pad, missing))
    return self.if_(no_padding_needed, arg, padded)

def visit_StringSlice(self, op, *, arg, start, end):
if start is not None:
start += 1
Expand Down
14 changes: 14 additions & 0 deletions ibis/backends/sql/compilers/flink.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,20 @@ def visit_StringFind(self, op, *, arg, substr, start, end):

return self.f.instr(arg, substr)

def visit_LPad(self, op, *, arg, length, pad):
    """Compile a left-pad that leaves long-enough values untouched.

    When ``length <= length(arg)`` the value is returned as is; otherwise
    ``pad`` repeated ``length - length(arg)`` times is concatenated in front.
    """
    fits_already = length <= self.f.length(arg)
    prefix = self.f.repeat(pad, length - self.f.length(arg))
    with_prefix = self.f.concat(prefix, arg)
    return self.if_(fits_already, arg, with_prefix)

def visit_RPad(self, op, *, arg, length, pad):
    """Compile a right-pad that leaves long-enough values untouched.

    When ``length <= length(arg)`` the value is returned as is; otherwise
    ``pad`` repeated ``length - length(arg)`` times is concatenated behind.
    """
    fits_already = length <= self.f.length(arg)
    suffix = self.f.repeat(pad, length - self.f.length(arg))
    with_suffix = self.f.concat(arg, suffix)
    return self.if_(fits_already, arg, with_suffix)

def visit_StartsWith(self, op, *, arg, start):
    """Compile a prefix test as ``left(arg, char_length(start)) = start``."""
    prefix_width = self.f.char_length(start)
    leading = self.f.left(arg, prefix_width)
    return leading.eq(start)

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sql/compilers/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ def visit_LPad(self, op, *, arg, length, pad):
return self.if_(
length <= self.f.length(arg),
arg,
self.f.left(
self.f.right(
self.f.concat(self.f.replicate(pad, length - self.f.length(arg)), arg),
length,
),
Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/sql/compilers/oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,6 @@ class OracleCompiler(SQLGlotCompiler):
ops.BitXor: "bit_xor_agg",
ops.BitwiseAnd: "bitand",
ops.Hash: "ora_hash",
ops.LPad: "lpad",
ops.RPad: "rpad",
ops.StringAscii: "ascii",
ops.Mode: "stats_mode",
}
Expand Down Expand Up @@ -275,6 +273,12 @@ def visit_StringContains(self, op, *, haystack, needle):
def visit_StringJoin(self, op, *, arg, sep):
    """Compile a join as ``concat`` over ``arg`` with ``sep`` between items."""
    pieces = toolz.interpose(sep, arg)
    return self.f.concat(*pieces)

def visit_LPad(self, op, *, arg, length, pad):
    """Compile a left-pad that never shortens ``arg``.

    The width handed to ``lpad`` is ``greatest(length(arg), length)``, so a
    value already longer than ``length`` keeps its own length.
    """
    own_width = self.f.length(arg)
    return self.f.lpad(arg, self.f.greatest(own_width, length), pad)

def visit_RPad(self, op, *, arg, length, pad):
    """Compile a right-pad that never shortens ``arg``.

    The width handed to ``rpad`` is ``greatest(length(arg), length)``, so a
    value already longer than ``length`` keeps its own length.
    """
    own_width = self.f.length(arg)
    return self.f.rpad(arg, self.f.greatest(own_width, length), pad)

## Aggregate stuff

def visit_Correlation(self, op, *, left, right, where, how):
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/sqlite/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,12 @@ def _ibis_string_ascii(string):

@udf
def _ibis_rpad(string, width, pad):
return string.ljust(width, pad)[:width]
return string.ljust(width, pad)


@udf
def _ibis_lpad(string, width, pad):
return string.rjust(width, pad)[:width]
return string.rjust(width, pad)


@udf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,22 @@ FROM (
"t1"."ancestor_level_number",
"t1"."ancestor_node_sort_order",
"t1"."descendant_node_natural_key",
LPAD('-', (
"t1"."ancestor_level_number" - 1
) * 7, '-') || "t1"."ancestor_level_name" AS "product_level_name"
CASE
WHEN (
(
"t1"."ancestor_level_number" - 1
) * 7
) <= LENGTH('-')
THEN '-'
ELSE CONCAT(
REPEAT('-', (
(
"t1"."ancestor_level_number" - 1
) * 7
) - LENGTH('-')),
'-'
)
END || "t1"."ancestor_level_name" AS "product_level_name"
FROM "products" AS "t1"
) AS "t4"
ON "t2"."product_id" = "t4"."descendant_node_natural_key"
Expand Down
Loading

0 comments on commit 3a83aa4

Please sign in to comment.