Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(bigquery): strip whitespace from bigquery field names #9160

Merged
merged 2 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ def visit_HashBytes(self, op, *, arg, how):

@staticmethod
def _gen_valid_name(name: str) -> str:
    """Build a field name from an arbitrary expression name.

    Every fragment of ``name`` matched by ``_NAME_REGEX`` is stripped of
    surrounding whitespace and the fragments are joined with underscores,
    so ``"Cast(b, Int64)"`` is expected to yield ``"Cast_b_Int64"`` rather
    than ``"Cast_b_ Int64"``.

    Returns ``"tmp"`` when the joined result would be empty.
    """
    # Strip each fragment: the pattern can capture the blank that follows
    # a separator (e.g. ", "), which would otherwise leak into the name.
    fragments = map(str.strip, _NAME_REGEX.findall(name))
    return "_".join(fragments) or "tmp"

def visit_CountStar(self, op, *, arg, where):
if where is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
approx_quantiles(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL), IF(`t0`.`month` > 0, 2, NULL))[offset(1)] AS `ApproxMedian_double_col_ Greater_month_ 0`
approx_quantiles(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL), IF(`t0`.`month` > 0, 2, NULL))[offset(1)] AS `ApproxMedian_double_col_Greater_month_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
APPROX_COUNT_DISTINCT(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL)) AS `ApproxCountDistinct_double_col_ Greater_month_ 0`
APPROX_COUNT_DISTINCT(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL)) AS `ApproxCountDistinct_double_col_Greater_month_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(`t0`.`value` AS BYTES) AS `Cast_value_ binary`
CAST(`t0`.`value` AS BYTES) AS `Cast_value_binary`
FROM `t` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
bit_and(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitAnd_int_col_ Greater_bigint_col_ 0`
bit_and(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitAnd_int_col_Greater_bigint_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
bit_or(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitOr_int_col_ Greater_bigint_col_ 0`
bit_or(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitOr_int_col_Greater_bigint_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
bit_xor(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitXor_int_col_ Greater_bigint_col_ 0`
bit_xor(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitXor_int_col_Greater_bigint_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ SELECT
CAST(`t0`.`bool_col` AS INT64),
NULL
)
) AS `Sum_bool_col_ And_Greater_month_ 6_ Less_month_ 10`
) AS `Sum_bool_col_And_Greater_month_6_Less_month_10`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
AVG(IF(`t0`.`month` > 6, CAST(`t0`.`bool_col` AS INT64), NULL)) AS `Mean_bool_col_ Greater_month_ 6`
AVG(IF(`t0`.`month` > 6, CAST(`t0`.`bool_col` AS INT64), NULL)) AS `Mean_bool_col_Greater_month_6`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(trunc(`t0`.`double_col`) AS INT64) AS `Cast_double_col_ int64`
CAST(trunc(`t0`.`double_col`) AS INT64) AS `Cast_double_col_int64`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
COVAR_POP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_ double_col`
COVAR_POP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_double_col`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
COVAR_SAMP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_ double_col`
COVAR_SAMP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_double_col`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1`
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1`
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1`
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1`
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1`
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1`
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(FLOOR(ieee_divide(`t0`.`double_col`, 0)) AS INT64) AS `FloorDivide_double_col_ 0`
CAST(FLOOR(ieee_divide(`t0`.`double_col`, 0)) AS INT64) AS `FloorDivide_double_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
ieee_divide(`t0`.`double_col`, 0) AS `Divide_double_col_ 0`
ieee_divide(`t0`.`double_col`, 0) AS `Divide_double_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1`
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1`
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1`
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
parse_timestamp('%F', `t0`.`date_string_col`, 'UTC') AS `StringToTimestamp_date_string_col_ '%F'`
parse_timestamp('%F', `t0`.`date_string_col`, 'UTC') AS `StringToTimestamp_date_string_col_'%F'`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
parse_timestamp('%F %Z', CONCAT(`t0`.`date_string_col`, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_ '%F %Z'`
parse_timestamp('%F %Z', CONCAT(`t0`.`date_string_col`, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_'%F %Z'`
FROM `functional_alltypes` AS `t0`
13 changes: 13 additions & 0 deletions ibis/backends/bigquery/tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis import _
from ibis.backends.bigquery.compiler import BigQueryCompiler
from ibis.common.annotations import ValidationError

to_sql = ibis.bigquery.compile
Expand Down Expand Up @@ -633,3 +634,15 @@ def test_unnest(snapshot):
).select(level_two=lambda t: t.level_one.unnest())
)
snapshot.assert_match(result, "out_two_unnests.sql")


@pytest.mark.parametrize(
    "fieldname, expected",
    [
        ("TryCast(b, Float64)", "TryCast_b_Float64"),
        ("Cast(b, Int64)", "Cast_b_Int64"),
        ("that, is, a, lot, of, spaces", "that_is_a_lot_of_spaces"),
    ],
)
def test_field_names_strip_whitespace(fieldname, expected):
    """Generated field names must not keep whitespace from the input name."""
    generated = BigQueryCompiler._gen_valid_name(fieldname)
    assert generated == expected
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
farm_fingerprint(CAST('48656c6c6f2c20576f726c6421' AS BYTES FORMAT 'HEX')) AS `farm_fingerprint_0_b'Hello_ World_'`
farm_fingerprint(CAST('48656c6c6f2c20576f726c6421' AS BYTES FORMAT 'HEX')) AS `farm_fingerprint_0_b'Hello_World_'`
Loading