From 33be493fbd8b18661e7fa4bef42210b3ac2e6de8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Osipiuk?= Date: Thu, 8 Jul 2021 17:35:37 +0200 Subject: [PATCH] Add support for from_utc_timestamp in Hive views translation --- .../hive/CanonicalizeHiveTimezoneId.java | 36 ++++++++ .../java/io/trino/plugin/hive/HivePlugin.java | 8 ++ pom.xml | 4 +- .../tests/product/hive/TestHiveViews.java | 92 +++++++++++++++++++ 4 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java diff --git a/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java b/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java new file mode 100644 index 0000000000000..d1fa4a782f5f0 --- /dev/null +++ b/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/CanonicalizeHiveTimezoneId.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.hive; + +import io.airlift.slice.Slice; +import io.trino.spi.function.LiteralParameters; +import io.trino.spi.function.ScalarFunction; +import io.trino.spi.function.SqlType; + +/** + * Translate timezone id used by Hive to canonical form which is understandable by Trino; used in Hive view translation logic + */ +public final class CanonicalizeHiveTimezoneId +{ + private CanonicalizeHiveTimezoneId() {} + + @ScalarFunction(value = "$canonicalize_hive_timezone_id", hidden = true) + @LiteralParameters("x") + @SqlType("varchar") + public static Slice canonicalizeHiveTimezoneId(@SqlType("varchar(x)") Slice hiveTimeZoneId) + { + // TODO(https://github.com/trinodb/trino/issues/8853) no-op for now; actual canonicalization logic to be added + return hiveTimeZoneId; + } +} diff --git a/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/HivePlugin.java b/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/HivePlugin.java index 26f32ce1e1e7f..fcae690541516 100644 --- a/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/HivePlugin.java +++ b/plugin/trino-hive-hadoop2/src/main/java/io/trino/plugin/hive/HivePlugin.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import io.airlift.log.Logger; import io.trino.spi.Plugin; import io.trino.spi.connector.Connector; @@ -21,6 +22,7 @@ import io.trino.spi.connector.ConnectorFactory; import java.util.Map; +import java.util.Set; public class HivePlugin implements Plugin @@ -33,6 +35,12 @@ public Iterable<ConnectorFactory> getConnectorFactories() return ImmutableList.of(new HiveConnectorFactory("hive"), new LegacyHiveConnectorFactory()); } + @Override + public Set<Class<?>> getFunctions() + { + return ImmutableSet.of(CanonicalizeHiveTimezoneId.class); + } + private static class LegacyHiveConnectorFactory extends HiveConnectorFactory { diff --git a/pom.xml b/pom.xml index 96171d7dfc204..6db60864e2c32 100644 --- 
a/pom.xml +++ b/pom.xml @@ -62,7 +62,7 @@ 2.8.0 1.16.0 3.2.11 - 1.0.60 + 1.0.77 5.5.2 2.12.3 @@ -1038,7 +1038,7 @@ com.linkedin.calcite calcite-core - 1.21.0.146 + 1.21.0.150 shaded diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java index 87b85a435763e..41b5b8b6207fc 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveViews.java @@ -172,4 +172,96 @@ public void testLateralViewJsonTupleAs() "SELECT * FROM test_json_tuple_view", queryAssert -> queryAssert.containsOnly(row(3, "Mateusz", "Gajewski", "true", "1000", null, null))); } + + @Test(groups = HIVE_VIEWS) + public void testFromUtcTimestamp() + { + onTrino().executeQuery("DROP TABLE IF EXISTS test_from_utc_timestamp_source"); + onHive().executeQuery("CREATE TABLE test_from_utc_timestamp_source AS SELECT " + + " CAST(123 AS tinyint) source_tinyint, " + + " CAST(10123 AS smallint) source_smallint, " + + " CAST(2592000123 AS int) source_integer, " + + " CAST(2592000123 AS bigint) source_bigint, " + + " CAST(2592000.0 AS float) source_float, " + + " CAST(2592000.123 AS double) source_double, " + + " CAST(2592000.123 AS decimal(10,3)) source_decimal_three," + + " CAST(2592000 AS DECIMAL(10,0)) source_decimal_zero," + + " timestamp '1970-01-30 16:00:00' source_timestamp, " + + " date '1970-01-30' source_date "); + + onHive().executeQuery("DROP VIEW IF EXISTS test_from_utc_timestamp_view"); + onHive().executeQuery("CREATE VIEW " + + "test_from_utc_timestamp_view " + + "AS SELECT " + + // TODO(https://github.com/trinodb/trino/issues/8853) add testcases with 3-letter tz names (like PST) when we have $canonicalize_hive_timezone_id logic in place + " CAST(from_utc_timestamp(source_tinyint, 'America/Los_Angeles') AS STRING) ts_tinyint, " + + " 
CAST(from_utc_timestamp(source_smallint, 'America/Los_Angeles') AS STRING) ts_smallint, " + + " CAST(from_utc_timestamp(source_integer, 'America/Los_Angeles') AS STRING) ts_integer, " + + " CAST(from_utc_timestamp(source_bigint, 'America/Los_Angeles') AS STRING) ts_bigint, " + + " CAST(from_utc_timestamp(source_float, 'America/Los_Angeles') AS STRING) ts_float, " + + " CAST(from_utc_timestamp(source_double, 'America/Los_Angeles') AS STRING) ts_double, " + + " CAST(from_utc_timestamp(source_decimal_three, 'America/Los_Angeles') AS STRING) ts_decimal_three, " + + " CAST(from_utc_timestamp(source_decimal_zero, 'America/Los_Angeles') AS STRING) ts_decimal_zero, " + + " CAST(from_utc_timestamp(source_timestamp, 'America/Los_Angeles') AS STRING) ts_timestamp, " + + " CAST(from_utc_timestamp(source_date, 'America/Los_Angeles') AS STRING) ts_date " + + "FROM test_from_utc_timestamp_source"); + + // check result on Trino + assertThat(query("SELECT * FROM test_from_utc_timestamp_view")) + .containsOnly(row( + "1969-12-31 16:00:00.123", + "1969-12-31 16:00:10.123", + "1969-12-11 22:57:12.827", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.000", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.000", + "1970-01-30 08:00:00.000", + "1970-01-29 16:00:00.000")); + + // check result on Hive + assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_view")) + .containsOnly(row( + "1969-12-31 16:00:00.123", + "1969-12-31 16:00:10.123", + "1969-12-11 22:57:12.827", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00.123", + "1970-01-30 16:00:00", + "1970-01-30 08:00:00", + "1970-01-29 16:00:00")); + } + + @Test(groups = HIVE_VIEWS) + public void testFromUtcTimestampCornerCases() + { + onTrino().executeQuery("DROP TABLE IF EXISTS test_from_utc_timestamp_corner_cases_source"); + onTrino().executeQuery("CREATE TABLE test_from_utc_timestamp_corner_cases_source AS SELECT * FROM 
(VALUES " + + " CAST(-5000000000001 AS BIGINT)," + + " CAST(-1000000000001 AS BIGINT)," + + " -1," + + " 1," + + " 5000000000001" + + ")" + + "AS source(source_bigint)"); + + onHive().executeQuery("DROP VIEW IF EXISTS test_from_utc_timestamp_corner_cases_view"); + onHive().executeQuery("CREATE VIEW " + + "test_from_utc_timestamp_corner_cases_view " + + "AS SELECT " + + " CAST(from_utc_timestamp(source_bigint, 'America/Los_Angeles') as STRING) ts_bigint " + + "FROM test_from_utc_timestamp_corner_cases_source"); + + // check result on Trino + assertViewQuery("SELECT * FROM test_from_utc_timestamp_corner_cases_view", + assertion -> assertion.containsOnly( + row("1811-07-23 07:13:41.999"), + row("1938-04-24 14:13:19.999"), + row("1969-12-31 15:59:59.999"), + row("1969-12-31 16:00:00.001"), + row("2128-06-11 01:53:20.001"))); + } }