From 815723fa0f2b7b659c15f158f68a051a907c473e Mon Sep 17 00:00:00 2001 From: David Phillips Date: Mon, 31 Jul 2023 14:59:17 -0700 Subject: [PATCH] Remove support for S3 Select --- docs/src/main/sphinx/connector/hive-s3.md | 82 ----- docs/src/main/sphinx/connector/hive.md | 10 - docs/src/main/sphinx/release/release-300.md | 2 +- .../java/io/trino/hdfs/s3/HiveS3Module.java | 1 - .../bin/run_hive_s3_select_json_tests.sh | 67 ---- .../bin/run_hive_s3_tests.sh | 32 -- plugin/trino-hive-hadoop2/pom.xml | 23 -- .../hive/AbstractTestHiveFileSystemAbfs.java | 1 - .../hive/AbstractTestHiveFileSystemS3.java | 261 --------------- .../plugin/hive/TestHiveFileSystemAdl.java | 2 +- .../plugin/hive/TestHiveFileSystemS3.java | 231 +++++++++++++- .../plugin/hive/TestHiveFileSystemWasb.java | 2 +- .../hive/s3select/S3SelectTestHelper.java | 287 ----------------- ...leSystemS3SelectCsvPushdownWithSplits.java | 116 ------- ...estHiveFileSystemS3SelectJsonPushdown.java | 102 ------ ...eSystemS3SelectJsonPushdownWithSplits.java | 116 ------- .../TestHiveFileSystemS3SelectPushdown.java | 115 ------- plugin/trino-hive/pom.xml | 13 +- .../hive/BackgroundHiveSplitLoader.java | 11 +- .../java/io/trino/plugin/hive/HiveConfig.java | 46 +-- .../java/io/trino/plugin/hive/HiveModule.java | 3 - .../plugin/hive/HivePageSourceProvider.java | 2 - .../plugin/hive/HiveSessionProperties.java | 11 - .../java/io/trino/plugin/hive/HiveSplit.java | 10 - .../io/trino/plugin/hive/HiveSplitSource.java | 1 - .../trino/plugin/hive/InternalHiveSplit.java | 8 - .../hive/s3select/IonSqlQueryBuilder.java | 274 ---------------- .../hive/s3select/S3SelectDataType.java | 19 -- .../hive/s3select/S3SelectPushdown.java | 165 ---------- .../s3select/S3SelectSerDeDataTypeMapper.java | 40 --- .../hive/s3select/TrinoS3ClientFactory.java | 243 -------------- .../hive/s3select/TrinoS3SelectClient.java | 85 ----- .../hive/util/InternalHiveSplitFactory.java | 5 - .../hive/AbstractTestHiveFileSystem.java | 5 +- .../plugin/hive/BaseTestHiveOnDataLake.java | 247 --------------- .../hive/TestBackgroundHiveSplitLoader.java | 2 - .../io/trino/plugin/hive/TestHiveConfig.java | 9 - .../plugin/hive/TestHiveFileFormats.java | 1 - .../trino/plugin/hive/TestHivePageSink.java | 1 - .../io/trino/plugin/hive/TestHiveSplit.java | 2 - .../plugin/hive/TestHiveSplitSource.java | 1 - .../TestNodeLocalDynamicSplitPruning.java | 1 - .../hive/TestOrcPageSourceMemoryTracking.java | 1 - .../hive/benchmark/AbstractFileFormat.java | 1 - .../plugin/hive/orc/TestOrcPredicates.java | 1 - .../hive/s3/TestMinioS3SelectQueries.java | 123 -------- .../plugin/hive/s3/TestS3SelectQueries.java | 191 ----------- .../hive/s3select/TestIonSqlQueryBuilder.java | 175 ----------- .../hive/s3select/TestS3SelectPushdown.java | 297 ------------------ 49 files changed, 244 insertions(+), 3200 deletions(-) delete mode 100755 plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh delete mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemS3.java delete mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/S3SelectTestHelper.java delete mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectCsvPushdownWithSplits.java delete mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdown.java delete mode 100644 
plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdownWithSplits.java delete mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectPushdown.java delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/IonSqlQueryBuilder.java delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectDataType.java delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectPushdown.java delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectSerDeDataTypeMapper.java delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3ClientFactory.java delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3SelectClient.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestMinioS3SelectQueries.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3SelectQueries.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestIonSqlQueryBuilder.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestS3SelectPushdown.java diff --git a/docs/src/main/sphinx/connector/hive-s3.md b/docs/src/main/sphinx/connector/hive-s3.md index 70c11a6b82198..8cfdfb450de7a 100644 --- a/docs/src/main/sphinx/connector/hive-s3.md +++ b/docs/src/main/sphinx/connector/hive-s3.md @@ -312,85 +312,3 @@ classpath and must be able to communicate with your custom key management system the `org.apache.hadoop.conf.Configurable` interface from the Hadoop Java API, then the Hadoop configuration is passed in after the object instance is created, and before it is asked to provision or retrieve any encryption keys. - -(s3selectpushdown)= - -## S3 Select pushdown - -S3 Select pushdown enables pushing down projection (SELECT) and predicate (WHERE) -processing to [S3 Select](https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html). -With S3 Select Pushdown, Trino only retrieves the required data from S3 instead -of entire S3 objects, reducing both latency and network usage. - -### Is S3 Select a good fit for my workload? - -Performance of S3 Select pushdown depends on the amount of data filtered by the -query. Filtering a large number of rows should result in better performance. If -the query doesn't filter any data, then pushdown may not add any additional value -and the user is charged for S3 Select requests. Thus, we recommend that you -benchmark your workloads with and without S3 Select to see if using it may be -suitable for your workload. By default, S3 Select Pushdown is disabled and you -should enable it in production after proper benchmarking and cost analysis. For -more information on S3 Select request cost, please see -[Amazon S3 Cloud Storage Pricing](https://aws.amazon.com/s3/pricing/). - -Use the following guidelines to determine if S3 Select is a good fit for your -workload: - -- Your query filters out more than half of the original data set. -- Your query filter predicates use columns that have a data type supported by - Trino and S3 Select. - The `TIMESTAMP`, `DECIMAL`, `REAL`, and `DOUBLE` data types are not - supported by S3 Select Pushdown. 
For more information about supported data - types for S3 Select, see the - [Data Types documentation](https://docs.aws.amazon.com/AmazonS3/latest/dev/s3-glacier-select-sql-reference-data-types.html). -- Your network connection between Amazon S3 and the Amazon EMR cluster has good - transfer speed and available bandwidth. Amazon S3 Select does not compress - HTTP responses, so the response size may increase for compressed input files. - -### Considerations and limitations - -- Only objects stored in JSON format are supported. Objects can be uncompressed, - or optionally compressed with gzip or bzip2. -- The "AllowQuotedRecordDelimiters" property is not supported. If this property - is specified, the query fails. -- Amazon S3 server-side encryption with customer-provided encryption keys - (SSE-C) and client-side encryption are not supported. -- S3 Select Pushdown is not a substitute for using columnar or compressed file - formats such as ORC and Parquet. - -### Enabling S3 Select pushdown - -You can enable S3 Select Pushdown using the `s3_select_pushdown_enabled` -Hive session property, or using the `hive.s3select-pushdown.enabled` -configuration property. The session property overrides the config -property, allowing you enable or disable on a per-query basis. Non-filtering -queries (`SELECT * FROM table`) are not pushed down to S3 Select, -as they retrieve the entire object content. - -For uncompressed files, S3 Select scans ranges of bytes in parallel. The scan range -requests run across the byte ranges of the internal Hive splits for the query fragments -pushed down to S3 Select. Changes in the Hive connector {ref}`performance tuning -configuration properties ` are likely to impact -S3 Select pushdown performance. - -S3 Select can be enabled for TEXTFILE data using the -`hive.s3select-pushdown.experimental-textfile-pushdown-enabled` configuration property, -however this has been shown to produce incorrect results. For more information see -[the GitHub Issue.](https://github.com/trinodb/trino/issues/17775) - -### Understanding and tuning the maximum connections - -Trino can use its native S3 file system or EMRFS. When using the native FS, the -maximum connections is configured via the `hive.s3.max-connections` -configuration property. When using EMRFS, the maximum connections is configured -via the `fs.s3.maxConnections` Hadoop configuration property. - -S3 Select Pushdown bypasses the file systems, when accessing Amazon S3 for -predicate operations. In this case, the value of -`hive.s3select-pushdown.max-connections` determines the maximum number of -client connections allowed for those operations from worker nodes. - -If your workload experiences the error *Timeout waiting for connection from -pool*, increase the value of both `hive.s3select-pushdown.max-connections` and -the maximum connections configuration for the file system you are using. diff --git a/docs/src/main/sphinx/connector/hive.md b/docs/src/main/sphinx/connector/hive.md index c972867bacee7..05875dae76565 100644 --- a/docs/src/main/sphinx/connector/hive.md +++ b/docs/src/main/sphinx/connector/hive.md @@ -253,16 +253,6 @@ Hive connector documentation. - Enables automatic column level statistics collection on write. See `Table Statistics <#table-statistics>`__ for details. - ``true`` - * - ``hive.s3select-pushdown.enabled`` - - Enable query pushdown to JSON files using the AWS S3 Select service. 
- - ``false`` - * - ``hive.s3select-pushdown.experimental-textfile-pushdown-enabled`` - - Enable query pushdown to TEXTFILE tables using the AWS S3 Select service. - - ``false`` - * - ``hive.s3select-pushdown.max-connections`` - - Maximum number of simultaneously open connections to S3 for - :ref:`s3selectpushdown`. - - 500 * - ``hive.file-status-cache-tables`` - Cache directory listing for specific tables. Examples: diff --git a/docs/src/main/sphinx/release/release-300.md b/docs/src/main/sphinx/release/release-300.md index ae16f369073f2..ecf4917cac99b 100644 --- a/docs/src/main/sphinx/release/release-300.md +++ b/docs/src/main/sphinx/release/release-300.md @@ -46,7 +46,7 @@ (e.g., min > max). To disable this behavior, set the configuration property `hive.parquet.fail-on-corrupted-statistics` or session property `parquet_fail_with_corrupted_statistics` to false. -- Add support for {ref}`s3selectpushdown`, which enables pushing down +- Add support for S3 Select pushdown, which enables pushing down column selection and range filters into S3 for text files. ## Kudu connector diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Module.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Module.java index 99aed93534351..501bfcb973f7a 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Module.java +++ b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Module.java @@ -83,7 +83,6 @@ private void bindSecurityMapping(Binder binder) newSetBinder(binder, DynamicConfigurationProvider.class).addBinding() .to(S3SecurityMappingConfigurationProvider.class).in(Scopes.SINGLETON); - checkArgument(!getProperty("hive.s3select-pushdown.enabled").map(Boolean::parseBoolean).orElse(false), "S3 security mapping is not compatible with S3 Select pushdown"); checkArgument(!buildConfigObject(RubixEnabledConfig.class).isCacheEnabled(), "S3 security mapping is not compatible with Hive caching"); } diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh deleted file mode 100755 index 1d7976ede475c..0000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash - -# Similar to run_hive_s3_tests.sh, but has only Amazon S3 Select JSON tests. This is in a separate file as the JsonSerDe -# class is only available in Hadoop 3.1 version, and so we would only test JSON pushdown against the 3.1 version. - -set -euo pipefail -x - -. 
"${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -check_vars S3_BUCKET S3_BUCKET_ENDPOINT \ - AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)-s3select-json" - -# insert AWS credentials -deploy_core_site_xml core-site.xml.s3-template \ - AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY S3_BUCKET_ENDPOINT - -# create test tables -# can't use create_test_tables because the first table is created with different commands -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_s3select_test_external_fs_json/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container /docker/files/hadoop-put.sh /docker/files/test_table.json{,.gz,.bz2} "${table_path}" -exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -e " - CREATE EXTERNAL TABLE trino_s3select_test_external_fs_json(col_1 bigint, col_2 bigint) - ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' - LOCATION '${table_path}'" - -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_s3select_test_json_scan_range_pushdown/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container /docker/files/hadoop-put.sh /docker/files/test_table_json_scan_range_select_pushdown_{1,2,3}.json "${table_path}" -exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -e " - CREATE EXTERNAL TABLE trino_s3select_test_json_scan_range_pushdown(col_1 bigint, col_2 string, col_3 string, - col_4 string, col_5 string, col_6 string, col_7 string, col_8 string, col_9 string, col_10 string, col_11 string, - col_12 string, col_13 string, col_14 string) - ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' - LOCATION '${table_path}'" -stop_unnecessary_hadoop_services - -# restart hive-metastore to apply S3 changes in core-site.xml -docker exec "$(hadoop_master_container)" supervisorctl restart hive-metastore -retry check_hadoop - -# run product tests -pushd "${PROJECT_ROOT}" -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-s3-select-json \ - -DHADOOP_USER_NAME=hive \ - -Dhive.hadoop2.metastoreHost=localhost \ - -Dhive.hadoop2.metastorePort=9083 \ - -Dhive.hadoop2.databaseName=default \ - -Dhive.hadoop2.s3.awsAccessKey="${AWS_ACCESS_KEY_ID}" \ - -Dhive.hadoop2.s3.awsSecretKey="${AWS_SECRET_ACCESS_KEY}" \ - -Dhive.hadoop2.s3.writableBucket="${S3_BUCKET}" \ - -Dhive.hadoop2.s3.testDirectory="${test_directory}" -EXIT_CODE=$? 
-set -e -popd - -cleanup_hadoop_docker_containers - -exit "${EXIT_CODE}" diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh index 57c3c090bf75f..0b9fb473e6dba 100755 --- a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh +++ b/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh @@ -46,38 +46,6 @@ exec_in_hadoop_master_container /usr/bin/hive -e " LOCATION '${table_path}' TBLPROPERTIES ('skip.header.line.count'='2', 'skip.footer.line.count'='2')" -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_s3select_test_external_fs_with_pipe_delimiter/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container hadoop fs -put -f /docker/files/test_table_with_pipe_delimiter.csv{,.gz,.bz2} "${table_path}" -exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE trino_s3select_test_external_fs_with_pipe_delimiter(t_bigint bigint, s_bigint bigint) - ROW FORMAT DELIMITED - FIELDS TERMINATED BY '|' - STORED AS TEXTFILE - LOCATION '${table_path}'" - -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_s3select_test_external_fs_with_comma_delimiter/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container hadoop fs -put -f /docker/files/test_table_with_comma_delimiter.csv{,.gz,.bz2} "${table_path}" -exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE trino_s3select_test_external_fs_with_comma_delimiter(t_bigint bigint, s_bigint bigint) - ROW FORMAT DELIMITED - FIELDS TERMINATED BY ',' - STORED AS TEXTFILE - LOCATION '${table_path}'" - -table_path="s3a://${S3_BUCKET}/${test_directory}/trino_s3select_test_csv_scan_range_pushdown/" -exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" -exec_in_hadoop_master_container /docker/files/hadoop-put.sh /docker/files/test_table_csv_scan_range_select_pushdown_{1,2,3}.csv "${table_path}" -exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -e " - CREATE EXTERNAL TABLE trino_s3select_test_csv_scan_range_pushdown(index bigint, id string, value1 bigint, value2 bigint, value3 bigint, - value4 bigint, value5 bigint, title string, firstname string, lastname string, flag string, day bigint, - month bigint, year bigint, country string, comment string, email string, identifier string) - ROW FORMAT DELIMITED - FIELDS TERMINATED BY '|' - STORED AS TEXTFILE - LOCATION '${table_path}'" - stop_unnecessary_hadoop_services # restart hive-metastore to apply S3 changes in core-site.xml diff --git a/plugin/trino-hive-hadoop2/pom.xml b/plugin/trino-hive-hadoop2/pom.xml index 20ca170cad3b6..842f419698f83 100644 --- a/plugin/trino-hive-hadoop2/pom.xml +++ b/plugin/trino-hive-hadoop2/pom.xml @@ -221,10 +221,6 @@ **/TestHive.java **/TestHiveThriftMetastoreWithS3.java **/TestHiveFileSystemS3.java - **/TestHiveFileSystemS3SelectPushdown.java - **/TestHiveFileSystemS3SelectJsonPushdown.java - **/TestHiveFileSystemS3SelectCsvPushdownWithSplits.java - **/TestHiveFileSystemS3SelectJsonPushdownWithSplits.java **/TestHiveFileSystemWasb.java **/TestHiveFileSystemAbfsAccessKey.java **/TestHiveFileSystemAbfsOAuth.java @@ -263,25 +259,6 @@ **/TestHiveThriftMetastoreWithS3.java **/TestHiveFileSystemS3.java - **/TestHiveFileSystemS3SelectPushdown.java - **/TestHiveFileSystemS3SelectCsvPushdownWithSplits.java - - - - - - - - test-hive-hadoop2-s3-select-json - - - - org.apache.maven.plugins - maven-surefire-plugin - - - 
**/TestHiveFileSystemS3SelectJsonPushdown.java - **/TestHiveFileSystemS3SelectJsonPushdownWithSplits.java diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemAbfs.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemAbfs.java index ad253f1166e80..532323a3ffa0f 100644 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemAbfs.java +++ b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemAbfs.java @@ -66,7 +66,6 @@ protected void setup(String host, int port, String databaseName, String containe checkParameter(host, "host"), port, checkParameter(databaseName, "database name"), - false, createHdfsConfiguration()); } diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemS3.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemS3.java deleted file mode 100644 index 801ea4f667c03..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystemS3.java +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Streams; -import com.google.common.net.MediaType; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.HdfsNamenodeStats; -import io.trino.hdfs.TrinoHdfsFileSystemStats; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; -import io.trino.plugin.hive.fs.FileSystemDirectoryLister; -import io.trino.plugin.hive.fs.HiveFileIterator; -import io.trino.plugin.hive.fs.TrinoFileStatus; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.metastore.Table; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.List; -import java.util.Optional; - -import static com.google.common.base.Preconditions.checkArgument; -import static io.trino.plugin.hive.HiveTestUtils.SESSION; -import static 
io.trino.plugin.hive.HiveType.HIVE_LONG; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static java.io.InputStream.nullInputStream; -import static java.lang.String.format; -import static org.assertj.core.api.Assertions.assertThat; -import static org.testng.Assert.assertFalse; -import static org.testng.util.Strings.isNullOrEmpty; - -public abstract class AbstractTestHiveFileSystemS3 - extends AbstractTestHiveFileSystem -{ - private static final MediaType DIRECTORY_MEDIA_TYPE = MediaType.create("application", "x-directory"); - - private String awsAccessKey; - private String awsSecretKey; - private String writableBucket; - private String testDirectory; - private AmazonS3 s3Client; - - protected void setup( - String host, - int port, - String databaseName, - String s3endpoint, - String awsAccessKey, - String awsSecretKey, - String writableBucket, - String testDirectory, - boolean s3SelectPushdownEnabled) - { - checkArgument(!isNullOrEmpty(host), "Expected non empty host"); - checkArgument(!isNullOrEmpty(databaseName), "Expected non empty databaseName"); - checkArgument(!isNullOrEmpty(awsAccessKey), "Expected non empty awsAccessKey"); - checkArgument(!isNullOrEmpty(awsSecretKey), "Expected non empty awsSecretKey"); - checkArgument(!isNullOrEmpty(s3endpoint), "Expected non empty s3endpoint"); - checkArgument(!isNullOrEmpty(writableBucket), "Expected non empty writableBucket"); - checkArgument(!isNullOrEmpty(testDirectory), "Expected non empty testDirectory"); - this.awsAccessKey = awsAccessKey; - this.awsSecretKey = awsSecretKey; - this.writableBucket = writableBucket; - this.testDirectory = testDirectory; - - s3Client = AmazonS3Client.builder() - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3endpoint, null)) - .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey))) - .build(); - - setup(host, port, databaseName, s3SelectPushdownEnabled, createHdfsConfiguration()); - } - - private HdfsConfiguration createHdfsConfiguration() - { - ConfigurationInitializer s3Config = new TrinoS3ConfigurationInitializer(new HiveS3Config() - .setS3AwsAccessKey(awsAccessKey) - .setS3AwsSecretKey(awsSecretKey)); - HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of(s3Config)); - return new DynamicHdfsConfiguration(initializer, ImmutableSet.of()); - } - - @Override - protected Path getBasePath() - { - // HDP 3.1 does not understand s3:// out of the box. 
- return new Path(format("s3a://%s/%s/", writableBucket, testDirectory)); - } - - @Test - public void testIgnoreHadoopFolderMarker() - throws Exception - { - Path basePath = getBasePath(); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - - String markerFileName = "test_table_$folder$"; - Path filePath = new Path(basePath, markerFileName); - fs.create(filePath).close(); - - assertFalse(Arrays.stream(fs.listStatus(basePath)).anyMatch(file -> file.getPath().getName().equalsIgnoreCase(markerFileName))); - } - - /** - * Tests the same functionality like {@link #testFileIteratorPartitionedListing()} with the - * setup done by native {@link AmazonS3} - */ - @Test - public void testFileIteratorPartitionedListingNativeS3Client() - throws Exception - { - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(table.getSchemaName()) - .setTableName(table.getTableName()) - .setDataColumns(ImmutableList.of(new Column("data", HIVE_LONG, Optional.empty()))) - .setPartitionColumns(ImmutableList.of(new Column("part", HIVE_STRING, Optional.empty()))) - .setOwner(Optional.empty()) - .setTableType("fake"); - tableBuilder.getStorageBuilder() - .setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.CSV)); - Table fakeTable = tableBuilder.build(); - - Path basePath = new Path(getBasePath(), "test-file-iterator-partitioned-listing-native-setup"); - FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); - TrinoFileSystem trinoFileSystem = new HdfsFileSystemFactory(hdfsEnvironment, new TrinoHdfsFileSystemStats()).create(SESSION); - fs.mkdirs(basePath); - String basePrefix = basePath.toUri().getPath().substring(1); - - // Expected file system tree: - // test-file-iterator-partitioned-listing-native-setup/ - // .hidden/ - // nested-file-in-hidden.txt - // part=simple/ - // _hidden-file.txt - // plain-file.txt - // part=nested/ - // parent/ - // _nested-hidden-file.txt - // nested-file.txt - // part=plus+sign/ - // plus-file.txt - // part=percent%sign/ - // percent-file.txt - // part=url%20encoded/ - // url-encoded-file.txt - // part=level1|level2/ - // pipe-file.txt - // parent1/ - // parent2/ - // deeply-nested-file.txt - // part=level1 | level2/ - // pipe-blanks-file.txt - // empty-directory/ - // .hidden-in-base.txt - - createFile(writableBucket, format("%s/.hidden/nested-file-in-hidden.txt", basePrefix)); - createFile(writableBucket, format("%s/part=simple/_hidden-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=simple/plain-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=nested/parent/_nested-hidden-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=nested/parent/nested-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=plus+sign/plus-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=percent%%sign/percent-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=url%%20encoded/url-encoded-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=level1|level2/pipe-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=level1|level2/parent1/parent2/deeply-nested-file.txt", basePrefix)); - createFile(writableBucket, format("%s/part=level1 | level2/pipe-blanks-file.txt", basePrefix)); - createDirectory(writableBucket, format("%s/empty-directory/", basePrefix)); - createFile(writableBucket, format("%s/.hidden-in-base.txt", basePrefix)); - - // List recursively through hive file iterator - HiveFileIterator 
recursiveIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - new HdfsNamenodeStats(), - HiveFileIterator.NestedDirectoryPolicy.RECURSE); - - List recursiveListing = Streams.stream(recursiveIterator) - .map(TrinoFileStatus::getPath) - .toList(); - // Should not include directories, or files underneath hidden directories - assertThat(recursiveListing).containsExactlyInAnyOrder( - format("%s/part=simple/plain-file.txt", basePath), - format("%s/part=nested/parent/nested-file.txt", basePath), - format("%s/part=plus+sign/plus-file.txt", basePath), - format("%s/part=percent%%sign/percent-file.txt", basePath), - format("%s/part=url%%20encoded/url-encoded-file.txt", basePath), - format("%s/part=level1|level2/pipe-file.txt", basePath), - format("%s/part=level1|level2/parent1/parent2/deeply-nested-file.txt", basePath), - format("%s/part=level1 | level2/pipe-blanks-file.txt", basePath)); - - HiveFileIterator shallowIterator = new HiveFileIterator( - fakeTable, - Location.of(basePath.toString()), - trinoFileSystem, - new FileSystemDirectoryLister(), - new HdfsNamenodeStats(), - HiveFileIterator.NestedDirectoryPolicy.IGNORED); - List shallowListing = Streams.stream(shallowIterator) - .map(TrinoFileStatus::getPath) - .map(Path::new) - .toList(); - // Should not include any hidden files, folders, or nested files - assertThat(shallowListing).isEmpty(); - } - - protected void createDirectory(String bucketName, String key) - { - // create meta-data for your folder and set content-length to 0 - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentLength(0); - metadata.setContentType(DIRECTORY_MEDIA_TYPE.toString()); - // create a PutObjectRequest passing the folder name suffixed by / - if (!key.endsWith("/")) { - key += "/"; - } - PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, nullInputStream(), metadata); - // send request to S3 to create folder - s3Client.putObject(putObjectRequest); - } - - protected void createFile(String bucketName, String key) - { - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentLength(0); - PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, nullInputStream(), metadata); - s3Client.putObject(putObjectRequest); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAdl.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAdl.java index fad889d28fac5..b31c642964658 100644 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAdl.java +++ b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemAdl.java @@ -73,7 +73,7 @@ public void setup(String host, int port, String databaseName, String dataLakeNam this.refreshUrl = refreshUrl; this.testDirectory = testDirectory; - super.setup(host, port, databaseName, false, createHdfsConfiguration()); + super.setup(host, port, databaseName, createHdfsConfiguration()); } private HdfsConfiguration createHdfsConfiguration() diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemS3.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemS3.java index c522c25d6a104..21d9dda644066 100644 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemS3.java +++ b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemS3.java @@ -13,12 +13,65 @@ */ 
package io.trino.plugin.hive; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Streams; +import com.google.common.net.MediaType; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.hdfs.HdfsFileSystemFactory; +import io.trino.hdfs.ConfigurationInitializer; +import io.trino.hdfs.DynamicHdfsConfiguration; +import io.trino.hdfs.HdfsConfig; +import io.trino.hdfs.HdfsConfiguration; +import io.trino.hdfs.HdfsConfigurationInitializer; +import io.trino.hdfs.HdfsNamenodeStats; +import io.trino.hdfs.TrinoHdfsFileSystemStats; +import io.trino.hdfs.s3.HiveS3Config; +import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; +import io.trino.plugin.hive.fs.FileSystemDirectoryLister; +import io.trino.plugin.hive.fs.HiveFileIterator; +import io.trino.plugin.hive.fs.TrinoFileStatus; +import io.trino.plugin.hive.metastore.Column; +import io.trino.plugin.hive.metastore.StorageFormat; +import io.trino.plugin.hive.metastore.Table; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.testng.annotations.BeforeClass; import org.testng.annotations.Parameters; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static io.trino.plugin.hive.HiveTestUtils.SESSION; +import static io.trino.plugin.hive.HiveType.HIVE_LONG; +import static io.trino.plugin.hive.HiveType.HIVE_STRING; +import static java.io.InputStream.nullInputStream; +import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; +import static org.testng.Assert.assertFalse; +import static org.testng.util.Strings.isNullOrEmpty; public class TestHiveFileSystemS3 - extends AbstractTestHiveFileSystemS3 + extends AbstractTestHiveFileSystem { + private static final MediaType DIRECTORY_MEDIA_TYPE = MediaType.create("application", "x-directory"); + private String awsAccessKey; + private String awsSecretKey; + private String writableBucket; + private String testDirectory; + private AmazonS3 s3Client; + @Parameters({ "hive.hadoop2.metastoreHost", "hive.hadoop2.metastorePort", @@ -32,6 +85,180 @@ public class TestHiveFileSystemS3 @BeforeClass public void setup(String host, int port, String databaseName, String s3endpoint, String awsAccessKey, String awsSecretKey, String writableBucket, String testDirectory) { - super.setup(host, port, databaseName, s3endpoint, awsAccessKey, awsSecretKey, writableBucket, testDirectory, false); + checkArgument(!isNullOrEmpty(host), "Expected non empty host"); + checkArgument(!isNullOrEmpty(databaseName), "Expected non empty databaseName"); + checkArgument(!isNullOrEmpty(awsAccessKey), "Expected non empty awsAccessKey"); + checkArgument(!isNullOrEmpty(awsSecretKey), "Expected non empty awsSecretKey"); + checkArgument(!isNullOrEmpty(s3endpoint), "Expected non empty s3endpoint"); + checkArgument(!isNullOrEmpty(writableBucket), "Expected non empty writableBucket"); + checkArgument(!isNullOrEmpty(testDirectory), "Expected non empty 
testDirectory"); + this.awsAccessKey = awsAccessKey; + this.awsSecretKey = awsSecretKey; + this.writableBucket = writableBucket; + this.testDirectory = testDirectory; + + s3Client = AmazonS3Client.builder() + .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3endpoint, null)) + .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey))) + .build(); + + setup(host, port, databaseName, createHdfsConfiguration()); + } + + private HdfsConfiguration createHdfsConfiguration() + { + ConfigurationInitializer s3Config = new TrinoS3ConfigurationInitializer(new HiveS3Config() + .setS3AwsAccessKey(awsAccessKey) + .setS3AwsSecretKey(awsSecretKey)); + HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of(s3Config)); + return new DynamicHdfsConfiguration(initializer, ImmutableSet.of()); + } + + @Override + protected Path getBasePath() + { + // HDP 3.1 does not understand s3:// out of the box. + return new Path(format("s3a://%s/%s/", writableBucket, testDirectory)); + } + + @Test + public void testIgnoreHadoopFolderMarker() + throws Exception + { + Path basePath = getBasePath(); + FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); + + String markerFileName = "test_table_$folder$"; + Path filePath = new Path(basePath, markerFileName); + fs.create(filePath).close(); + + assertFalse(Arrays.stream(fs.listStatus(basePath)).anyMatch(file -> file.getPath().getName().equalsIgnoreCase(markerFileName))); + } + + /** + * Tests the same functionality like {@link #testFileIteratorPartitionedListing()} with the + * setup done by native {@link AmazonS3} + */ + @Test + public void testFileIteratorPartitionedListingNativeS3Client() + throws Exception + { + Table.Builder tableBuilder = Table.builder() + .setDatabaseName(table.getSchemaName()) + .setTableName(table.getTableName()) + .setDataColumns(ImmutableList.of(new Column("data", HIVE_LONG, Optional.empty()))) + .setPartitionColumns(ImmutableList.of(new Column("part", HIVE_STRING, Optional.empty()))) + .setOwner(Optional.empty()) + .setTableType("fake"); + tableBuilder.getStorageBuilder() + .setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.CSV)); + Table fakeTable = tableBuilder.build(); + + Path basePath = new Path(getBasePath(), "test-file-iterator-partitioned-listing-native-setup"); + FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath); + TrinoFileSystem trinoFileSystem = new HdfsFileSystemFactory(hdfsEnvironment, new TrinoHdfsFileSystemStats()).create(SESSION); + fs.mkdirs(basePath); + String basePrefix = basePath.toUri().getPath().substring(1); + + // Expected file system tree: + // test-file-iterator-partitioned-listing-native-setup/ + // .hidden/ + // nested-file-in-hidden.txt + // part=simple/ + // _hidden-file.txt + // plain-file.txt + // part=nested/ + // parent/ + // _nested-hidden-file.txt + // nested-file.txt + // part=plus+sign/ + // plus-file.txt + // part=percent%sign/ + // percent-file.txt + // part=url%20encoded/ + // url-encoded-file.txt + // part=level1|level2/ + // pipe-file.txt + // parent1/ + // parent2/ + // deeply-nested-file.txt + // part=level1 | level2/ + // pipe-blanks-file.txt + // empty-directory/ + // .hidden-in-base.txt + + createFile(writableBucket, format("%s/.hidden/nested-file-in-hidden.txt", basePrefix)); + createFile(writableBucket, format("%s/part=simple/_hidden-file.txt", basePrefix)); + createFile(writableBucket, 
format("%s/part=simple/plain-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=nested/parent/_nested-hidden-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=nested/parent/nested-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=plus+sign/plus-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=percent%%sign/percent-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=url%%20encoded/url-encoded-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=level1|level2/pipe-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=level1|level2/parent1/parent2/deeply-nested-file.txt", basePrefix)); + createFile(writableBucket, format("%s/part=level1 | level2/pipe-blanks-file.txt", basePrefix)); + createDirectory(writableBucket, format("%s/empty-directory/", basePrefix)); + createFile(writableBucket, format("%s/.hidden-in-base.txt", basePrefix)); + + // List recursively through hive file iterator + HiveFileIterator recursiveIterator = new HiveFileIterator( + fakeTable, + Location.of(basePath.toString()), + trinoFileSystem, + new FileSystemDirectoryLister(), + new HdfsNamenodeStats(), + HiveFileIterator.NestedDirectoryPolicy.RECURSE); + + List recursiveListing = Streams.stream(recursiveIterator) + .map(TrinoFileStatus::getPath) + .toList(); + // Should not include directories, or files underneath hidden directories + assertThat(recursiveListing).containsExactlyInAnyOrder( + format("%s/part=simple/plain-file.txt", basePath), + format("%s/part=nested/parent/nested-file.txt", basePath), + format("%s/part=plus+sign/plus-file.txt", basePath), + format("%s/part=percent%%sign/percent-file.txt", basePath), + format("%s/part=url%%20encoded/url-encoded-file.txt", basePath), + format("%s/part=level1|level2/pipe-file.txt", basePath), + format("%s/part=level1|level2/parent1/parent2/deeply-nested-file.txt", basePath), + format("%s/part=level1 | level2/pipe-blanks-file.txt", basePath)); + + HiveFileIterator shallowIterator = new HiveFileIterator( + fakeTable, + Location.of(basePath.toString()), + trinoFileSystem, + new FileSystemDirectoryLister(), + new HdfsNamenodeStats(), + HiveFileIterator.NestedDirectoryPolicy.IGNORED); + List shallowListing = Streams.stream(shallowIterator) + .map(TrinoFileStatus::getPath) + .map(Path::new) + .toList(); + // Should not include any hidden files, folders, or nested files + assertThat(shallowListing).isEmpty(); + } + + protected void createDirectory(String bucketName, String key) + { + // create meta-data for your folder and set content-length to 0 + ObjectMetadata metadata = new ObjectMetadata(); + metadata.setContentLength(0); + metadata.setContentType(DIRECTORY_MEDIA_TYPE.toString()); + // create a PutObjectRequest passing the folder name suffixed by / + if (!key.endsWith("/")) { + key += "/"; + } + PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, nullInputStream(), metadata); + // send request to S3 to create folder + s3Client.putObject(putObjectRequest); + } + + protected void createFile(String bucketName, String key) + { + ObjectMetadata metadata = new ObjectMetadata(); + metadata.setContentLength(0); + PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, nullInputStream(), metadata); + s3Client.putObject(putObjectRequest); } } diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemWasb.java 
b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemWasb.java index 3ac98d86636a8..cab2e698f4c98 100644 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemWasb.java +++ b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveFileSystemWasb.java @@ -61,7 +61,7 @@ public void setup(String host, int port, String databaseName, String container, this.accessKey = accessKey; this.testDirectory = testDirectory; - super.setup(host, port, databaseName, false, createHdfsConfiguration()); + super.setup(host, port, databaseName, createHdfsConfiguration()); } private HdfsConfiguration createHdfsConfiguration() diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/S3SelectTestHelper.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/S3SelectTestHelper.java deleted file mode 100644 index e42dd3002ef12..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/S3SelectTestHelper.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.net.HostAndPort; -import io.airlift.concurrent.BoundedExecutor; -import io.airlift.json.JsonCodec; -import io.airlift.stats.CounterStat; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.hdfs.HdfsNamenodeStats; -import io.trino.hdfs.authentication.NoHdfsAuthentication; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; -import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.AbstractTestHiveFileSystem.TestingHiveMetastore; -import io.trino.plugin.hive.DefaultHiveMaterializedViewMetadataFactory; -import io.trino.plugin.hive.HiveConfig; -import io.trino.plugin.hive.HiveLocationService; -import io.trino.plugin.hive.HiveMetadataFactory; -import io.trino.plugin.hive.HivePageSourceProvider; -import io.trino.plugin.hive.HivePartitionManager; -import io.trino.plugin.hive.HiveSplitManager; -import io.trino.plugin.hive.HiveTransactionManager; -import io.trino.plugin.hive.LocationService; -import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.NoneHiveRedirectionsProvider; -import io.trino.plugin.hive.PartitionUpdate; -import io.trino.plugin.hive.PartitionsSystemTableProvider; -import io.trino.plugin.hive.PropertiesSystemTableProvider; -import io.trino.plugin.hive.aws.athena.PartitionProjectionService; -import io.trino.plugin.hive.fs.FileSystemDirectoryLister; -import io.trino.plugin.hive.fs.TransactionScopeCachingDirectoryListerFactory; -import 
io.trino.plugin.hive.metastore.HiveMetastoreConfig; -import io.trino.plugin.hive.metastore.HiveMetastoreFactory; -import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; -import io.trino.plugin.hive.security.SqlStandardAccessControlMetadata; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.ConnectorPageSourceProvider; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorSplitManager; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.type.TestingTypeManager; -import io.trino.testing.MaterializedResult; -import org.apache.hadoop.fs.Path; - -import java.io.IOException; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ScheduledExecutorService; -import java.util.stream.LongStream; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService; -import static io.airlift.concurrent.Threads.daemonThreadsNamed; -import static io.trino.plugin.hive.HiveFileSystemTestUtils.filterTable; -import static io.trino.plugin.hive.HiveFileSystemTestUtils.getSplitsCount; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories; -import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; -import static io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static java.lang.String.format; -import static java.util.concurrent.Executors.newCachedThreadPool; -import static java.util.concurrent.Executors.newScheduledThreadPool; -import static org.testng.util.Strings.isNullOrEmpty; - -public class S3SelectTestHelper -{ - private HdfsEnvironment hdfsEnvironment; - private LocationService locationService; - private TestingHiveMetastore metastoreClient; - private HiveMetadataFactory metadataFactory; - private HiveTransactionManager transactionManager; - private ConnectorSplitManager splitManager; - private ConnectorPageSourceProvider pageSourceProvider; - - private ExecutorService executorService; - private HiveConfig hiveConfig; - private ScheduledExecutorService heartbeatService; - - public S3SelectTestHelper(String host, - int port, - String databaseName, - String awsAccessKey, - String awsSecretKey, - String writableBucket, - String testDirectory, - HiveConfig hiveConfig) - { - checkArgument(!isNullOrEmpty(host), "Expected non empty host"); - checkArgument(!isNullOrEmpty(databaseName), "Expected non empty databaseName"); - checkArgument(!isNullOrEmpty(awsAccessKey), "Expected non empty awsAccessKey"); - checkArgument(!isNullOrEmpty(awsSecretKey), "Expected non empty awsSecretKey"); - checkArgument(!isNullOrEmpty(writableBucket), "Expected non empty writableBucket"); - checkArgument(!isNullOrEmpty(testDirectory), "Expected non empty testDirectory"); - - executorService = newCachedThreadPool(daemonThreadsNamed("s3select-tests-%s")); - heartbeatService = newScheduledThreadPool(1); - - ConfigurationInitializer s3Config = new TrinoS3ConfigurationInitializer(new HiveS3Config() - .setS3AwsAccessKey(awsAccessKey) - .setS3AwsSecretKey(awsSecretKey)); - HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(new 
HdfsConfig(), ImmutableSet.of(s3Config)); - HdfsConfiguration hdfsConfiguration = new DynamicHdfsConfiguration(initializer, ImmutableSet.of()); - - this.hiveConfig = hiveConfig; - HivePartitionManager hivePartitionManager = new HivePartitionManager(this.hiveConfig); - - hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, new HdfsConfig(), new NoHdfsAuthentication()); - locationService = new HiveLocationService(hdfsEnvironment, hiveConfig); - JsonCodec partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class); - - metastoreClient = new TestingHiveMetastore( - new BridgingHiveMetastore( - testingThriftHiveMetastoreBuilder() - .metastoreClient(HostAndPort.fromParts(host, port)) - .hiveConfig(this.hiveConfig) - .hdfsEnvironment(hdfsEnvironment) - .build()), - new Path(format("s3a://%s/%s/", writableBucket, testDirectory)), - hdfsEnvironment); - metadataFactory = new HiveMetadataFactory( - new CatalogName("hive"), - this.hiveConfig, - new HiveMetastoreConfig(), - HiveMetastoreFactory.ofInstance(metastoreClient), - getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment), - new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS), - hdfsEnvironment, - hivePartitionManager, - newDirectExecutorService(), - heartbeatService, - TESTING_TYPE_MANAGER, - NOOP_METADATA_PROVIDER, - locationService, - partitionUpdateCodec, - new NodeVersion("test_version"), - new NoneHiveRedirectionsProvider(), - ImmutableSet.of( - new PartitionsSystemTableProvider(hivePartitionManager, TESTING_TYPE_MANAGER), - new PropertiesSystemTableProvider()), - new DefaultHiveMaterializedViewMetadataFactory(), - SqlStandardAccessControlMetadata::new, - new FileSystemDirectoryLister(), - new TransactionScopeCachingDirectoryListerFactory(hiveConfig), - new PartitionProjectionService(this.hiveConfig, ImmutableMap.of(), new TestingTypeManager()), - true); - transactionManager = new HiveTransactionManager(metadataFactory); - - splitManager = new HiveSplitManager( - transactionManager, - hivePartitionManager, - new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS), - new HdfsNamenodeStats(), - new BoundedExecutor(executorService, this.hiveConfig.getMaxSplitIteratorThreads()), - new CounterStat(), - this.hiveConfig.getMaxOutstandingSplits(), - this.hiveConfig.getMaxOutstandingSplitsSize(), - this.hiveConfig.getMinPartitionBatchSize(), - this.hiveConfig.getMaxPartitionBatchSize(), - this.hiveConfig.getMaxInitialSplits(), - this.hiveConfig.getSplitLoaderConcurrency(), - this.hiveConfig.getMaxSplitsPerSecond(), - this.hiveConfig.getRecursiveDirWalkerEnabled(), - TESTING_TYPE_MANAGER, - this.hiveConfig.getMaxPartitionsPerScan()); - - pageSourceProvider = new HivePageSourceProvider( - TESTING_TYPE_MANAGER, - this.hiveConfig, - getDefaultHivePageSourceFactories(hdfsEnvironment, this.hiveConfig)); - } - - public S3SelectTestHelper(String host, - int port, - String databaseName, - String awsAccessKey, - String awsSecretKey, - String writableBucket, - String testDirectory) - { - this(host, port, databaseName, awsAccessKey, awsSecretKey, writableBucket, testDirectory, new HiveConfig().setS3SelectPushdownEnabled(true)); - } - - public HiveTransactionManager getTransactionManager() - { - return transactionManager; - } - - public ConnectorSplitManager getSplitManager() - { - return splitManager; - } - - public ConnectorPageSourceProvider getPageSourceProvider() - { - return pageSourceProvider; - } - - public HiveConfig getHiveConfig() - { - return hiveConfig; - } - - public void tearDown() - { - hdfsEnvironment 
= null; - locationService = null; - metastoreClient = null; - metadataFactory = null; - transactionManager = null; - splitManager = null; - pageSourceProvider = null; - hiveConfig = null; - if (executorService != null) { - executorService.shutdownNow(); - executorService = null; - } - if (heartbeatService != null) { - heartbeatService.shutdownNow(); - heartbeatService = null; - } - } - - int getTableSplitsCount(SchemaTableName table) - { - return getSplitsCount( - table, - getTransactionManager(), - getHiveConfig(), - getSplitManager()); - } - - MaterializedResult getFilteredTableResult(SchemaTableName table, ColumnHandle column) - { - try { - return filterTable( - table, - List.of(column), - getTransactionManager(), - getHiveConfig(), - getPageSourceProvider(), - getSplitManager()); - } - catch (IOException ignored) { - } - - return null; - } - - static MaterializedResult expectedResult(ConnectorSession session, int start, int end) - { - MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, BIGINT); - LongStream.rangeClosed(start, end).forEach(builder::row); - return builder.build(); - } - - static boolean isSplitCountInOpenInterval(int splitCount, - int lowerBound, - int upperBound) - { - // Split number may vary, the minimum number of splits being obtained with - // the first split of maxInitialSplitSize and the rest of maxSplitSize - return lowerBound < splitCount && splitCount < upperBound; - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectCsvPushdownWithSplits.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectCsvPushdownWithSplits.java deleted file mode 100644 index 2edc5bd71f0bd..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectCsvPushdownWithSplits.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.s3select; - -import io.airlift.units.DataSize; -import io.trino.plugin.hive.HiveConfig; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.SchemaTableName; -import io.trino.testing.MaterializedResult; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Parameters; -import org.testng.annotations.Test; - -import java.util.Optional; - -import static io.airlift.units.DataSize.Unit.KILOBYTE; -import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; -import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveFileSystemTestUtils.newSession; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.plugin.hive.s3select.S3SelectTestHelper.expectedResult; -import static io.trino.plugin.hive.s3select.S3SelectTestHelper.isSplitCountInOpenInterval; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static org.testng.Assert.assertTrue; - -public class TestHiveFileSystemS3SelectCsvPushdownWithSplits -{ - private String host; - private int port; - private String databaseName; - private String awsAccessKey; - private String awsSecretKey; - private String writableBucket; - private String testDirectory; - - private SchemaTableName tableCsvWithSplits; - - @Parameters({ - "hive.hadoop2.metastoreHost", - "hive.hadoop2.metastorePort", - "hive.hadoop2.databaseName", - "hive.hadoop2.s3.awsAccessKey", - "hive.hadoop2.s3.awsSecretKey", - "hive.hadoop2.s3.writableBucket", - "hive.hadoop2.s3.testDirectory", - }) - @BeforeClass - public void setup(String host, int port, String databaseName, String awsAccessKey, String awsSecretKey, String writableBucket, String testDirectory) - { - this.host = host; - this.port = port; - this.databaseName = databaseName; - this.awsAccessKey = awsAccessKey; - this.awsSecretKey = awsSecretKey; - this.writableBucket = writableBucket; - this.testDirectory = testDirectory; - - tableCsvWithSplits = new SchemaTableName(databaseName, "trino_s3select_test_csv_scan_range_pushdown"); - } - - @DataProvider(name = "testSplitSize") - public static Object[][] splitSizeParametersProvider() - { - return new Object[][] {{3, 2, 15, 30}, {50, 30, 2, 4}}; - } - - @Test(dataProvider = "testSplitSize") - public void testQueryPushdownWithSplitSizeForCsv(int maxSplitSizeKB, - int maxInitialSplitSizeKB, - int minSplitCount, - int maxSplitCount) - { - S3SelectTestHelper s3SelectTestHelper = null; - try { - HiveConfig hiveConfig = new HiveConfig() - .setS3SelectPushdownEnabled(true) - .setMaxSplitSize(DataSize.of(maxSplitSizeKB, KILOBYTE)) - .setMaxInitialSplitSize(DataSize.of(maxInitialSplitSizeKB, KILOBYTE)); - s3SelectTestHelper = new S3SelectTestHelper( - host, - port, - databaseName, - awsAccessKey, - awsSecretKey, - writableBucket, - testDirectory, - hiveConfig); - - int tableSplitsCount = s3SelectTestHelper.getTableSplitsCount(tableCsvWithSplits); - assertTrue(isSplitCountInOpenInterval(tableSplitsCount, minSplitCount, maxSplitCount)); - - ColumnHandle indexColumn = createBaseColumn("index", 0, HIVE_INT, BIGINT, REGULAR, Optional.empty()); - MaterializedResult filteredTableResult = s3SelectTestHelper.getFilteredTableResult(tableCsvWithSplits, indexColumn); - assertEqualsIgnoreOrder(filteredTableResult, - expectedResult(newSession(s3SelectTestHelper.getHiveConfig()), 1, 300)); - } - finally { - if 
(s3SelectTestHelper != null) { - s3SelectTestHelper.tearDown(); - } - } - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdown.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdown.java deleted file mode 100644 index 260d03608d2c6..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdown.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -import com.google.common.collect.ImmutableList; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.SchemaTableName; -import io.trino.testing.MaterializedResult; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Parameters; -import org.testng.annotations.Test; - -import java.util.List; -import java.util.Optional; - -import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; -import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveFileSystemTestUtils.filterTable; -import static io.trino.plugin.hive.HiveFileSystemTestUtils.newSession; -import static io.trino.plugin.hive.HiveFileSystemTestUtils.readTable; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; - -public class TestHiveFileSystemS3SelectJsonPushdown -{ - private SchemaTableName tableJson; - - private S3SelectTestHelper s3SelectTestHelper; - - @Parameters({ - "hive.hadoop2.metastoreHost", - "hive.hadoop2.metastorePort", - "hive.hadoop2.databaseName", - "hive.hadoop2.s3.awsAccessKey", - "hive.hadoop2.s3.awsSecretKey", - "hive.hadoop2.s3.writableBucket", - "hive.hadoop2.s3.testDirectory", - }) - @BeforeClass - public void setup(String host, int port, String databaseName, String awsAccessKey, String awsSecretKey, String writableBucket, String testDirectory) - { - s3SelectTestHelper = new S3SelectTestHelper(host, port, databaseName, awsAccessKey, awsSecretKey, writableBucket, testDirectory); - tableJson = new SchemaTableName(databaseName, "trino_s3select_test_external_fs_json"); - } - - @Test - public void testGetRecordsJson() - throws Exception - { - assertEqualsIgnoreOrder( - readTable(tableJson, - s3SelectTestHelper.getTransactionManager(), - s3SelectTestHelper.getHiveConfig(), - s3SelectTestHelper.getPageSourceProvider(), - s3SelectTestHelper.getSplitManager()), - MaterializedResult.resultBuilder(newSession(s3SelectTestHelper.getHiveConfig()), BIGINT, BIGINT) - .row(2L, 4L).row(5L, 6L) // test_table.json - .row(7L, 23L).row(28L, 22L).row(13L, 10L) // test_table.json.gz - .row(1L, 19L).row(6L, 3L).row(24L, 22L).row(100L, 77L) // test_table.json.bz2 - .build()); - } - - @Test - public void testFilterRecordsJson() - throws 
Exception - { - List projectedColumns = ImmutableList.of( - createBaseColumn("col_1", 0, HIVE_INT, BIGINT, REGULAR, Optional.empty())); - - assertEqualsIgnoreOrder( - filterTable(tableJson, - projectedColumns, - s3SelectTestHelper.getTransactionManager(), - s3SelectTestHelper.getHiveConfig(), - s3SelectTestHelper.getPageSourceProvider(), - s3SelectTestHelper.getSplitManager()), - MaterializedResult.resultBuilder(newSession(s3SelectTestHelper.getHiveConfig()), BIGINT) - .row(2L).row(5L) // test_table.json - .row(7L).row(28L).row(13L) // test_table.json.gz - .row(1L).row(6L).row(24L).row(100L) // test_table.json.bz2 - .build()); - } - - @AfterClass(alwaysRun = true) - public void tearDown() - { - s3SelectTestHelper.tearDown(); - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdownWithSplits.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdownWithSplits.java deleted file mode 100644 index 1998ec9368daf..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectJsonPushdownWithSplits.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.s3select; - -import io.airlift.units.DataSize; -import io.trino.plugin.hive.HiveConfig; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.SchemaTableName; -import io.trino.testing.MaterializedResult; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Parameters; -import org.testng.annotations.Test; - -import java.util.Optional; - -import static io.airlift.units.DataSize.Unit.KILOBYTE; -import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; -import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveFileSystemTestUtils.newSession; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.plugin.hive.s3select.S3SelectTestHelper.expectedResult; -import static io.trino.plugin.hive.s3select.S3SelectTestHelper.isSplitCountInOpenInterval; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static org.testng.Assert.assertTrue; - -public class TestHiveFileSystemS3SelectJsonPushdownWithSplits -{ - private String host; - private int port; - private String databaseName; - private String awsAccessKey; - private String awsSecretKey; - private String writableBucket; - private String testDirectory; - - private SchemaTableName tableJsonWithSplits; - - @Parameters({ - "hive.hadoop2.metastoreHost", - "hive.hadoop2.metastorePort", - "hive.hadoop2.databaseName", - "hive.hadoop2.s3.awsAccessKey", - "hive.hadoop2.s3.awsSecretKey", - "hive.hadoop2.s3.writableBucket", - "hive.hadoop2.s3.testDirectory", - }) - @BeforeClass - public void setup(String host, int port, String databaseName, String awsAccessKey, String awsSecretKey, String writableBucket, String testDirectory) - { - this.host = host; - this.port = port; - this.databaseName = databaseName; - this.awsAccessKey = awsAccessKey; - this.awsSecretKey = awsSecretKey; - this.writableBucket = writableBucket; - this.testDirectory = testDirectory; - - this.tableJsonWithSplits = new SchemaTableName(databaseName, "trino_s3select_test_json_scan_range_pushdown"); - } - - @DataProvider(name = "testSplitSize") - public static Object[][] splitSizeParametersProvider() - { - return new Object[][] {{15, 10, 6, 12}, {50, 30, 2, 4}}; - } - - @Test(dataProvider = "testSplitSize") - public void testQueryPushdownWithSplitSizeForJson(int maxSplitSizeKB, - int maxInitialSplitSizeKB, - int minSplitCount, - int maxSplitCount) - { - S3SelectTestHelper s3SelectTestHelper = null; - try { - HiveConfig hiveConfig = new HiveConfig() - .setS3SelectPushdownEnabled(true) - .setMaxSplitSize(DataSize.of(maxSplitSizeKB, KILOBYTE)) - .setMaxInitialSplitSize(DataSize.of(maxInitialSplitSizeKB, KILOBYTE)); - s3SelectTestHelper = new S3SelectTestHelper( - host, - port, - databaseName, - awsAccessKey, - awsSecretKey, - writableBucket, - testDirectory, - hiveConfig); - - int tableSplitsCount = s3SelectTestHelper.getTableSplitsCount(tableJsonWithSplits); - assertTrue(isSplitCountInOpenInterval(tableSplitsCount, minSplitCount, maxSplitCount)); - - ColumnHandle indexColumn = createBaseColumn("col_1", 0, HIVE_INT, BIGINT, REGULAR, Optional.empty()); - MaterializedResult filteredTableResult = s3SelectTestHelper.getFilteredTableResult(tableJsonWithSplits, indexColumn); - assertEqualsIgnoreOrder(filteredTableResult, - expectedResult(newSession(s3SelectTestHelper.getHiveConfig()), 1, 300)); - } - finally { - if 
(s3SelectTestHelper != null) { - s3SelectTestHelper.tearDown(); - } - } - } -} diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectPushdown.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectPushdown.java deleted file mode 100644 index eef3f86a1ad53..0000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/s3select/TestHiveFileSystemS3SelectPushdown.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -import com.google.common.collect.ImmutableList; -import io.trino.plugin.hive.AbstractTestHiveFileSystemS3; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.SchemaTableName; -import io.trino.testing.MaterializedResult; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Parameters; -import org.testng.annotations.Test; - -import java.util.List; -import java.util.Optional; - -import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; -import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; - -public class TestHiveFileSystemS3SelectPushdown - extends AbstractTestHiveFileSystemS3 -{ - protected SchemaTableName tableWithPipeDelimiter; - protected SchemaTableName tableWithCommaDelimiter; - - @Parameters({ - "hive.hadoop2.metastoreHost", - "hive.hadoop2.metastorePort", - "hive.hadoop2.databaseName", - "hive.hadoop2.s3.endpoint", - "hive.hadoop2.s3.awsAccessKey", - "hive.hadoop2.s3.awsSecretKey", - "hive.hadoop2.s3.writableBucket", - "hive.hadoop2.s3.testDirectory", - }) - @BeforeClass - public void setup(String host, int port, String databaseName, String s3endpoint, String awsAccessKey, String awsSecretKey, String writableBucket, String testDirectory) - { - super.setup(host, port, databaseName, s3endpoint, awsAccessKey, awsSecretKey, writableBucket, testDirectory, true); - tableWithPipeDelimiter = new SchemaTableName(database, "trino_s3select_test_external_fs_with_pipe_delimiter"); - tableWithCommaDelimiter = new SchemaTableName(database, "trino_s3select_test_external_fs_with_comma_delimiter"); - } - - @Test - public void testGetRecordsWithPipeDelimiter() - throws Exception - { - assertEqualsIgnoreOrder( - readTable(tableWithPipeDelimiter), - MaterializedResult.resultBuilder(newSession(), BIGINT, BIGINT) - .row(1L, 2L).row(3L, 4L).row(55L, 66L) // test_table_with_pipe_delimiter.csv - .row(27L, 10L).row(8L, 2L).row(456L, 789L) // test_table_with_pipe_delimiter.csv.gzip - .row(22L, 11L).row(78L, 76L).row(1L, 2L).row(36L, 90L) // test_table_with_pipe_delimiter.csv.bz2 - .build()); - } - - @Test - public void testFilterRecordsWithPipeDelimiter() - throws Exception - { - List projectedColumns = ImmutableList.of( - 
createBaseColumn("t_bigint", 0, HIVE_INT, BIGINT, REGULAR, Optional.empty())); - - assertEqualsIgnoreOrder( - filterTable(tableWithPipeDelimiter, projectedColumns), - MaterializedResult.resultBuilder(newSession(), BIGINT) - .row(1L).row(3L).row(55L) // test_table_with_pipe_delimiter.csv - .row(27L).row(8L).row(456L) // test_table_with_pipe_delimiter.csv.gzip - .row(22L).row(78L).row(1L).row(36L) // test_table_with_pipe_delimiter.csv.bz2 - .build()); - } - - @Test - public void testGetRecordsWithCommaDelimiter() - throws Exception - { - assertEqualsIgnoreOrder( - readTable(tableWithCommaDelimiter), - MaterializedResult.resultBuilder(newSession(), BIGINT, BIGINT) - .row(7L, 1L).row(19L, 10L).row(1L, 345L) // test_table_with_comma_delimiter.csv - .row(27L, 10L).row(28L, 9L).row(90L, 94L) // test_table_with_comma_delimiter.csv.gzip - .row(11L, 24L).row(1L, 6L).row(21L, 12L).row(0L, 0L) // test_table_with_comma_delimiter.csv.bz2 - .build()); - } - - @Test - public void testFilterRecordsWithCommaDelimiter() - throws Exception - { - List projectedColumns = ImmutableList.of( - createBaseColumn("t_bigint", 0, HIVE_INT, BIGINT, REGULAR, Optional.empty())); - - assertEqualsIgnoreOrder( - filterTable(tableWithCommaDelimiter, projectedColumns), - MaterializedResult.resultBuilder(newSession(), BIGINT) - .row(7L).row(19L).row(1L) // test_table_with_comma_delimiter.csv - .row(27L).row(28L).row(90L) // test_table_with_comma_delimiter.csv.gzip - .row(11L).row(1L).row(21L).row(0L) // test_table_with_comma_delimiter.csv.bz2 - .build()); - } -} diff --git a/plugin/trino-hive/pom.xml b/plugin/trino-hive/pom.xml index 277ac5acf64d2..644c749499e3b 100644 --- a/plugin/trino-hive/pom.xml +++ b/plugin/trino-hive/pom.xml @@ -36,11 +36,6 @@ aws-java-sdk-glue - - com.amazonaws - aws-java-sdk-s3 - - com.amazonaws aws-java-sdk-sts @@ -286,6 +281,12 @@ provided + + com.amazonaws + aws-java-sdk-s3 + runtime + + io.airlift log-manager @@ -532,7 +533,6 @@ **/TestHiveGlueMetastore.java **/TestHiveS3AndGlueMetastoreTest.java **/TestTrinoS3FileSystemAwsS3.java - **/TestS3SelectQueries.java **/TestFullParquetReader.java **/TestParquetReader.java **/Test*FailureRecoveryTest.java @@ -590,7 +590,6 @@ **/TestHiveGlueMetastore.java **/TestHiveS3AndGlueMetastoreTest.java **/TestTrinoS3FileSystemAwsS3.java - **/TestS3SelectQueries.java diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java index 2c8073d677798..f15040a8eb55c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java @@ -39,7 +39,6 @@ import io.trino.plugin.hive.metastore.Partition; import io.trino.plugin.hive.metastore.StorageFormat; import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.s3select.S3SelectPushdown; import io.trino.plugin.hive.util.AcidTables.AcidState; import io.trino.plugin.hive.util.AcidTables.ParsedDelta; import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion; @@ -408,14 +407,8 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) Location location = Location.of(getPartitionLocation(table, partition.getPartition())); - boolean s3SelectPushdownEnabled = S3SelectPushdown.shouldEnablePushdownForTable(session, table, location.toString(), partition.getPartition()); - - // S3 Select pushdown works at the granularity of individual S3 objects for compressed 
files - // and finer granularity for uncompressed files using scan range feature. - boolean shouldEnableSplits = S3SelectPushdown.isSplittable(s3SelectPushdownEnabled, schema, location.toString()); - // Skip header / footer lines are not splittable except for a special case when skip.header.line.count=1 - boolean splittable = shouldEnableSplits && getFooterCount(schema) == 0 && getHeaderCount(schema) <= 1; + boolean splittable = getFooterCount(schema) == 0 && getHeaderCount(schema) <= 1; if (SYMLINK_TEXT_INPUT_FORMAT_CLASS.equals(getInputFormatName(schema).orElse(null))) { if (tableBucketInfo.isPresent()) { @@ -436,7 +429,6 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) Optional.empty(), getMaxInitialSplitSize(session), isForceLocalScheduling(session), - s3SelectPushdownEnabled, maxSplitFileSize); for (Entry> entry : Multimaps.asMap(targets).entrySet()) { @@ -488,7 +480,6 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) bucketValidation, getMaxInitialSplitSize(session), isForceLocalScheduling(session), - s3SelectPushdownEnabled, maxSplitFileSize); if (isTransactionalTable(table.getParameters())) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java index 6c0dd6ea13207..02dbd79c67535 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java @@ -59,6 +59,9 @@ "hive.partition-use-column-names", "hive.allow-corrupt-writes-for-testing", "hive.optimize-symlink-listing", + "hive.s3select-pushdown.enabled", + "hive.s3select-pushdown.experimental-textfile-pushdown-enabled", + "hive.s3select-pushdown.max-connections", }) public class HiveConfig { @@ -134,10 +137,6 @@ public class HiveConfig private boolean ignoreCorruptedStatistics; private boolean collectColumnStatisticsOnWrite = true; - private boolean s3SelectPushdownEnabled; - private boolean s3SelectExperimentalPushdownEnabled; - private int s3SelectPushdownMaxConnections = 500; - private boolean isTemporaryStagingDirectoryEnabled = true; private String temporaryStagingDirectoryPath = "/tmp/presto-${USER}"; private boolean delegateTransactionalManagedTableLocationToMetastore; @@ -1000,45 +999,6 @@ public HiveConfig setCollectColumnStatisticsOnWrite(boolean collectColumnStatist return this; } - public boolean isS3SelectPushdownEnabled() - { - return s3SelectPushdownEnabled; - } - - @Config("hive.s3select-pushdown.enabled") - @ConfigDescription("Enable query pushdown to JSON files using the AWS S3 Select service") - public HiveConfig setS3SelectPushdownEnabled(boolean s3SelectPushdownEnabled) - { - this.s3SelectPushdownEnabled = s3SelectPushdownEnabled; - return this; - } - - public boolean isS3SelectExperimentalPushdownEnabled() - { - return s3SelectExperimentalPushdownEnabled; - } - - @Config("hive.s3select-pushdown.experimental-textfile-pushdown-enabled") - @ConfigDescription("Enable query pushdown to TEXTFILE tables using the AWS S3 Select service") - public HiveConfig setS3SelectExperimentalPushdownEnabled(boolean s3SelectExperimentalPushdownEnabled) - { - this.s3SelectExperimentalPushdownEnabled = s3SelectExperimentalPushdownEnabled; - return this; - } - - @Min(1) - public int getS3SelectPushdownMaxConnections() - { - return s3SelectPushdownMaxConnections; - } - - @Config("hive.s3select-pushdown.max-connections") - public HiveConfig setS3SelectPushdownMaxConnections(int 
s3SelectPushdownMaxConnections) - { - this.s3SelectPushdownMaxConnections = s3SelectPushdownMaxConnections; - return this; - } - @Config("hive.temporary-staging-directory-enabled") @ConfigDescription("Should use (if possible) temporary staging directory for write operations") public HiveConfig setTemporaryStagingDirectoryEnabled(boolean temporaryStagingDirectoryEnabled) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveModule.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveModule.java index 62bc987788b22..c081507cc2892 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveModule.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveModule.java @@ -52,7 +52,6 @@ import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.plugin.hive.parquet.ParquetWriterConfig; import io.trino.plugin.hive.rcfile.RcFilePageSourceFactory; -import io.trino.plugin.hive.s3select.TrinoS3ClientFactory; import io.trino.spi.connector.ConnectorNodePartitioningProvider; import io.trino.spi.connector.ConnectorPageSinkProvider; import io.trino.spi.connector.ConnectorPageSourceProvider; @@ -87,8 +86,6 @@ public void configure(Binder binder) newOptionalBinder(binder, HiveMaterializedViewPropertiesProvider.class) .setDefault().toInstance(ImmutableList::of); - binder.bind(TrinoS3ClientFactory.class).in(Scopes.SINGLETON); - binder.bind(CachingDirectoryLister.class).in(Scopes.SINGLETON); newExporter(binder).export(CachingDirectoryLister.class).withGeneratedName(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index 399151c851607..709cd854d9716 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -147,7 +147,6 @@ public ConnectorPageSource createPageSource( typeManager, hiveSplit.getBucketConversion(), hiveSplit.getBucketValidation(), - hiveSplit.isS3SelectPushdownEnabled(), hiveSplit.getAcidInfo(), originalFile, hiveTable.getTransaction(), @@ -177,7 +176,6 @@ public static Optional createHivePageSource( TypeManager typeManager, Optional bucketConversion, Optional bucketValidation, - boolean s3SelectPushdownEnabled, Optional acidInfo, boolean originalFile, AcidTransaction transaction, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java index 39c4bcdf5c780..907388434b94b 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java @@ -106,7 +106,6 @@ public final class HiveSessionProperties private static final String IGNORE_CORRUPTED_STATISTICS = "ignore_corrupted_statistics"; private static final String COLLECT_COLUMN_STATISTICS_ON_WRITE = "collect_column_statistics_on_write"; private static final String OPTIMIZE_MISMATCHED_BUCKET_COUNT = "optimize_mismatched_bucket_count"; - private static final String S3_SELECT_PUSHDOWN_ENABLED = "s3_select_pushdown_enabled"; private static final String DELEGATE_TRANSACTIONAL_MANAGED_TABLE_LOCATION_TO_METASTORE = "delegate_transactional_managed_table_location_to_metastore"; private static final String IGNORE_ABSENT_PARTITIONS = "ignore_absent_partitions"; private static final String QUERY_PARTITION_FILTER_REQUIRED = 
"query_partition_filter_required"; @@ -431,11 +430,6 @@ public HiveSessionProperties( "Experimental: Enable optimization to avoid shuffle when bucket count is compatible but not the same", hiveConfig.isOptimizeMismatchedBucketCount(), false), - booleanProperty( - S3_SELECT_PUSHDOWN_ENABLED, - "S3 Select pushdown enabled", - hiveConfig.isS3SelectPushdownEnabled(), - false), booleanProperty( DELEGATE_TRANSACTIONAL_MANAGED_TABLE_LOCATION_TO_METASTORE, "When transactional managed table is created via Trino the location will not be set in request sent to HMS and location will be determined by metastore; if this property is set to true CREATE TABLE AS queries are not supported.", @@ -757,11 +751,6 @@ public static boolean isPropagateTableScanSortingProperties(ConnectorSession ses return session.getProperty(PROPAGATE_TABLE_SCAN_SORTING_PROPERTIES, Boolean.class); } - public static boolean isS3SelectPushdownEnabled(ConnectorSession session) - { - return session.getProperty(S3_SELECT_PUSHDOWN_ENABLED, Boolean.class); - } - public static boolean isStatisticsEnabled(ConnectorSession session) { return session.getProperty(STATISTICS_ENABLED, Boolean.class); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java index 1167039edf67a..62df401b71aa6 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java @@ -55,7 +55,6 @@ public class HiveSplit private final TableToPartitionMapping tableToPartitionMapping; private final Optional bucketConversion; private final Optional bucketValidation; - private final boolean s3SelectPushdownEnabled; private final Optional acidInfo; private final SplitWeight splitWeight; @@ -76,7 +75,6 @@ public HiveSplit( @JsonProperty("tableToPartitionMapping") TableToPartitionMapping tableToPartitionMapping, @JsonProperty("bucketConversion") Optional bucketConversion, @JsonProperty("bucketValidation") Optional bucketValidation, - @JsonProperty("s3SelectPushdownEnabled") boolean s3SelectPushdownEnabled, @JsonProperty("acidInfo") Optional acidInfo, @JsonProperty("splitWeight") SplitWeight splitWeight) { @@ -110,7 +108,6 @@ public HiveSplit( this.tableToPartitionMapping = tableToPartitionMapping; this.bucketConversion = bucketConversion; this.bucketValidation = bucketValidation; - this.s3SelectPushdownEnabled = s3SelectPushdownEnabled; this.acidInfo = acidInfo; this.splitWeight = requireNonNull(splitWeight, "splitWeight is null"); } @@ -212,12 +209,6 @@ public boolean isRemotelyAccessible() return !forceLocalScheduling; } - @JsonProperty - public boolean isS3SelectPushdownEnabled() - { - return s3SelectPushdownEnabled; - } - @JsonProperty public Optional getAcidInfo() { @@ -261,7 +252,6 @@ public Object getInfo() .put("forceLocalScheduling", forceLocalScheduling) .put("partitionName", partitionName) .put("deserializerClassName", getDeserializerClassName(schema)) - .put("s3SelectPushdownEnabled", s3SelectPushdownEnabled) .buildOrThrow(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java index 1c8affff2a004..63f36aec02698 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplitSource.java @@ -312,7 +312,6 @@ else if (maxSplitBytes * 2 >= remainingBlockBytes) { 
internalSplit.getTableToPartitionMapping(), internalSplit.getBucketConversion(), internalSplit.getBucketValidation(), - internalSplit.isS3SelectPushdownEnabled(), internalSplit.getAcidInfo(), splitWeightProvider.weightForSplitSizeInBytes(splitBytes))); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java index 40c7e7a52b6e8..ae6c28e0c63bd 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java @@ -54,7 +54,6 @@ public class InternalHiveSplit private final TableToPartitionMapping tableToPartitionMapping; private final Optional bucketConversion; private final Optional bucketValidation; - private final boolean s3SelectPushdownEnabled; private final Optional acidInfo; private final BooleanSupplier partitionMatchSupplier; @@ -78,7 +77,6 @@ public InternalHiveSplit( TableToPartitionMapping tableToPartitionMapping, Optional bucketConversion, Optional bucketValidation, - boolean s3SelectPushdownEnabled, Optional acidInfo, BooleanSupplier partitionMatchSupplier) { @@ -114,7 +112,6 @@ public InternalHiveSplit( this.tableToPartitionMapping = tableToPartitionMapping; this.bucketConversion = bucketConversion; this.bucketValidation = bucketValidation; - this.s3SelectPushdownEnabled = s3SelectPushdownEnabled; this.acidInfo = acidInfo; this.partitionMatchSupplier = partitionMatchSupplier; } @@ -144,11 +141,6 @@ public long getFileModifiedTime() return fileModifiedTime; } - public boolean isS3SelectPushdownEnabled() - { - return s3SelectPushdownEnabled; - } - public Properties getSchema() { return schema; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/IonSqlQueryBuilder.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/IonSqlQueryBuilder.java deleted file mode 100644 index 53a1ca72288b7..0000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/IonSqlQueryBuilder.java +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.s3select; - -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; -import com.google.common.primitives.Shorts; -import com.google.common.primitives.SignedBytes; -import io.airlift.slice.Slice; -import io.trino.plugin.hive.HiveColumnHandle; -import io.trino.spi.predicate.Domain; -import io.trino.spi.predicate.Range; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.type.Type; -import io.trino.spi.type.TypeManager; -import io.trino.spi.type.VarcharType; -import org.joda.time.format.DateTimeFormatter; - -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.collect.Iterables.getOnlyElement; -import static io.trino.plugin.hive.s3select.S3SelectDataType.CSV; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.BooleanType.BOOLEAN; -import static io.trino.spi.type.DateType.DATE; -import static io.trino.spi.type.IntegerType.INTEGER; -import static io.trino.spi.type.SmallintType.SMALLINT; -import static io.trino.spi.type.TinyintType.TINYINT; -import static java.lang.Math.toIntExact; -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.DAYS; -import static java.util.stream.Collectors.joining; -import static org.joda.time.chrono.ISOChronology.getInstanceUTC; -import static org.joda.time.format.ISODateTimeFormat.date; - -/** - * S3 Select uses Ion SQL++ query language. This class is used to construct a valid Ion SQL++ query - * to be evaluated with S3 Select on an S3 object. - */ -public class IonSqlQueryBuilder -{ - private static final DateTimeFormatter FORMATTER = date().withChronology(getInstanceUTC()); - private static final String DATA_SOURCE = "S3Object s"; - private final TypeManager typeManager; - private final S3SelectDataType s3SelectDataType; - private final String nullPredicate; - private final String notNullPredicate; - - public IonSqlQueryBuilder(TypeManager typeManager, S3SelectDataType s3SelectDataType, Optional optionalNullCharacterEncoding) - { - if (optionalNullCharacterEncoding.isPresent()) { - checkArgument(s3SelectDataType == CSV, "Null character encoding should only be provided for CSV data"); - } - - this.typeManager = requireNonNull(typeManager, "typeManager is null"); - this.s3SelectDataType = requireNonNull(s3SelectDataType, "s3SelectDataType is null"); - - String nullCharacterEncoding = optionalNullCharacterEncoding.orElse(""); - this.nullPredicate = switch (s3SelectDataType) { - case JSON -> "IS NULL"; - case CSV -> "= '%s'".formatted(nullCharacterEncoding); - }; - this.notNullPredicate = switch (s3SelectDataType) { - case JSON -> "IS NOT NULL"; - case CSV -> "!= '%s'".formatted(nullCharacterEncoding); - }; - } - - public String buildSql(List columns, TupleDomain tupleDomain) - { - columns.forEach(column -> checkArgument(column.isBaseColumn(), "%s is not a base column", column)); - tupleDomain.getDomains().ifPresent(domains -> { - domains.keySet().forEach(column -> checkArgument(column.isBaseColumn(), "%s is not a base column", column)); - }); - - // SELECT clause - StringBuilder sql = new StringBuilder("SELECT "); - - if (columns.isEmpty()) { - sql.append("' '"); - } - else { - String columnNames = columns.stream() - .map(this::getFullyQualifiedColumnName) - .collect(joining(", ")); - 
sql.append(columnNames); - } - - // FROM clause - sql.append(" FROM "); - sql.append(DATA_SOURCE); - - // WHERE clause - List clauses = toConjuncts(columns, tupleDomain); - if (!clauses.isEmpty()) { - sql.append(" WHERE ") - .append(Joiner.on(" AND ").join(clauses)); - } - - return sql.toString(); - } - - private String getFullyQualifiedColumnName(HiveColumnHandle column) - { - return switch (s3SelectDataType) { - case JSON -> "s.%s".formatted(column.getBaseColumnName()); - case CSV -> "s._%d".formatted(column.getBaseHiveColumnIndex() + 1); - }; - } - - private List toConjuncts(List columns, TupleDomain tupleDomain) - { - ImmutableList.Builder builder = ImmutableList.builder(); - for (HiveColumnHandle column : columns) { - Type type = column.getHiveType().getType(typeManager); - if (tupleDomain.getDomains().isPresent() && isSupported(type)) { - Domain domain = tupleDomain.getDomains().get().get(column); - if (domain != null) { - builder.add(toPredicate(domain, type, column)); - } - } - } - return builder.build(); - } - - private static boolean isSupported(Type type) - { - Type validType = requireNonNull(type, "type is null"); - return validType.equals(BIGINT) || - validType.equals(TINYINT) || - validType.equals(SMALLINT) || - validType.equals(INTEGER) || - validType.equals(BOOLEAN) || - validType.equals(DATE) || - validType instanceof VarcharType; - } - - private String toPredicate(Domain domain, Type type, HiveColumnHandle column) - { - checkArgument(domain.getType().isOrderable(), "Domain type must be orderable"); - - if (domain.getValues().isNone()) { - if (domain.isNullAllowed()) { - return getFullyQualifiedColumnName(column) + " " + nullPredicate; - } - return "FALSE"; - } - - if (domain.getValues().isAll()) { - if (domain.isNullAllowed()) { - return "TRUE"; - } - return getFullyQualifiedColumnName(column) + " " + notNullPredicate; - } - - List disjuncts = new ArrayList<>(); - List singleValues = new ArrayList<>(); - for (Range range : domain.getValues().getRanges().getOrderedRanges()) { - checkState(!range.isAll()); - if (range.isSingleValue()) { - singleValues.add(range.getSingleValue()); - continue; - } - List rangeConjuncts = new ArrayList<>(); - if (!range.isLowUnbounded()) { - rangeConjuncts.add(toPredicate(range.isLowInclusive() ? ">=" : ">", range.getLowBoundedValue(), type, column)); - } - if (!range.isHighUnbounded()) { - rangeConjuncts.add(toPredicate(range.isHighInclusive() ? 
"<=" : "<", range.getHighBoundedValue(), type, column)); - } - // If rangeConjuncts is null, then the range was ALL, which should already have been checked for - checkState(!rangeConjuncts.isEmpty()); - if (rangeConjuncts.size() == 1) { - disjuncts.add("%s %s AND %s".formatted(getFullyQualifiedColumnName(column), notNullPredicate, getOnlyElement(rangeConjuncts))); - } - else { - disjuncts.add("(%s %s AND %s)".formatted(getFullyQualifiedColumnName(column), notNullPredicate, Joiner.on(" AND ").join(rangeConjuncts))); - } - } - - // Add back all of the possible single values either as an equality or an IN predicate - if (singleValues.size() == 1) { - disjuncts.add("%s %s AND %s".formatted(getFullyQualifiedColumnName(column), notNullPredicate, toPredicate("=", getOnlyElement(singleValues), type, column))); - } - else if (singleValues.size() > 1) { - List values = new ArrayList<>(); - for (Object value : singleValues) { - checkType(type); - values.add(valueToQuery(type, value)); - } - disjuncts.add("%s %s AND %s IN (%s)".formatted( - getFullyQualifiedColumnName(column), - notNullPredicate, - createColumn(type, column), - Joiner.on(",").join(values))); - } - - // Add nullability disjuncts - checkState(!disjuncts.isEmpty()); - if (domain.isNullAllowed()) { - disjuncts.add(getFullyQualifiedColumnName(column) + " " + nullPredicate); - } - - return "(" + Joiner.on(" OR ").join(disjuncts) + ")"; - } - - private String toPredicate(String operator, Object value, Type type, HiveColumnHandle column) - { - checkType(type); - - return format("%s %s %s", createColumn(type, column), operator, valueToQuery(type, value)); - } - - private static void checkType(Type type) - { - checkArgument(isSupported(type), "Type not supported: %s", type); - } - - private static String valueToQuery(Type type, Object value) - { - if (type.equals(BIGINT)) { - return String.valueOf((long) value); - } - if (type.equals(INTEGER)) { - return String.valueOf(toIntExact((long) value)); - } - if (type.equals(SMALLINT)) { - return String.valueOf(Shorts.checkedCast((long) value)); - } - if (type.equals(TINYINT)) { - return String.valueOf(SignedBytes.checkedCast((long) value)); - } - if (type.equals(BOOLEAN)) { - return String.valueOf((boolean) value); - } - if (type.equals(DATE)) { - // CAST('2007-04-05T14:30Z' AS TIMESTAMP) - return "'" + FORMATTER.print(DAYS.toMillis((long) value)) + "'"; - } - if (type.equals(VarcharType.VARCHAR)) { - return "'" + ((Slice) value).toStringUtf8().replace("'", "''") + "'"; - } - return "'" + ((Slice) value).toStringUtf8() + "'"; - } - - private String createColumn(Type type, HiveColumnHandle columnHandle) - { - String column = getFullyQualifiedColumnName(columnHandle); - - if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT)) { - return "CAST(" + column + " AS INT)"; - } - if (type.equals(BOOLEAN)) { - return "CAST(" + column + " AS BOOL)"; - } - return column; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectDataType.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectDataType.java deleted file mode 100644 index 70872574d5dbc..0000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectDataType.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -public enum S3SelectDataType { - CSV, - JSON -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectPushdown.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectPushdown.java deleted file mode 100644 index b98e3f7c3bd81..0000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectPushdown.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -import com.google.common.collect.ImmutableSet; -import io.trino.hive.formats.compression.CompressionKind; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.Partition; -import io.trino.plugin.hive.metastore.Table; -import io.trino.spi.connector.ConnectorSession; - -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.Properties; -import java.util.Set; - -import static io.trino.plugin.hive.HiveMetadata.SKIP_FOOTER_COUNT_KEY; -import static io.trino.plugin.hive.HiveMetadata.SKIP_HEADER_COUNT_KEY; -import static io.trino.plugin.hive.HiveSessionProperties.isS3SelectPushdownEnabled; -import static io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema; -import static io.trino.plugin.hive.util.HiveClassNames.TEXT_INPUT_FORMAT_CLASS; -import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; -import static io.trino.plugin.hive.util.HiveUtil.getInputFormatName; -import static java.util.Objects.requireNonNull; - -/** - * S3SelectPushdown uses Amazon S3 Select to push down queries to Amazon S3. This allows Presto to retrieve only a - * subset of data rather than retrieving the full S3 object thus improving Presto query performance. - */ -public final class S3SelectPushdown -{ - private static final Set SUPPORTED_S3_PREFIXES = ImmutableSet.of("s3://", "s3a://", "s3n://"); - - /* - * Double and Real Types lose precision. Thus, they are not pushed down to S3. - * Correctness problems have also been observed with Decimal columns. - * - * When S3 select support was added, Trino did not properly implement TIMESTAMP semantic. This was fixed in 2020, and TIMESTAMPS may be supportable now - * (https://github.com/trinodb/trino/issues/10962). Pushing down timestamps to s3select maybe still be problematic due to ION SQL comparing timestamps - * using precision. This means timestamps with different precisions are not equal even actually they present the same instant of time. 
- */ - private static final Set SUPPORTED_COLUMN_TYPES = ImmutableSet.of( - "boolean", - "int", - "tinyint", - "smallint", - "bigint", - "string", - "date"); - - private S3SelectPushdown() {} - - private static boolean isSerDeSupported(Properties schema) - { - String serdeName = getDeserializerClassName(schema); - return S3SelectSerDeDataTypeMapper.doesSerDeExist(serdeName); - } - - private static boolean isInputFormatSupported(Properties schema) - { - if (isTextInputFormat(schema)) { - if (!Objects.equals(schema.getProperty(SKIP_HEADER_COUNT_KEY, "0"), "0")) { - // S3 Select supports skipping one line of headers, but it was returning incorrect results for trino-hive-hadoop2/conf/files/test_table_with_header.csv.gz - // TODO https://github.com/trinodb/trino/issues/2349 - return false; - } - - // S3 Select does not support skipping footers - return Objects.equals(schema.getProperty(SKIP_FOOTER_COUNT_KEY, "0"), "0"); - } - - return false; - } - - public static boolean isCompressionCodecSupported(Properties schema, String path) - { - if (isTextInputFormat(schema)) { - // S3 Select supports the following formats: uncompressed, GZIP and BZIP2. - return CompressionKind.forFile(path) - .map(kind -> kind == CompressionKind.GZIP || kind == CompressionKind.BZIP2) - .orElse(true); - } - - return false; - } - - public static boolean isSplittable(boolean s3SelectPushdownEnabled, Properties schema, String path) - { - if (!s3SelectPushdownEnabled) { - return true; - } - - // S3 Select supports splitting uncompressed files - if (isTextInputFormat(schema) && CompressionKind.forFile(path).isEmpty()) { - return isSerDeSupported(schema); - } - - return false; - } - - private static boolean isTextInputFormat(Properties schema) - { - return TEXT_INPUT_FORMAT_CLASS.equals(getInputFormatName(schema).orElse(null)); - } - - private static boolean areColumnTypesSupported(List columns) - { - requireNonNull(columns, "columns is null"); - - if (columns.isEmpty()) { - return false; - } - - for (Column column : columns) { - if (!SUPPORTED_COLUMN_TYPES.contains(column.getType().getHiveTypeName().toString())) { - return false; - } - } - - return true; - } - - private static boolean isS3Storage(String path) - { - return SUPPORTED_S3_PREFIXES.stream().anyMatch(path::startsWith); - } - - public static boolean shouldEnablePushdownForTable(ConnectorSession session, Table table, String path, Optional optionalPartition) - { - if (!isS3SelectPushdownEnabled(session)) { - return false; - } - - if (path == null) { - return false; - } - - // Hive table partitions could be on different storages, - // as a result, we have to check each individual optionalPartition - Properties schema = optionalPartition - .map(partition -> getHiveSchema(partition, table)) - .orElseGet(() -> getHiveSchema(table)); - return shouldEnablePushdownForTable(table, path, schema); - } - - private static boolean shouldEnablePushdownForTable(Table table, String path, Properties schema) - { - return isS3Storage(path) && - isSerDeSupported(schema) && - isInputFormatSupported(schema) && - areColumnTypesSupported(table.getDataColumns()); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectSerDeDataTypeMapper.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectSerDeDataTypeMapper.java deleted file mode 100644 index 4695eb1a7e3be..0000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/S3SelectSerDeDataTypeMapper.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed under the Apache 
License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -import java.util.Map; -import java.util.Optional; - -import static io.trino.plugin.hive.util.HiveClassNames.JSON_SERDE_CLASS; -import static io.trino.plugin.hive.util.HiveClassNames.LAZY_SIMPLE_SERDE_CLASS; - -public class S3SelectSerDeDataTypeMapper -{ - // Contains mapping of SerDe class name -> data type. Multiple SerDe classes can be mapped to the same data type. - private static final Map serDeToDataTypeMapping = Map.of( - LAZY_SIMPLE_SERDE_CLASS, S3SelectDataType.CSV, - JSON_SERDE_CLASS, S3SelectDataType.JSON); - - private S3SelectSerDeDataTypeMapper() {} - - public static Optional getDataType(String serdeName) - { - return Optional.ofNullable(serDeToDataTypeMapping.get(serdeName)); - } - - public static boolean doesSerDeExist(String serdeName) - { - return serDeToDataTypeMapping.containsKey(serdeName); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3ClientFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3ClientFactory.java deleted file mode 100644 index 9a016c8e41ece..0000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3ClientFactory.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.s3select; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.regions.DefaultAwsRegionProviderChain; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Builder; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.google.errorprone.annotations.concurrent.GuardedBy; -import com.google.inject.Inject; -import io.airlift.log.Logger; -import io.airlift.units.Duration; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3FileSystem; -import io.trino.plugin.hive.HiveConfig; -import org.apache.hadoop.conf.Configuration; - -import java.net.URI; -import java.util.Optional; - -import static com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; -import static com.amazonaws.regions.Regions.US_EAST_1; -import static com.google.common.base.Strings.isNullOrEmpty; -import static com.google.common.base.Verify.verify; -import static io.trino.hdfs.s3.AwsCurrentRegionHolder.getCurrentRegionFromEC2Metadata; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACCESS_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_CONNECT_TIMEOUT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_CONNECT_TTL; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_CREDENTIALS_PROVIDER; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_EXTERNAL_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_IAM_ROLE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_ERROR_RETRIES; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PIN_CLIENT_TO_CURRENT_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ROLE_SESSION_NAME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SECRET_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SESSION_TOKEN; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SOCKET_TIMEOUT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SSL_ENABLED; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STS_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STS_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_USER_AGENT_PREFIX; -import static java.lang.Math.toIntExact; -import static java.lang.String.format; - -/** - * This factory provides AmazonS3 client required for executing S3SelectPushdown requests. - * Normal S3 GET requests use AmazonS3 clients initialized in {@link TrinoS3FileSystem} or EMRFS. - * The ideal state will be to merge this logic with the two file systems and get rid of this - * factory class. - * Please do not use the client provided by this factory for any other use cases. 
- */ -public class TrinoS3ClientFactory -{ - private static final Logger log = Logger.get(TrinoS3ClientFactory.class); - private static final String S3_SELECT_PUSHDOWN_MAX_CONNECTIONS = "hive.s3select-pushdown.max-connections"; - - private final boolean enabled; - private final int defaultMaxConnections; - - @GuardedBy("this") - private AmazonS3 s3Client; - - @Inject - public TrinoS3ClientFactory(HiveConfig config) - { - this.enabled = config.isS3SelectPushdownEnabled(); - this.defaultMaxConnections = config.getS3SelectPushdownMaxConnections(); - } - - synchronized AmazonS3 getS3Client(Configuration config) - { - if (s3Client == null) { - s3Client = createS3Client(config); - } - return s3Client; - } - - private AmazonS3 createS3Client(Configuration config) - { - HiveS3Config defaults = new HiveS3Config(); - String userAgentPrefix = config.get(S3_USER_AGENT_PREFIX, defaults.getS3UserAgentPrefix()); - int maxErrorRetries = config.getInt(S3_MAX_ERROR_RETRIES, defaults.getS3MaxErrorRetries()); - boolean sslEnabled = config.getBoolean(S3_SSL_ENABLED, defaults.isS3SslEnabled()); - Duration connectTimeout = Duration.valueOf(config.get(S3_CONNECT_TIMEOUT, defaults.getS3ConnectTimeout().toString())); - Duration socketTimeout = Duration.valueOf(config.get(S3_SOCKET_TIMEOUT, defaults.getS3SocketTimeout().toString())); - int maxConnections = config.getInt(S3_SELECT_PUSHDOWN_MAX_CONNECTIONS, defaultMaxConnections); - - ClientConfiguration clientConfiguration = new ClientConfiguration() - .withMaxErrorRetry(maxErrorRetries) - .withProtocol(sslEnabled ? Protocol.HTTPS : Protocol.HTTP) - .withConnectionTimeout(toIntExact(connectTimeout.toMillis())) - .withSocketTimeout(toIntExact(socketTimeout.toMillis())) - .withMaxConnections(maxConnections) - .withUserAgentPrefix(userAgentPrefix) - .withUserAgentSuffix(enabled ? "Trino-select" : "Trino"); - - String connectTtlValue = config.get(S3_CONNECT_TTL); - if (!isNullOrEmpty(connectTtlValue)) { - clientConfiguration.setConnectionTTL(Duration.valueOf(connectTtlValue).toMillis()); - } - - AWSCredentialsProvider awsCredentialsProvider = getAwsCredentialsProvider(config); - AmazonS3Builder, ? 
extends AmazonS3> clientBuilder = AmazonS3Client.builder() - .withCredentials(awsCredentialsProvider) - .withClientConfiguration(clientConfiguration) - .withMetricsCollector(TrinoS3FileSystem.getFileSystemStats().newRequestMetricCollector()) - .enablePathStyleAccess(); - - boolean regionOrEndpointSet = false; - - String endpoint = config.get(S3_ENDPOINT); - boolean pinS3ClientToCurrentRegion = config.getBoolean(S3_PIN_CLIENT_TO_CURRENT_REGION, defaults.isPinS3ClientToCurrentRegion()); - verify(!pinS3ClientToCurrentRegion || endpoint == null, - "Invalid configuration: either endpoint can be set or S3 client can be pinned to the current region"); - - // use local region when running inside of EC2 - if (pinS3ClientToCurrentRegion) { - clientBuilder.setRegion(getCurrentRegionFromEC2Metadata().getName()); - regionOrEndpointSet = true; - } - - if (!isNullOrEmpty(endpoint)) { - clientBuilder.withEndpointConfiguration(new EndpointConfiguration(endpoint, null)); - regionOrEndpointSet = true; - } - - if (!regionOrEndpointSet) { - clientBuilder.withRegion(US_EAST_1); - clientBuilder.setForceGlobalBucketAccessEnabled(true); - } - - return clientBuilder.build(); - } - - private static AWSCredentialsProvider getAwsCredentialsProvider(Configuration conf) - { - Optional credentials = getAwsCredentials(conf); - if (credentials.isPresent()) { - return new AWSStaticCredentialsProvider(credentials.get()); - } - - String providerClass = conf.get(S3_CREDENTIALS_PROVIDER); - if (!isNullOrEmpty(providerClass)) { - return getCustomAWSCredentialsProvider(conf, providerClass); - } - - AWSCredentialsProvider provider = getAwsCredentials(conf) - .map(value -> (AWSCredentialsProvider) new AWSStaticCredentialsProvider(value)) - .orElseGet(DefaultAWSCredentialsProviderChain::getInstance); - - String iamRole = conf.get(S3_IAM_ROLE); - if (iamRole != null) { - String stsEndpointOverride = conf.get(S3_STS_ENDPOINT); - String stsRegionOverride = conf.get(S3_STS_REGION); - String s3RoleSessionName = conf.get(S3_ROLE_SESSION_NAME); - String externalId = conf.get(S3_EXTERNAL_ID); - - AWSSecurityTokenServiceClientBuilder stsClientBuilder = AWSSecurityTokenServiceClientBuilder.standard() - .withCredentials(provider); - - String region; - if (!isNullOrEmpty(stsRegionOverride)) { - region = stsRegionOverride; - } - else { - DefaultAwsRegionProviderChain regionProviderChain = new DefaultAwsRegionProviderChain(); - try { - region = regionProviderChain.getRegion(); - } - catch (SdkClientException ex) { - log.warn("Falling back to default AWS region %s", US_EAST_1); - region = US_EAST_1.getName(); - } - } - - if (!isNullOrEmpty(stsEndpointOverride)) { - stsClientBuilder.withEndpointConfiguration(new EndpointConfiguration(stsEndpointOverride, region)); - } - else { - stsClientBuilder.withRegion(region); - } - - provider = new STSAssumeRoleSessionCredentialsProvider.Builder(iamRole, s3RoleSessionName) - .withExternalId(externalId) - .withStsClient(stsClientBuilder.build()) - .build(); - } - - return provider; - } - - private static AWSCredentialsProvider getCustomAWSCredentialsProvider(Configuration conf, String providerClass) - { - try { - return conf.getClassByName(providerClass) - .asSubclass(AWSCredentialsProvider.class) - .getConstructor(URI.class, Configuration.class) - .newInstance(null, conf); - } - catch (ReflectiveOperationException e) { - throw new RuntimeException(format("Error creating an instance of %s", providerClass), e); - } - } - - private static Optional getAwsCredentials(Configuration conf) - { - String accessKey = 
conf.get(S3_ACCESS_KEY); - String secretKey = conf.get(S3_SECRET_KEY); - - if (isNullOrEmpty(accessKey) || isNullOrEmpty(secretKey)) { - return Optional.empty(); - } - String sessionToken = conf.get(S3_SESSION_TOKEN); - if (!isNullOrEmpty(sessionToken)) { - return Optional.of(new BasicSessionCredentials(accessKey, secretKey, sessionToken)); - } - - return Optional.of(new BasicAWSCredentials(accessKey, secretKey)); - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3SelectClient.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3SelectClient.java deleted file mode 100644 index e42777ac40f8c..0000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/s3select/TrinoS3SelectClient.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import org.apache.hadoop.conf.Configuration; - -import java.io.Closeable; -import java.io.IOException; -import java.io.InputStream; - -import static com.amazonaws.services.s3.model.SelectObjectContentEvent.EndEvent; -import static java.util.Objects.requireNonNull; - -class TrinoS3SelectClient - implements Closeable -{ - private final AmazonS3 s3Client; - private boolean requestComplete; - private SelectObjectContentRequest selectObjectRequest; - private SelectObjectContentResult selectObjectContentResult; - - public TrinoS3SelectClient(Configuration configuration, TrinoS3ClientFactory s3ClientFactory) - { - requireNonNull(configuration, "configuration is null"); - requireNonNull(s3ClientFactory, "s3ClientFactory is null"); - this.s3Client = s3ClientFactory.getS3Client(configuration); - } - - public InputStream getRecordsContent(SelectObjectContentRequest selectObjectRequest) - { - this.selectObjectRequest = requireNonNull(selectObjectRequest, "selectObjectRequest is null"); - this.selectObjectContentResult = s3Client.selectObjectContent(selectObjectRequest); - return selectObjectContentResult.getPayload() - .getRecordsInputStream( - new SelectObjectContentEventVisitor() - { - @Override - public void visit(EndEvent endEvent) - { - requestComplete = true; - } - }); - } - - @Override - public void close() - throws IOException - { - selectObjectContentResult.close(); - } - - public String getKeyName() - { - return selectObjectRequest.getKey(); - } - - public String getBucketName() - { - return selectObjectRequest.getBucketName(); - } - - /** - * The End Event indicates all matching records have been transmitted. - * If the End Event is not received, the results may be incomplete. 
- */ - public boolean isRequestComplete() - { - return requestComplete; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java index 304bcbf20b646..3395cfd426a9b 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java @@ -29,7 +29,6 @@ import io.trino.plugin.hive.orc.OrcPageSourceFactory; import io.trino.plugin.hive.parquet.ParquetPageSourceFactory; import io.trino.plugin.hive.rcfile.RcFilePageSourceFactory; -import io.trino.plugin.hive.s3select.S3SelectPushdown; import io.trino.spi.HostAddress; import io.trino.spi.predicate.Domain; import io.trino.spi.predicate.TupleDomain; @@ -62,7 +61,6 @@ public class InternalHiveSplitFactory private final long minimumTargetSplitSizeInBytes; private final Optional maxSplitFileSize; private final boolean forceLocalScheduling; - private final boolean s3SelectPushdownEnabled; public InternalHiveSplitFactory( String partitionName, @@ -76,7 +74,6 @@ public InternalHiveSplitFactory( Optional bucketValidation, DataSize minimumTargetSplitSize, boolean forceLocalScheduling, - boolean s3SelectPushdownEnabled, Optional maxSplitFileSize) { this.partitionName = requireNonNull(partitionName, "partitionName is null"); @@ -89,7 +86,6 @@ public InternalHiveSplitFactory( this.bucketConversion = requireNonNull(bucketConversion, "bucketConversion is null"); this.bucketValidation = requireNonNull(bucketValidation, "bucketValidation is null"); this.forceLocalScheduling = forceLocalScheduling; - this.s3SelectPushdownEnabled = s3SelectPushdownEnabled; this.minimumTargetSplitSizeInBytes = minimumTargetSplitSize.toBytes(); this.maxSplitFileSize = requireNonNull(maxSplitFileSize, "maxSplitFileSize is null"); checkArgument(minimumTargetSplitSizeInBytes > 0, "minimumTargetSplitSize must be > 0, found: %s", minimumTargetSplitSize); @@ -199,7 +195,6 @@ private Optional createInternalHiveSplit( tableToPartitionMapping, bucketConversion, bucketValidation, - s3SelectPushdownEnabled && S3SelectPushdown.isCompressionCodecSupported(strippedSchema, path), acidInfo, partitionMatchSupplier)); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystem.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystem.java index 66cf0cde67959..ea77cb7c90854 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystem.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveFileSystem.java @@ -185,7 +185,7 @@ public void tearDown() protected void onSetupComplete() {} - protected void setup(String host, int port, String databaseName, boolean s3SelectPushdownEnabled, HdfsConfiguration hdfsConfiguration) + protected void setup(String host, int port, String databaseName, HdfsConfiguration hdfsConfiguration) { database = databaseName; table = new SchemaTableName(database, "trino_test_external_fs"); @@ -197,8 +197,7 @@ protected void setup(String host, int port, String databaseName, boolean s3Selec temporaryCreateTableWithExternalLocation = new SchemaTableName(database, "tmp_trino_test_create_external" + random); config = new HiveConfig() - .setWritesToNonManagedTablesEnabled(true) - .setS3SelectPushdownEnabled(s3SelectPushdownEnabled); + .setWritesToNonManagedTablesEnabled(true); HivePartitionManager 
hivePartitionManager = new HivePartitionManager(config); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java index 5ff216e40e396..4fa61fef34efa 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java @@ -29,12 +29,8 @@ import io.trino.spi.predicate.NullableValue; import io.trino.spi.predicate.TupleDomain; import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.MaterializedResult; import io.trino.testing.QueryRunner; import io.trino.testing.minio.MinioClient; -import io.trino.testing.sql.TestTable; -import org.intellij.lang.annotations.Language; -import org.testng.SkipException; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -68,7 +64,6 @@ import static java.util.stream.Collectors.joining; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.testng.Assert.assertEquals; public abstract class BaseTestHiveOnDataLake extends AbstractTestQueryFramework @@ -111,7 +106,6 @@ protected QueryRunner createQueryRunner() .put("hive.s3.streaming.part-size", HIVE_S3_STREAMING_PART_SIZE.toString()) // This is required to enable AWS Athena partition projection .put("hive.partition-projection-enabled", "true") - .put("hive.s3select-pushdown.experimental-textfile-pushdown-enabled", "true") .buildOrThrow()) .build(); } @@ -1794,247 +1788,6 @@ public void testPartitionedTableExternalLocationOnTopOfTheBucket() assertUpdate("DROP TABLE " + tableName); } - @Test(dataProvider = "s3SelectFileFormats") - public void testS3SelectPushdown(String tableProperties) - { - if (true) { - throw new SkipException("S3 Select not yet supported"); - } - Session usingAppendInserts = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "insert_existing_partitions_behavior", "APPEND") - .build(); - List values = ImmutableList.of( - "1, true, 11, 111, 1111, 11111, 'one', DATE '2020-01-01'", - "2, true, 22, 222, 2222, 22222, 'two', DATE '2020-02-02'", - "3, NULL, NULL, NULL, NULL, NULL, NULL, NULL", - "4, false, 44, 444, 4444, 44444, 'four', DATE '2020-04-04'"); - try (TestTable table = new TestTable( - sql -> getQueryRunner().execute(usingAppendInserts, sql), - "hive.%s.test_s3_select_pushdown".formatted(HIVE_TEST_SCHEMA), - "(id INT, bool_t BOOLEAN, tiny_t TINYINT, small_t SMALLINT, int_t INT, big_t BIGINT, string_t VARCHAR, date_t DATE) " + - "WITH (" + tableProperties + ")", values)) { - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t = true", "VALUES 1, 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t = false", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t = 22", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t != 22", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t > 22", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t >= 22", "VALUES 2, 4"); - 
assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t = 22 OR tiny_t = 44", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t IS NULL OR tiny_t >= 22", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t = 222", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t != 222", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t > 222", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t >= 222", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t = 222 OR small_t = 444", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t IS NULL OR small_t >= 222", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t = 2222", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t != 2222", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t > 2222", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t >= 2222", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t = 2222 OR int_t = 4444", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t IS NULL OR int_t >= 2222", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t = 22222", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t != 22222", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t > 22222", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t >= 22222", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t = 22222 OR big_t = 44444", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t IS NULL OR big_t >= 22222", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t = 'two'", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t != 'two'", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t < 'two'", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t <= 'two'", "VALUES 1, 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t = 'two' OR string_t = 'four'", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() 
+ " WHERE string_t IS NULL OR string_t >= 'two'", "VALUES 2, 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t = DATE '2020-02-02'", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t != DATE '2020-02-02'", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t > DATE '2020-02-02'", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t <= DATE '2020-02-02'", "VALUES 1, 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t = DATE '2020-02-02' OR date_t = DATE '2020-04-04'", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t IS NULL OR date_t >= DATE '2020-02-02'", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t IS NOT NULL", "VALUES 1, 2, 4"); - } - } - - @Test(dataProvider = "s3SelectFileFormats") - public void testS3SelectOnDecimalColumnIsDisabled(String tableProperties) - { - if (true) { - throw new SkipException("S3 Select not yet supported"); - } - Session usingAppendInserts = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "insert_existing_partitions_behavior", "APPEND") - .build(); - List values = ImmutableList.of("1, 1.1", "2, 2.2", "3, NULL", "4, 4.4"); - try (TestTable table = new TestTable( - sql -> getQueryRunner().execute(usingAppendInserts, sql), - "hive.%s.test_s3_select_pushdown".formatted(HIVE_TEST_SCHEMA), - "(id INT, decimal_t DECIMAL(10, 5)) WITH (" + tableProperties + ")", - values)) { - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t = 2.2", "VALUES 2"); - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t != 2.2", "VALUES 1, 4"); - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t < 2.2", "VALUES 1"); - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t <= 2.2", "VALUES 1, 2"); - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t = 2.2 OR decimal_t = 4.4", "VALUES 2, 4"); - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t IS NULL OR decimal_t >= 2.2", "VALUES 2, 3, 4"); - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t IS NULL", "VALUES 3"); - assertNoS3SelectPushdown("SELECT id FROM " + table.getName() + " WHERE decimal_t IS NOT NULL", "VALUES 1, 2, 4"); - } - } - - @Test - public void testJsonS3SelectPushdownWithSpecialCharacters() - { - Session usingAppendInserts = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "insert_existing_partitions_behavior", "APPEND") - .build(); - - List specialCharacterValues = ImmutableList.of( - "1, 'a,comma'", - "2, 'a|pipe'", - "3, 'an''escaped quote'", - "4, 'a\"double quote'"); - try (TestTable table = new TestTable( - sql -> getQueryRunner().execute(usingAppendInserts, sql), - "hive.%s.test_s3_select_pushdown_special_characters".formatted(HIVE_TEST_SCHEMA), - "(id INT, string_t VARCHAR) WITH (format = 'JSON')", - specialCharacterValues)) { - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t ='a,comma'", 
"VALUES 1"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t ='a|pipe'", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t ='an''escaped quote'", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t ='a\"double quote'", "VALUES 4"); - } - } - - @Test - public void testS3SelectExperimentalPushdown() - { - if (true) { - throw new SkipException("S3 Select not yet supported"); - } - // Demonstrate correctness issues which have resulted in pushdown for TEXTFILE - // using CSV support in S3 Select being put behind a separate "experimental" flag. - // TODO: https://github.com/trinodb/trino/issues/17775 - Session usingAppendInserts = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "insert_existing_partitions_behavior", "APPEND") - .build(); - List values = ImmutableList.of( - "1, true, 11", - "2, true, 22", - "3, NULL, NULL", - "4, false, 44"); - Session withS3SelectPushdown = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "s3_select_pushdown_enabled", "true") - .build(); - - try (TestTable table = new TestTable( - sql -> getQueryRunner().execute(usingAppendInserts, sql), - "hive.%s.test_s3_select_pushdown_experimental_features".formatted(HIVE_TEST_SCHEMA), - "(id INT, bool_t BOOLEAN, int_t INT) WITH (format = 'TEXTFILE')", - values)) { - assertQuery("SELECT id FROM " + table.getName() + " WHERE int_t IS NULL", "VALUES 3"); - assertThat(query(withS3SelectPushdown, "SELECT id FROM " + table.getName() + " WHERE int_t IS NULL")).returnsEmptyResult(); - - assertQueryFails( - withS3SelectPushdown, - "SELECT id FROM " + table.getName() + " WHERE bool_t = true", - "S3 returned an error: Error casting:.*"); - } - - List specialCharacterValues = ImmutableList.of( - "1, 'a,comma'", - "2, 'a|pipe'", - "3, 'an''escaped quote'", - "4, 'a~null encoding'"); - try (TestTable table = new TestTable( - sql -> getQueryRunner().execute(usingAppendInserts, sql), - "hive.%s.test_s3_select_pushdown_special_characters".formatted(HIVE_TEST_SCHEMA), - "(id INT, string_t VARCHAR) WITH (format = 'TEXTFILE', textfile_field_separator=',', textfile_field_separator_escape='|', null_format='~')", - specialCharacterValues)) { - // These two should return a result, but incorrectly return nothing - String selectWithComma = "SELECT id FROM " + table.getName() + " WHERE string_t ='a,comma'"; - assertQuery(selectWithComma, "VALUES 1"); - assertThat(query(withS3SelectPushdown, selectWithComma)).returnsEmptyResult(); - - String selectWithPipe = "SELECT id FROM " + table.getName() + " WHERE string_t ='a|pipe'"; - assertQuery(selectWithPipe, "VALUES 2"); - assertThat(query(withS3SelectPushdown, selectWithPipe)).returnsEmptyResult(); - - // These two are actually correct - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t ='an''escaped quote'", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t ='a~null encoding'", "VALUES 4"); - } - } - - private void assertS3SelectQuery(@Language("SQL") String query, @Language("SQL") String expectedValues) - { - Session withS3SelectPushdown = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "s3_select_pushdown_enabled", "true") - .build(); - - MaterializedResult expectedResult = computeActual(expectedValues); - assertQueryStats( - withS3SelectPushdown, - query, - statsWithPushdown -> { - long inputPositionsWithPushdown = statsWithPushdown.getPhysicalInputPositions(); - 
assertQueryStats( - getSession(), - query, - statsWithoutPushdown -> assertThat(statsWithoutPushdown.getPhysicalInputPositions()).isGreaterThan(inputPositionsWithPushdown), - results -> assertEquals(results.getOnlyColumnAsSet(), expectedResult.getOnlyColumnAsSet())); - }, - results -> assertEquals(results.getOnlyColumnAsSet(), expectedResult.getOnlyColumnAsSet())); - } - - private void assertNoS3SelectPushdown(@Language("SQL") String query, @Language("SQL") String expectedValues) - { - Session withS3SelectPushdown = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "s3_select_pushdown_enabled", "true") - .build(); - - MaterializedResult expectedResult = computeActual(expectedValues); - assertQueryStats( - withS3SelectPushdown, - query, - statsWithPushdown -> { - long inputPositionsWithPushdown = statsWithPushdown.getPhysicalInputPositions(); - assertQueryStats( - getSession(), - query, - statsWithoutPushdown -> assertThat(statsWithoutPushdown.getPhysicalInputPositions()).isEqualTo(inputPositionsWithPushdown), - results -> assertEquals(results.getOnlyColumnAsSet(), expectedResult.getOnlyColumnAsSet())); - }, - results -> assertEquals(results.getOnlyColumnAsSet(), expectedResult.getOnlyColumnAsSet())); - } - - @DataProvider - public static Object[][] s3SelectFileFormats() - { - return new Object[][] { - {"format = 'JSON'"}, - {"format = 'TEXTFILE', textfile_field_separator=',', textfile_field_separator_escape='|', null_format='~'"} - }; - } - @Test public void testDropStatsPartitionedTable() { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java index 342bf6f2b0707..35ed1caaebea7 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java @@ -821,7 +821,6 @@ public void testBuildManifestFileIterator() Optional.empty(), DataSize.of(512, MEGABYTE), false, - false, Optional.empty()); BackgroundHiveSplitLoader backgroundHiveSplitLoader = backgroundHiveSplitLoader( files, @@ -864,7 +863,6 @@ public void testBuildManifestFileIteratorNestedDirectory() Optional.empty(), DataSize.of(512, MEGABYTE), false, - false, Optional.empty()); BackgroundHiveSplitLoader backgroundHiveSplitLoader = backgroundHiveSplitLoader( diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java index 4033974c91a99..674d5a5352e3c 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java @@ -91,9 +91,6 @@ public void testDefaults() .setPartitionStatisticsSampleSize(100) .setIgnoreCorruptedStatistics(false) .setCollectColumnStatisticsOnWrite(true) - .setS3SelectPushdownEnabled(false) - .setS3SelectExperimentalPushdownEnabled(false) - .setS3SelectPushdownMaxConnections(500) .setTemporaryStagingDirectoryEnabled(true) .setTemporaryStagingDirectoryPath("/tmp/presto-${USER}") .setDelegateTransactionalManagedTableLocationToMetastore(false) @@ -177,9 +174,6 @@ public void testExplicitPropertyMappings() .put("hive.partition-statistics-sample-size", "1234") .put("hive.ignore-corrupted-statistics", "true") .put("hive.collect-column-statistics-on-write", "false") - .put("hive.s3select-pushdown.enabled", "true") - 
.put("hive.s3select-pushdown.experimental-textfile-pushdown-enabled", "true") - .put("hive.s3select-pushdown.max-connections", "1234") .put("hive.temporary-staging-directory-enabled", "false") .put("hive.temporary-staging-directory-path", "updated") .put("hive.delegate-transactional-managed-table-location-to-metastore", "true") @@ -260,9 +254,6 @@ public void testExplicitPropertyMappings() .setPartitionStatisticsSampleSize(1234) .setIgnoreCorruptedStatistics(true) .setCollectColumnStatisticsOnWrite(false) - .setS3SelectPushdownEnabled(true) - .setS3SelectExperimentalPushdownEnabled(true) - .setS3SelectPushdownMaxConnections(1234) .setTemporaryStagingDirectoryEnabled(false) .setTemporaryStagingDirectoryPath("updated") .setDelegateTransactionalManagedTableLocationToMetastore(true) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java index 214e24797f5aa..968e996718626 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java @@ -913,7 +913,6 @@ private void testPageSourceFactory( TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), - false, Optional.empty(), false, NO_ACID_TRANSACTION, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java index 9ff58fd5812c7..ff22bf6f76f5f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java @@ -266,7 +266,6 @@ private static ConnectorPageSource createPageSource(HiveTransactionHandle transa TableToPartitionMapping.empty(), Optional.empty(), Optional.empty(), - false, Optional.empty(), SplitWeight.standard()); ConnectorTableHandle table = new HiveTableHandle(SCHEMA_NAME, TABLE_NAME, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java index d90fd803da746..fba3cde81a11a 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java @@ -79,7 +79,6 @@ public void testJsonRoundTrip() 16, ImmutableList.of(createBaseColumn("col", 5, HIVE_LONG, BIGINT, ColumnType.REGULAR, Optional.of("comment"))))), Optional.empty(), - false, Optional.of(acidInfo), SplitWeight.fromProportion(2.0)); // some non-standard value @@ -98,7 +97,6 @@ public void testJsonRoundTrip() assertEquals(actual.getTableToPartitionMapping().getTableToPartitionColumns(), expected.getTableToPartitionMapping().getTableToPartitionColumns()); assertEquals(actual.getBucketConversion(), expected.getBucketConversion()); assertEquals(actual.isForceLocalScheduling(), expected.isForceLocalScheduling()); - assertEquals(actual.isS3SelectPushdownEnabled(), expected.isS3SelectPushdownEnabled()); assertEquals(actual.getAcidInfo().get(), expected.getAcidInfo().get()); assertEquals(actual.getSplitWeight(), expected.getSplitWeight()); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java index 22e008077bfe7..da8b9f9c19d55 100644 --- 
a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java @@ -335,7 +335,6 @@ private TestSplit(int id, OptionalInt bucketNumber, DataSize fileSize, BooleanSu TableToPartitionMapping.empty(), Optional.empty(), Optional.empty(), - false, Optional.empty(), partitionMatchSupplier); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java index 7869e5eee9490..04350213ce986 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java @@ -134,7 +134,6 @@ private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle TableToPartitionMapping.empty(), Optional.empty(), Optional.empty(), - false, Optional.empty(), SplitWeight.standard()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java index 4fc93b3c0fbf7..386f7f19dbb4e 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java @@ -578,7 +578,6 @@ public ConnectorPageSource newPageSource(FileFormatDataSourceStats stats, Connec TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), - false, Optional.empty(), false, NO_ACID_TRANSACTION, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/AbstractFileFormat.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/AbstractFileFormat.java index 68ee5f756ad9a..77143b82792ae 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/AbstractFileFormat.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/AbstractFileFormat.java @@ -121,7 +121,6 @@ public ConnectorPageSource createGenericReader( TableToPartitionMapping.empty(), Optional.empty(), Optional.empty(), - false, Optional.empty(), SplitWeight.standard()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java index 95f6d3b51cc9d..084223f391932 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java @@ -229,7 +229,6 @@ private ConnectorPageSource createPageSource( TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), - false, Optional.empty(), false, NO_ACID_TRANSACTION, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestMinioS3SelectQueries.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestMinioS3SelectQueries.java deleted file mode 100644 index 49f9af0021dfc..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestMinioS3SelectQueries.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.airlift.units.DataSize; -import io.trino.Session; -import io.trino.plugin.hive.containers.HiveHadoop; -import io.trino.plugin.hive.containers.HiveMinioDataLake; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.QueryRunner; -import io.trino.testing.sql.TestTable; -import org.testng.SkipException; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -import java.util.List; - -import static io.airlift.units.DataSize.Unit.MEGABYTE; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static java.lang.String.format; - -public class TestMinioS3SelectQueries - extends AbstractTestQueryFramework -{ - private static final String HIVE_TEST_SCHEMA = "hive_datalake"; - private static final DataSize HIVE_S3_STREAMING_PART_SIZE = DataSize.of(5, MEGABYTE); - - private String bucketName; - - @Override - protected QueryRunner createQueryRunner() - throws Exception - { - this.bucketName = "test-hive-insert-overwrite-" + randomNameSuffix(); - HiveMinioDataLake hiveMinioDataLake = closeAfterClass(new HiveMinioDataLake(bucketName, HiveHadoop.HIVE3_IMAGE)); - hiveMinioDataLake.start(); - return S3HiveQueryRunner.builder(hiveMinioDataLake) - .setHiveProperties( - ImmutableMap.builder() - .put("hive.non-managed-table-writes-enabled", "true") - .put("hive.metastore-cache-ttl", "1d") - .put("hive.metastore-refresh-interval", "1d") - .put("hive.s3.streaming.part-size", HIVE_S3_STREAMING_PART_SIZE.toString()) - .buildOrThrow()) - .build(); - } - - @BeforeClass - public void setUp() - { - computeActual(format( - "CREATE SCHEMA hive.%1$s WITH (location='s3a://%2$s/%1$s')", - HIVE_TEST_SCHEMA, - bucketName)); - } - - @Test - public void testTextfileQueries() - { - if (true) { - throw new SkipException("S3 Select not yet supported"); - } - // Demonstrate correctness issues which have resulted in pushdown for TEXTFILE - // using CSV support in S3 Select being put behind a separate "experimental" flag. 
- // TODO: https://github.com/trinodb/trino/issues/17775 - List values = ImmutableList.of( - "1, true, 11", - "2, true, 22", - "3, NULL, NULL", - "4, false, 44"); - Session withS3SelectPushdown = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "s3_select_pushdown_enabled", "true") - .build(); - try (TestTable table = new TestTable( - getQueryRunner()::execute, - "hive.%s.test_textfile_queries".formatted(HIVE_TEST_SCHEMA), - "(id INT, bool_t BOOLEAN, int_t INT) WITH (format = 'TEXTFILE')", - values)) { - assertQuery(withS3SelectPushdown, "SELECT id FROM " + table.getName() + " WHERE int_t IS NULL", "VALUES 3"); - assertQuery(withS3SelectPushdown, "SELECT id FROM " + table.getName() + " WHERE bool_t = true", "VALUES 1, 2"); - } - - List specialCharacterValues = ImmutableList.of( - "1, 'a,comma'", - "2, 'a|pipe'", - "3, 'an''escaped quote'", - "4, 'a~null encoding'"); - try (TestTable table = new TestTable( - getQueryRunner()::execute, - "hive.%s.test_s3_select_pushdown_special_characters".formatted(HIVE_TEST_SCHEMA), - "(id INT, string_t VARCHAR) WITH (format = 'TEXTFILE', textfile_field_separator=',', textfile_field_separator_escape='|', null_format='~')", - specialCharacterValues)) { - String selectWithComma = "SELECT id FROM " + table.getName() + " WHERE string_t = 'a,comma'"; - assertQuery(selectWithComma, "VALUES 1"); - assertQuery(withS3SelectPushdown, selectWithComma, "VALUES 1"); - - String selectWithPipe = "SELECT id FROM " + table.getName() + " WHERE string_t = 'a|pipe'"; - assertQuery(selectWithPipe, "VALUES 2"); - assertQuery(withS3SelectPushdown, selectWithPipe, "VALUES 2"); - - String selectWithQuote = "SELECT id FROM " + table.getName() + " WHERE string_t = 'an''escaped quote'"; - assertQuery(selectWithQuote, "VALUES 3"); - assertQuery(withS3SelectPushdown, selectWithQuote, "VALUES 3"); - - String selectWithNullFormatEncoding = "SELECT id FROM " + table.getName() + " WHERE string_t = 'a~null encoding'"; - assertQuery(selectWithNullFormatEncoding, "VALUES 4"); - assertQuery(withS3SelectPushdown, selectWithNullFormatEncoding, "VALUES 4"); - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3SelectQueries.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3SelectQueries.java deleted file mode 100644 index 774bf40bf5864..0000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/TestS3SelectQueries.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive.s3; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.trino.Session; -import io.trino.plugin.hive.HiveQueryRunner; -import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.metastore.HiveMetastoreConfig; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; -import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.MaterializedResult; -import io.trino.testing.QueryRunner; -import io.trino.testing.sql.TestTable; -import org.intellij.lang.annotations.Language; -import org.testng.SkipException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Parameters; -import org.testng.annotations.Test; - -import java.io.File; -import java.util.List; - -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static java.util.Objects.requireNonNull; -import static org.assertj.core.api.Assertions.assertThat; -import static org.testng.Assert.assertEquals; - -// The test requires AWS credentials be provided via one of the ways used by the DefaultAWSCredentialsProviderChain. -public class TestS3SelectQueries - extends AbstractTestQueryFramework -{ - private final String bucket; - private final String bucketEndpoint; - - @Parameters({"s3.bucket", "s3.bucket-endpoint"}) - public TestS3SelectQueries(String bucket, String bucketEndpoint) - { - this.bucket = requireNonNull(bucket, "bucket is null"); - this.bucketEndpoint = requireNonNull(bucketEndpoint, "bucketEndpoint is null"); - } - - @Override - protected QueryRunner createQueryRunner() - throws Exception - { - ImmutableMap.Builder hiveProperties = ImmutableMap.builder(); - hiveProperties.put("hive.s3.endpoint", bucketEndpoint); - hiveProperties.put("hive.non-managed-table-writes-enabled", "true"); - hiveProperties.put("hive.s3select-pushdown.experimental-textfile-pushdown-enabled", "true"); - return HiveQueryRunner.builder() - .setHiveProperties(hiveProperties.buildOrThrow()) - .setInitialTables(ImmutableList.of()) - .setMetastore(queryRunner -> { - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data").toFile(); - return new FileHiveMetastore( - new NodeVersion("testversion"), - HDFS_FILE_SYSTEM_FACTORY, - new HiveMetastoreConfig().isHideDeltaLakeTables(), - new FileHiveMetastoreConfig() - .setCatalogDirectory(baseDir.toURI().toString()) - .setMetastoreUser("test") - .setDisableLocationChecks(true)); - }) - .build(); - } - - @Test(dataProvider = "s3SelectFileFormats") - public void testS3SelectPushdown(String tableProperties) - { - if (true) { - throw new SkipException("S3 Select not yet supported"); - } - Session usingAppendInserts = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "insert_existing_partitions_behavior", "APPEND") - .build(); - List values = ImmutableList.of( - "1, true, 11, 111, 1111, 11111, 'one', DATE '2020-01-01'", - "2, true, 22, 222, 2222, 22222, 'two', DATE '2020-02-02'", - "3, NULL, NULL, NULL, NULL, NULL, NULL, NULL", - "4, false, 44, 444, 4444, 44444, '', DATE '2020-04-04'"); - try (TestTable table = new TestTable( - sql -> getQueryRunner().execute(usingAppendInserts, sql), - "hive.%s.test_s3_select_pushdown".formatted(HiveQueryRunner.TPCH_SCHEMA), - "(id INT, bool_t BOOLEAN, tiny_t TINYINT, small_t SMALLINT, int_t INT, big_t BIGINT, string_t VARCHAR, date_t DATE) " + - 
"WITH (external_location = 's3://" + bucket + "/test_s3_select_pushdown/test_table_" + randomNameSuffix() + "', " + tableProperties + ")", values)) { - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t = true", "VALUES 1, 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t = false", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE bool_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t = 22", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t != 22", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t > 22", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t >= 22", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t = 22 OR tiny_t = 44", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t IS NULL OR tiny_t >= 22", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE tiny_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t = 222", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t != 222", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t > 222", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t >= 222", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t = 222 OR small_t = 444", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t IS NULL OR small_t >= 222", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE small_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t = 2222", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t != 2222", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t > 2222", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t >= 2222", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t = 2222 OR int_t = 4444", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t IS NULL OR int_t >= 2222", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE int_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t = 22222", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t != 22222", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t > 22222", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t >= 22222", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t = 22222 OR big_t = 
44444", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t IS NULL OR big_t >= 22222", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE big_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t = 'two'", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t != 'two'", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t < 'two'", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t <= 'two'", "VALUES 1, 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t = 'two' OR string_t = ''", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t IS NULL OR string_t >= 'two'", "VALUES 2, 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE string_t IS NOT NULL", "VALUES 1, 2, 4"); - - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t = DATE '2020-02-02'", "VALUES 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t != DATE '2020-02-02'", "VALUES 1, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t > DATE '2020-02-02'", "VALUES 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t <= DATE '2020-02-02'", "VALUES 1, 2"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t = DATE '2020-02-02' OR date_t = DATE '2020-04-04'", "VALUES 2, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t IS NULL OR date_t >= DATE '2020-02-02'", "VALUES 2, 3, 4"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t IS NULL", "VALUES 3"); - assertS3SelectQuery("SELECT id FROM " + table.getName() + " WHERE date_t IS NOT NULL", "VALUES 1, 2, 4"); - } - } - - private void assertS3SelectQuery(@Language("SQL") String query, @Language("SQL") String expectedValues) - { - Session withS3SelectPushdown = Session.builder(getSession()) - .setCatalogSessionProperty("hive", "s3_select_pushdown_enabled", "true") - .build(); - - MaterializedResult expectedResult = computeActual(expectedValues); - assertQueryStats( - withS3SelectPushdown, - query, - statsWithPushdown -> { - long inputPositionsWithPushdown = statsWithPushdown.getPhysicalInputPositions(); - assertQueryStats( - getSession(), - query, - statsWithoutPushdown -> assertThat(statsWithoutPushdown.getPhysicalInputPositions()).isGreaterThan(inputPositionsWithPushdown), - results -> assertEquals(results.getOnlyColumnAsSet(), expectedResult.getOnlyColumnAsSet())); - }, - results -> assertEquals(results.getOnlyColumnAsSet(), expectedResult.getOnlyColumnAsSet())); - } - - @DataProvider - public static Object[][] s3SelectFileFormats() - { - return new Object[][] { - {"format = 'JSON'"}, - {"format = 'TEXTFILE', textfile_field_separator=',', textfile_field_separator_escape='|', null_format='~'"} - }; - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestIonSqlQueryBuilder.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestIonSqlQueryBuilder.java deleted file mode 100644 index bb29743101532..0000000000000 --- 
a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestIonSqlQueryBuilder.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.s3select; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.airlift.slice.Slices; -import io.trino.plugin.hive.HiveColumnHandle; -import io.trino.plugin.hive.HiveType; -import io.trino.spi.predicate.Domain; -import io.trino.spi.predicate.Range; -import io.trino.spi.predicate.SortedRangeSet; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.predicate.ValueSet; -import io.trino.spi.type.DecimalType; -import io.trino.spi.type.TypeManager; -import io.trino.util.DateTimeUtils; -import org.testng.annotations.Test; - -import java.util.List; -import java.util.Optional; - -import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; -import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveTestUtils.longDecimal; -import static io.trino.plugin.hive.HiveTestUtils.shortDecimal; -import static io.trino.plugin.hive.HiveType.HIVE_DATE; -import static io.trino.plugin.hive.HiveType.HIVE_DOUBLE; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.HiveType.HIVE_TIMESTAMP; -import static io.trino.spi.predicate.TupleDomain.withColumnDomains; -import static io.trino.spi.predicate.ValueSet.ofRanges; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.DateType.DATE; -import static io.trino.spi.type.DoubleType.DOUBLE; -import static io.trino.spi.type.IntegerType.INTEGER; -import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; -import static io.trino.spi.type.VarcharType.VARCHAR; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static org.testng.Assert.assertEquals; - -public class TestIonSqlQueryBuilder -{ - @Test - public void testBuildSQL() - { - List columns = ImmutableList.of( - createBaseColumn("n_nationkey", 0, HIVE_INT, INTEGER, REGULAR, Optional.empty()), - createBaseColumn("n_name", 1, HIVE_STRING, VARCHAR, REGULAR, Optional.empty()), - createBaseColumn("n_regionkey", 2, HIVE_INT, INTEGER, REGULAR, Optional.empty())); - - // CSV - IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.CSV, Optional.empty()); - assertEquals(queryBuilder.buildSql(columns, TupleDomain.all()), - "SELECT s._1, s._2, s._3 FROM S3Object s"); - - TupleDomain tupleDomain = withColumnDomains(ImmutableMap.of( - columns.get(2), Domain.create(SortedRangeSet.copyOf(BIGINT, ImmutableList.of(Range.equal(BIGINT, 3L))), false))); - assertEquals(queryBuilder.buildSql(columns, tupleDomain), - "SELECT s._1, s._2, s._3 FROM S3Object s WHERE (s._3 != '' AND CAST(s._3 AS INT) = 3)"); - - // JSON - queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.JSON, 
Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, TupleDomain.all()),
-                "SELECT s.n_nationkey, s.n_name, s.n_regionkey FROM S3Object s");
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain),
-                "SELECT s.n_nationkey, s.n_name, s.n_regionkey FROM S3Object s WHERE (s.n_regionkey IS NOT NULL AND CAST(s.n_regionkey AS INT) = 3)");
-    }
-
-    @Test
-    public void testEmptyColumns()
-    {
-        // CSV
-        IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.CSV, Optional.empty());
-        assertEquals(queryBuilder.buildSql(ImmutableList.of(), TupleDomain.all()), "SELECT ' ' FROM S3Object s");
-
-        // JSON
-        queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.JSON, Optional.empty());
-        assertEquals(queryBuilder.buildSql(ImmutableList.of(), TupleDomain.all()), "SELECT ' ' FROM S3Object s");
-    }
-
-    @Test
-    public void testDecimalColumns()
-    {
-        TypeManager typeManager = TESTING_TYPE_MANAGER;
-        List columns = ImmutableList.of(
-                createBaseColumn("quantity", 0, HiveType.valueOf("decimal(20,0)"), DecimalType.createDecimalType(), REGULAR, Optional.empty()),
-                createBaseColumn("extendedprice", 1, HiveType.valueOf("decimal(20,2)"), DecimalType.createDecimalType(), REGULAR, Optional.empty()),
-                createBaseColumn("discount", 2, HiveType.valueOf("decimal(10,2)"), DecimalType.createDecimalType(), REGULAR, Optional.empty()));
-        DecimalType decimalType = DecimalType.createDecimalType(10, 2);
-        TupleDomain tupleDomain = withColumnDomains(
-                ImmutableMap.of(
-                        columns.get(0), Domain.create(ofRanges(Range.lessThan(DecimalType.createDecimalType(20, 0), longDecimal("50"))), false),
-                        columns.get(1), Domain.create(ofRanges(Range.equal(HiveType.valueOf("decimal(20,2)").getType(typeManager), longDecimal("0.05"))), false),
-                        columns.get(2), Domain.create(ofRanges(Range.range(decimalType, shortDecimal("0.0"), true, shortDecimal("0.02"), true)), false)));
-
-        // CSV
-        IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(typeManager, S3SelectDataType.CSV, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s._1, s._2, s._3 FROM S3Object s");
-
-        // JSON
-        queryBuilder = new IonSqlQueryBuilder(typeManager, S3SelectDataType.JSON, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s.quantity, s.extendedprice, s.discount FROM S3Object s");
-    }
-
-    @Test
-    public void testDateColumn()
-    {
-        List columns = ImmutableList.of(
-                createBaseColumn("t1", 0, HIVE_TIMESTAMP, TIMESTAMP_MILLIS, REGULAR, Optional.empty()),
-                createBaseColumn("t2", 1, HIVE_DATE, DATE, REGULAR, Optional.empty()));
-        TupleDomain tupleDomain = withColumnDomains(ImmutableMap.of(
-                columns.get(1), Domain.create(SortedRangeSet.copyOf(DATE, ImmutableList.of(Range.equal(DATE, (long) DateTimeUtils.parseDate("2001-08-22")))), false)));
-
-        // CSV
-        IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.CSV, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s._1, s._2 FROM S3Object s WHERE (s._2 != '' AND s._2 = '2001-08-22')");
-
-        // JSON
-        queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.JSON, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s.t1, s.t2 FROM S3Object s WHERE (s.t2 IS NOT NULL AND s.t2 = '2001-08-22')");
-    }
-
-    @Test
-    public void testNotPushDoublePredicates()
-    {
-        List columns = ImmutableList.of(
-                createBaseColumn("quantity", 0, HIVE_INT, INTEGER, REGULAR, Optional.empty()),
-                createBaseColumn("extendedprice", 1, HIVE_DOUBLE, DOUBLE, REGULAR, Optional.empty()),
-                createBaseColumn("discount", 2, HIVE_DOUBLE, DOUBLE, REGULAR, Optional.empty()));
-        TupleDomain tupleDomain = withColumnDomains(
-                ImmutableMap.of(
-                        columns.get(0), Domain.create(ofRanges(Range.lessThan(BIGINT, 50L)), false),
-                        columns.get(1), Domain.create(ofRanges(Range.equal(DOUBLE, 0.05)), false),
-                        columns.get(2), Domain.create(ofRanges(Range.range(DOUBLE, 0.0, true, 0.02, true)), false)));
-
-        // CSV
-        IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.CSV, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s._1, s._2, s._3 FROM S3Object s WHERE (s._1 != '' AND CAST(s._1 AS INT) < 50)");
-
-        // JSON
-        queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.JSON, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s.quantity, s.extendedprice, s.discount FROM S3Object s WHERE (s.quantity IS NOT NULL AND CAST(s.quantity AS INT) < 50)");
-    }
-
-    @Test
-    public void testStringEscaping()
-    {
-        List columns = ImmutableList.of(
-                createBaseColumn("string", 0, HIVE_STRING, VARCHAR, REGULAR, Optional.empty()));
-        TupleDomain tupleDomain = withColumnDomains(ImmutableMap.of(
-                columns.get(0),
-                Domain.create(ValueSet.of(VARCHAR, Slices.utf8Slice("value with a ' quote")), false)));
-
-        // CSV
-        IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.CSV, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s._1 FROM S3Object s WHERE (s._1 != '' AND s._1 = 'value with a '' quote')");
-
-        // JSON
-        queryBuilder = new IonSqlQueryBuilder(TESTING_TYPE_MANAGER, S3SelectDataType.JSON, Optional.empty());
-        assertEquals(queryBuilder.buildSql(columns, tupleDomain), "SELECT s.string FROM S3Object s WHERE (s.string IS NOT NULL AND s.string = 'value with a '' quote')");
-    }
-}
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestS3SelectPushdown.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestS3SelectPushdown.java
deleted file mode 100644
index ceb44e81b4ad2..0000000000000
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3select/TestS3SelectPushdown.java
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.plugin.hive.s3select;
-
-import io.trino.plugin.hive.metastore.Column;
-import io.trino.plugin.hive.metastore.Partition;
-import io.trino.plugin.hive.metastore.Storage;
-import io.trino.plugin.hive.metastore.StorageFormat;
-import io.trino.plugin.hive.metastore.Table;
-import io.trino.spi.connector.ConnectorSession;
-import io.trino.testing.TestingConnectorSession;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hive.hcatalog.data.JsonSerDe;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.OptionalLong;
-import java.util.Properties;
-
-import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_INPUT_FORMAT;
-import static io.trino.plugin.hive.HiveMetadata.SKIP_FOOTER_COUNT_KEY;
-import static io.trino.plugin.hive.HiveMetadata.SKIP_HEADER_COUNT_KEY;
-import static io.trino.plugin.hive.HiveStorageFormat.ORC;
-import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE;
-import static io.trino.plugin.hive.HiveType.HIVE_BINARY;
-import static io.trino.plugin.hive.HiveType.HIVE_BOOLEAN;
-import static io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema;
-import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat;
-import static io.trino.plugin.hive.s3select.S3SelectPushdown.isCompressionCodecSupported;
-import static io.trino.plugin.hive.s3select.S3SelectPushdown.isSplittable;
-import static io.trino.plugin.hive.s3select.S3SelectPushdown.shouldEnablePushdownForTable;
-import static io.trino.spi.session.PropertyMetadata.booleanProperty;
-import static java.util.Collections.emptyList;
-import static java.util.Collections.emptyMap;
-import static java.util.Collections.singletonList;
-import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB;
-import static org.testng.Assert.assertFalse;
-import static org.testng.Assert.assertTrue;
-
-public class TestS3SelectPushdown
-{
-    private static final String S3_SELECT_PUSHDOWN_ENABLED = "s3_select_pushdown_enabled";
-
-    private ConnectorSession session;
-    private Table table;
-    private Partition partition;
-    private Storage storage;
-    private Column column;
-    private Properties schema;
-
-    @BeforeClass
-    public void setUp()
-    {
-        session = TestingConnectorSession.builder()
-                .setPropertyMetadata(List.of(booleanProperty(
-                        S3_SELECT_PUSHDOWN_ENABLED,
-                        "S3 Select pushdown enabled",
-                        true,
-                        false)))
-                .setPropertyValues(Map.of(S3_SELECT_PUSHDOWN_ENABLED, true))
-                .build();
-
-        column = new Column("column", HIVE_BOOLEAN, Optional.empty());
-
-        storage = Storage.builder()
-                .setStorageFormat(fromHiveStorageFormat(TEXTFILE))
-                .setLocation("location")
-                .build();
-
-        partition = new Partition(
-                "db",
-                "table",
-                emptyList(),
-                storage,
-                singletonList(column),
-                emptyMap());
-
-        table = new Table(
-                "db",
-                "table",
-                Optional.of("owner"),
-                "type",
-                storage,
-                singletonList(column),
-                emptyList(),
-                emptyMap(),
-                Optional.empty(),
-                Optional.empty(),
-                OptionalLong.empty());
-
-        schema = getHiveSchema(partition, table);
-    }
-
-    @Test
-    public void testIsCompressionCodecSupported()
-    {
-        assertTrue(isCompressionCodecSupported(schema, "s3://fakeBucket/fakeObject.gz"));
-        assertTrue(isCompressionCodecSupported(schema, "s3://fakeBucket/fakeObject"));
-        assertFalse(isCompressionCodecSupported(schema, "s3://fakeBucket/fakeObject.lz4"));
-        assertFalse(isCompressionCodecSupported(schema, "s3://fakeBucket/fakeObject.snappy"));
-        assertTrue(isCompressionCodecSupported(schema, "s3://fakeBucket/fakeObject.bz2"));
-    }
-
-    @Test
-    public void testShouldEnableSelectPushdown()
-    {
-        assertTrue(shouldEnablePushdownForTable(session, table, "s3://fakeBucket/fakeObject", Optional.empty()));
-        assertTrue(shouldEnablePushdownForTable(session, table, "s3://fakeBucket/fakeObject", Optional.of(partition)));
-    }
-
-    @Test
-    public void testShouldNotEnableSelectPushdownWhenDisabledOnSession()
-    {
-        ConnectorSession testSession = TestingConnectorSession.builder()
-                .setPropertyMetadata(List.of(booleanProperty(
-                        S3_SELECT_PUSHDOWN_ENABLED,
-                        "S3 Select pushdown enabled",
-                        false,
-                        false)))
-                .setPropertyValues(Map.of(S3_SELECT_PUSHDOWN_ENABLED, false))
-                .build();
-        assertFalse(shouldEnablePushdownForTable(testSession, table, "", Optional.empty()));
-    }
-
-    @Test
-    public void testShouldNotEnableSelectPushdownWhenIsNotS3StoragePath()
-    {
-        assertFalse(shouldEnablePushdownForTable(session, table, null, Optional.empty()));
-        assertFalse(shouldEnablePushdownForTable(session, table, "", Optional.empty()));
-        assertFalse(shouldEnablePushdownForTable(session, table, "s3:/invalid", Optional.empty()));
-        assertFalse(shouldEnablePushdownForTable(session, table, "s3:/invalid", Optional.of(partition)));
-    }
-
-    @Test
-    public void testShouldNotEnableSelectPushdownWhenIsNotSupportedSerde()
-    {
-        Storage newStorage = Storage.builder()
-                .setStorageFormat(fromHiveStorageFormat(ORC))
-                .setLocation("location")
-                .build();
-        Table newTable = new Table(
-                "db",
-                "table",
-                Optional.of("owner"),
-                "type",
-                newStorage,
-                singletonList(column),
-                emptyList(),
-                emptyMap(),
-                Optional.empty(),
-                Optional.empty(),
-                OptionalLong.empty());
-
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.empty()));
-
-        Partition newPartition = new Partition("db",
-                "table",
-                emptyList(),
-                newStorage,
-                singletonList(column),
-                emptyMap());
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.of(newPartition)));
-    }
-
-    @Test
-    public void testShouldNotEnableSelectPushdownWhenIsNotSupportedInputFormat()
-    {
-        Storage newStorage = Storage.builder()
-                .setStorageFormat(StorageFormat.create(LazySimpleSerDe.class.getName(), "inputFormat", "outputFormat"))
-                .setLocation("location")
-                .build();
-        Table newTable = new Table("db",
-                "table",
-                Optional.of("owner"),
-                "type",
-                newStorage,
-                singletonList(column),
-                emptyList(),
-                emptyMap(),
-                Optional.empty(),
-                Optional.empty(),
-                OptionalLong.empty());
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.empty()));
-
-        Partition newPartition = new Partition("db",
-                "table",
-                emptyList(),
-                newStorage,
-                singletonList(column),
-                emptyMap());
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.of(newPartition)));
-
-        newStorage = Storage.builder()
-                .setStorageFormat(StorageFormat.create(LazySimpleSerDe.class.getName(), TextInputFormat.class.getName(), "outputFormat"))
-                .setLocation("location")
-                .build();
-        newTable = new Table("db",
-                "table",
-                Optional.of("owner"),
-                "type",
-                newStorage,
-                singletonList(column),
-                emptyList(),
-                Map.of(SKIP_HEADER_COUNT_KEY, "1"),
-                Optional.empty(),
-                Optional.empty(),
-                OptionalLong.empty());
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.empty()));
-
-        newTable = new Table("db",
-                "table",
-                Optional.of("owner"),
-                "type",
-                newStorage,
-                singletonList(column),
-                emptyList(),
-                Map.of(SKIP_FOOTER_COUNT_KEY, "1"),
-                Optional.empty(),
-                Optional.empty(),
-                OptionalLong.empty());
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.empty()));
-    }
-
-    @Test
-    public void testShouldNotEnableSelectPushdownWhenColumnTypesAreNotSupported()
-    {
-        Column newColumn = new Column("column", HIVE_BINARY, Optional.empty());
-        Table newTable = new Table("db",
-                "table",
-                Optional.of("owner"),
-                "type",
-                storage,
-                singletonList(newColumn),
-                emptyList(),
-                emptyMap(),
-                Optional.empty(),
-                Optional.empty(),
-                OptionalLong.empty());
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.empty()));
-
-        Partition newPartition = new Partition("db",
-                "table",
-                emptyList(),
-                storage,
-                singletonList(newColumn),
-                emptyMap());
-        assertFalse(shouldEnablePushdownForTable(session, newTable, "s3://fakeBucket/fakeObject", Optional.of(newPartition)));
-    }
-
-    @Test
-    public void testShouldEnableSplits()
-    {
-        // Uncompressed CSV
-        assertTrue(isSplittable(true, schema, "s3://fakeBucket/fakeObject.csv"));
-        // Pushdown disabled
-        assertTrue(isSplittable(false, schema, "s3://fakeBucket/fakeObject.csv"));
-        // JSON
-        Properties jsonSchema = new Properties();
-        jsonSchema.setProperty(FILE_INPUT_FORMAT, TextInputFormat.class.getName());
-        jsonSchema.setProperty(SERIALIZATION_LIB, JsonSerDe.class.getName());
-        assertTrue(isSplittable(true, jsonSchema, "s3://fakeBucket/fakeObject.json"));
-    }
-
-    @Test
-    public void testShouldNotEnableSplits()
-    {
-        // Compressed file
-        assertFalse(isSplittable(true, schema, "s3://fakeBucket/fakeObject.gz"));
-    }
-
-    @AfterClass(alwaysRun = true)
-    public void tearDown()
-    {
-        session = null;
-        table = null;
-        partition = null;
-        storage = null;
-        column = null;
-        schema = null;
-    }
-}