From c0d69ec306471822b742070b86f2fe796d460610 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 13 Sep 2019 16:08:53 +0200 Subject: [PATCH 1/2] Filter out Hive information_schema and sys `information_schema` will be inaccessible anyway. `sys` could be accessible, but - it doesn't work (contains JdbcStorageHandler tables and Hive views) - exposing it may require proper handling in access control. --- .../prestosql/plugin/hive/HiveMetadata.java | 31 +- .../prestosql/tests/hive/TestHiveSchema.java | 285 ++++++++++++++++++ 2 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveSchema.java diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java index 5d33075ec07b8..81bcc58628662 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java @@ -324,13 +324,18 @@ public SemiTransactionalHiveMetastore getMetastore() @Override public List listSchemaNames(ConnectorSession session) { - return metastore.getAllDatabases(); + return metastore.getAllDatabases().stream() + .filter(HiveMetadata::filterSchema) + .collect(toImmutableList()); } @Override public HiveTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) { requireNonNull(tableName, "tableName is null"); + if (!filterSchema(tableName.getSchemaName())) { + return null; + } Optional table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()); if (!table.isPresent()) { return null; @@ -646,11 +651,29 @@ public List listTables(ConnectorSession session, Optional listSchemas(ConnectorSession session, Optional schemaName) { if (schemaName.isPresent()) { + if (!filterSchema(schemaName.get())) { + return ImmutableList.of(); + } return ImmutableList.of(schemaName.get()); } return listSchemaNames(session); } + private static boolean filterSchema(String schemaName) + { + if ("information_schema".equals(schemaName)) { + // For things like listing columns in information_schema.columns table, we need to explicitly filter out Hive's own information_schema. + // TODO https://github.com/prestosql/presto/issues/1559 this should be filtered out in engine. + return false; + } + if ("sys".equals(schemaName)) { + // Hive 3's `sys` schema contains no objects we can handle, so there is no point in exposing it. + // Also, exposing it may require proper handling in access control. + return false; + } + return true; + } + @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { @@ -704,6 +727,9 @@ private List listTables(ConnectorSession session, SchemaTablePr return listTables(session, prefix.getSchema()); } SchemaTableName tableName = prefix.toSchemaTableName(); + if (!filterSchema(tableName.getSchemaName())) { + return ImmutableList.of(); + } try { if (!metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).isPresent()) { return ImmutableList.of(); @@ -1718,6 +1744,9 @@ public List listViews(ConnectorSession session, Optional getView(ConnectorSession session, SchemaTableName viewName) { + if (!filterSchema(viewName.getSchemaName())) { + return Optional.empty(); + } return metastore.getTable(new HiveIdentity(session), viewName.getSchemaName(), viewName.getTableName()) .flatMap(view -> { if (isPrestoView(view)) { diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveSchema.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveSchema.java new file mode 100644 index 0000000000000..d742796415c6f --- /dev/null +++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveSchema.java @@ -0,0 +1,285 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.tests.hive; + +import com.google.common.collect.ImmutableList; +import io.prestosql.tempto.AfterTestWithContext; +import io.prestosql.tempto.BeforeTestWithContext; +import io.prestosql.tempto.ProductTest; +import io.prestosql.tempto.assertions.QueryAssert; +import io.prestosql.tempto.query.QueryExecutionException; +import io.prestosql.tempto.query.QueryResult; +import org.assertj.core.api.Assertions; +import org.assertj.core.api.Condition; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.google.common.base.Strings.nullToEmpty; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.prestosql.tempto.assertions.QueryAssert.Row.row; +import static io.prestosql.tempto.assertions.QueryAssert.assertThat; +import static io.prestosql.tests.TestGroups.STORAGE_FORMATS; +import static io.prestosql.tests.utils.QueryExecutors.onPresto; +import static java.util.Objects.requireNonNull; + +public class TestHiveSchema + extends ProductTest +{ + @BeforeTestWithContext + public void setUp() + { + // make sure hive.default schema is not empty + onPresto().executeQuery("DROP TABLE IF EXISTS hive.default.test_sys_schema_disabled_table_in_default"); + onPresto().executeQuery("CREATE TABLE hive.default.test_sys_schema_disabled_table_in_default(a bigint)"); + } + + @AfterTestWithContext + public void tearDown() + { + onPresto().executeQuery("DROP TABLE hive.default.test_sys_schema_disabled_table_in_default"); + } + + // Note: this test is run on various Hive versions. Hive before 3 did not have `sys` schema, but it does not hurt to run the test there too. + @Test(groups = STORAGE_FORMATS) + public void testSysSchemaFilteredOut() + { + // SHOW SCHEMAS + assertThat(onPresto().executeQuery("SHOW SCHEMAS FROM hive")) + .satisfies(containsFirstColumnValue("information_schema")) + .satisfies(containsFirstColumnValue("default")) + .doesNotHave(containsFirstColumnValue("sys")); + + // SHOW TABLES + assertThat(() -> onPresto().executeQuery("SHOW TABLES FROM hive.sys")) + .failsWithMessage("line 1:1: Schema 'sys' does not exist"); + + // SHOW COLUMNS + assertThat(() -> onPresto().executeQuery("SHOW COLUMNS FROM hive.sys.version")) // sys.version exists in Hive 3 and is a view + .failsWithMessage("line 1:1: Table 'hive.sys.version' does not exist"); + assertThat(() -> onPresto().executeQuery("SHOW COLUMNS FROM hive.sys.table_params")) // sys.table_params exists in Hive 3 and is a table + .failsWithMessage("line 1:1: Table 'hive.sys.table_params' does not exist"); + + // DESCRIBE + assertThat(() -> onPresto().executeQuery("DESCRIBE hive.sys.version")) // sys.version exists in Hive 3 and is a view + .failsWithMessage("line 1:1: Table 'hive.sys.version' does not exist"); + assertThat(() -> onPresto().executeQuery("DESCRIBE hive.sys.table_params")) // sys.table_params exists in Hive 3 and is a table + .failsWithMessage("line 1:1: Table 'hive.sys.table_params' does not exist"); + + // information_schema.schemata + assertThat(onPresto().executeQuery("SELECT schema_name FROM information_schema.schemata")) + .satisfies(containsFirstColumnValue("information_schema")) + .satisfies(containsFirstColumnValue("default")) + .doesNotHave(containsFirstColumnValue("sys")); + + // information_schema.tables + assertThat(onPresto().executeQuery("SELECT DISTINCT table_schema FROM information_schema.tables")) + .satisfies(containsFirstColumnValue("information_schema")) + .satisfies(containsFirstColumnValue("default")) + .doesNotHave(containsFirstColumnValue("sys")); + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.tables WHERE table_schema = 'sys'")) + .hasNoRows(); + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.tables WHERE table_schema = 'sys' AND table_name = 'version'")) // sys.version exists in Hive 3 + .hasNoRows(); + + // information_schema.columns -- it has a special handling path in metadata, which also depends on query predicates + assertThat(onPresto().executeQuery("SELECT DISTINCT table_schema FROM information_schema.columns")) + .satisfies(containsFirstColumnValue("information_schema")) + .satisfies(containsFirstColumnValue("default")) + .doesNotHave(containsFirstColumnValue("sys")); + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.columns WHERE table_schema = 'sys'")) + .hasNoRows(); + assertThat(onPresto().executeQuery("SELECT column_name FROM information_schema.columns WHERE table_schema = 'sys' AND table_name = 'version'")) // sys.version exists in Hive 3 + .hasNoRows(); + + // information_schema.table_privileges -- it has a special handling path in metadata, which also depends on query predicates + if (tablePrivilegesSupported()) { + assertThat(onPresto().executeQuery("SELECT DISTINCT table_schema FROM information_schema.table_privileges")) + .doesNotHave(containsFirstColumnValue("information_schema")) + .satisfies(containsFirstColumnValue("default")) + .doesNotHave(containsFirstColumnValue("sys")); + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.table_privileges WHERE table_schema = 'sys'")) + .hasNoRows(); + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.table_privileges WHERE table_schema = 'sys' AND table_name = 'version'")) // sys.version exists in Hive 3 + .hasNoRows(); + } + + // SELECT + assertThat(() -> onPresto().executeQuery("SELECT * FROM hive.sys.version")) // sys.version exists in Hive 3 and is a view + .failsWithMessage("line 1:15: Schema sys does not exist"); + assertThat(() -> onPresto().executeQuery("SELECT * FROM hive.sys.table_params")) // sys.table_params exists in Hive 3 and is a table + .failsWithMessage("line 1:15: Schema sys does not exist"); + } + + // Note: this test is run on various Hive versions. Hive before 3 did not have `information_schema` schema, but it does not hurt to run the test there too. + @Test(groups = STORAGE_FORMATS) + public void testHiveInformationSchemaFilteredOut() + { + List allInformationSchemaTables = ImmutableList.builder() + // In particular, no column_privileges which exists in Hive 3's information_schema + .add("columns") + .add("tables") + .add("views") + .add("schemata") + .add("table_privileges") + .add("roles") + .add("applicable_roles") + .add("enabled_roles") + .build(); + List allInformationSchemaTablesAsRows = allInformationSchemaTables.stream() + .map(QueryAssert.Row::row) + .collect(toImmutableList()); + + // This test is run in various setups and we may or may not have access to hive.information_schema.roles table + List allInformationSchemaTablesExceptRoles = allInformationSchemaTables.stream() + .filter(tableName -> !tableName.equals("roles")) + .collect(toImmutableList()); + List allInformationSchemaTablesExceptRolesAsRows = allInformationSchemaTablesExceptRoles.stream() + .map(QueryAssert.Row::row) + .collect(toImmutableList()); + + // SHOW SCHEMAS + assertThat(onPresto().executeQuery("SHOW SCHEMAS FROM hive")) + .satisfies(containsFirstColumnValue("information_schema")); + + // SHOW TABLES + assertThat(onPresto().executeQuery("SHOW TABLES FROM hive.information_schema")) + .satisfies(containsFirstColumnValue("tables")) + .satisfies(containsFirstColumnValue("columns")) + .satisfies(containsFirstColumnValue("table_privileges")) + .doesNotHave(containsFirstColumnValue("column_privileges")); // Hive 3's information_schema has column_privileges view + + // SHOW COLUMNS + assertThat(onPresto().executeQuery("SHOW COLUMNS FROM hive.information_schema.columns")) + .satisfies(containsFirstColumnValue("table_catalog")) + .satisfies(containsFirstColumnValue("table_schema")) + .doesNotHave(containsFirstColumnValue("is_updatable")); // Hive 3's information_schema.columns has is_updatable column + + assertThat(() -> onPresto().executeQuery("SHOW COLUMNS FROM hive.information_schema.column_privileges")) // Hive 3's information_schema has column_privileges view + .failsWithMessage("line 1:1: Table 'hive.information_schema.column_privileges' does not exist"); + + // DESCRIBE + assertThat(onPresto().executeQuery("DESCRIBE hive.information_schema.columns")) + .satisfies(containsFirstColumnValue("table_catalog")) + .satisfies(containsFirstColumnValue("table_schema")) + .satisfies(containsFirstColumnValue("column_name")) + .doesNotHave(containsFirstColumnValue("is_updatable")); // Hive 3's information_schema.columns has is_updatable column + + assertThat(() -> onPresto().executeQuery("DESCRIBE hive.information_schema.column_privileges")) // Hive 3's information_schema has column_privileges view + .failsWithMessage("line 1:1: Table 'hive.information_schema.column_privileges' does not exist"); + + // information_schema.schemata + assertThat(onPresto().executeQuery("SELECT schema_name FROM information_schema.schemata")) + .satisfies(containsFirstColumnValue("information_schema")); + + // information_schema.tables + assertThat(onPresto().executeQuery("SELECT DISTINCT table_schema FROM information_schema.tables")) + .satisfies(containsFirstColumnValue("information_schema")); + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.tables WHERE table_schema = 'information_schema'")) + .containsOnly(allInformationSchemaTablesAsRows); + Assertions.assertThat(onPresto().executeQuery("SELECT table_schema, table_name FROM information_schema.tables").rows().stream() + .filter(row -> row.get(0).equals("information_schema")) + .map(row -> (String) row.get(1))) + .containsOnly(allInformationSchemaTables.toArray(new String[0])); + // information_schema.column_privileges exists in Hive 3 + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.tables WHERE table_schema = 'information_schema' AND table_name = 'column_privileges'")) + .hasNoRows(); + + // information_schema.columns -- it has a special handling path in metadata, which also depends on query predicates + assertThat(onPresto().executeQuery("SELECT DISTINCT table_schema FROM information_schema.columns")) + .satisfies(containsFirstColumnValue("information_schema")); + assertThat(onPresto().executeQuery("SELECT DISTINCT table_name FROM information_schema.columns WHERE table_schema = 'information_schema' AND table_name != 'roles'")) + .containsOnly(allInformationSchemaTablesExceptRolesAsRows); + Assertions.assertThat(onPresto().executeQuery("SELECT table_schema, table_name, column_name FROM information_schema.columns").rows().stream() + .filter(row -> row.get(0).equals("information_schema")) + .map(row -> (String) row.get(1)) + .filter(tableName -> !tableName.equals("roles")) + .distinct()) + .containsOnly(allInformationSchemaTablesExceptRoles.toArray(new String[0])); + assertThat(onPresto().executeQuery("SELECT column_name FROM information_schema.columns WHERE table_schema = 'information_schema' AND table_name = 'columns'")) + .containsOnly( + // In particular, no is_updatable column which exists in Hive 3's information_schema.columns + row("table_catalog"), + row("table_schema"), + row("table_name"), + row("column_name"), + row("ordinal_position"), + row("column_default"), + row("is_nullable"), + row("data_type")); + // information_schema.column_privileges exists in Hive 3 + assertThat(onPresto().executeQuery("SELECT column_name FROM information_schema.columns WHERE table_schema = 'information_schema' AND table_name = 'column_privileges'")) + .hasNoRows(); + + // information_schema.table_privileges -- it has a special handling path in metadata, which also depends on query predicates + if (tablePrivilegesSupported()) { + assertThat(onPresto().executeQuery("SELECT DISTINCT table_schema FROM information_schema.table_privileges")) + .satisfies(containsFirstColumnValue("default")) + .doesNotHave(containsFirstColumnValue("information_schema")); // tables in information_schema have no privileges + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.table_privileges WHERE table_schema = 'information_schema'")) + .hasNoRows(); // tables in information_schema have no privileges + Assertions.assertThat(onPresto().executeQuery("SELECT table_schema, table_name, privilege_type FROM information_schema.table_privileges").rows().stream() + .filter(row -> row.get(0).equals("information_schema")) + .map(row -> (String) row.get(1))) + .isEmpty(); // tables in information_schema have no privileges + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.table_privileges WHERE table_schema = 'information_schema' AND table_name = 'columns'")) + .hasNoRows(); + // information_schema.column_privileges exists in Hive 3 + assertThat(onPresto().executeQuery("SELECT table_name FROM information_schema.table_privileges WHERE table_schema = 'information_schema' AND table_name = 'column_privileges'")) + .hasNoRows(); + } + + // SELECT + assertThat(() -> onPresto().executeQuery("SELECT * FROM hive.information_schema.column_privileges")) // information_schema.column_privileges exists in Hive 3 + .failsWithMessage("line 1:15: Table hive.information_schema.column_privileges does not exist"); + } + + /** Returns whether table privileges are supported in current setup. */ + private boolean tablePrivilegesSupported() + { + try { + onPresto().executeQuery("SELECT * FROM information_schema.table_privileges"); + return true; + } + catch (QueryExecutionException e) { + if (nullToEmpty(e.getMessage()).endsWith(": This connector does not support table privileges")) { + return false; + } + throw e; + } + } + + /** + * @apiNote The expected use context is in negative matching. This is why this method works on single values. + * When matching full rows, it would be possible to have false-positive results. + */ + private static Condition containsFirstColumnValue(T value) + { + requireNonNull(value, "value is null"); + return new Condition<>( + queryResult -> { + List values = queryResult.column(1); + if (!values.isEmpty()) { + // When contains() is used in a negative context (doesNotHave(...)), it could be possible to get false-positives when types are wrong. + Class expectedType = value.getClass(); + Class actualType = values.get(0).getClass(); + verify(expectedType.equals(actualType), "Expected QueryResult to contain %s values, but it contains %s", expectedType, actualType); + } + return values.contains(value); + }, + "Contains(%s)", + value); + } +} From ac1b60289c684f9bab235526d4bfda578c86703d Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 9 Mar 2020 12:57:52 +0100 Subject: [PATCH 2/2] Run storage_formats for kerberized HDFS without impersonation --- .../prestosql/plugin/hive/HiveMetadata.java | 30 +++++++++---------- .../bin/product-tests-suite-2.sh | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java index 81bcc58628662..f2fd617828488 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveMetadata.java @@ -659,21 +659,6 @@ private List listSchemas(ConnectorSession session, Optional sche return listSchemaNames(session); } - private static boolean filterSchema(String schemaName) - { - if ("information_schema".equals(schemaName)) { - // For things like listing columns in information_schema.columns table, we need to explicitly filter out Hive's own information_schema. - // TODO https://github.com/prestosql/presto/issues/1559 this should be filtered out in engine. - return false; - } - if ("sys".equals(schemaName)) { - // Hive 3's `sys` schema contains no objects we can handle, so there is no point in exposing it. - // Also, exposing it may require proper handling in access control. - return false; - } - return true; - } - @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { @@ -1777,6 +1762,21 @@ private boolean isHiveOrPrestoView(Table table) return table.getTableType().equals(TableType.VIRTUAL_VIEW.name()); } + private static boolean filterSchema(String schemaName) + { + if ("information_schema".equals(schemaName)) { + // For things like listing columns in information_schema.columns table, we need to explicitly filter out Hive's own information_schema. + // TODO https://github.com/prestosql/presto/issues/1559 this should be filtered out in engine. + return false; + } + if ("sys".equals(schemaName)) { + // Hive 3's `sys` schema contains no objects we can handle, so there is no point in exposing it. + // Also, exposing it may require proper handling in access control. + return false; + } + return true; + } + @Override public ConnectorTableHandle beginDelete(ConnectorSession session, ConnectorTableHandle tableHandle) { diff --git a/presto-product-tests/bin/product-tests-suite-2.sh b/presto-product-tests/bin/product-tests-suite-2.sh index a0f98758f91cb..aa1547f7722e2 100755 --- a/presto-product-tests/bin/product-tests-suite-2.sh +++ b/presto-product-tests/bin/product-tests-suite-2.sh @@ -11,7 +11,7 @@ presto-product-tests-launcher/bin/run-launcher test run \ presto-product-tests-launcher/bin/run-launcher test run \ --environment singlenode-kerberos-hdfs-no-impersonation \ - -- -g hdfs_no_impersonation \ + -- -g storage_formats,hdfs_no_impersonation \ || suite_exit_code=1 presto-product-tests-launcher/bin/run-launcher test run \