From b9988bb26f55571732acd4ce1574af4456e870dc Mon Sep 17 00:00:00 2001 From: robot-clickhouse-ci-1 <118761991+robot-clickhouse-ci-1@users.noreply.github.com> Date: Sat, 8 Apr 2023 21:25:06 +0200 Subject: [PATCH] Merge pull request #48299 from CurtizJ/do-not-build-unnecessary-sets Do not build sets for skip indexes if they are not used --- src/Storages/MergeTree/MergeTreeData.cpp | 55 +++++++++---------- tests/performance/set_disable_skip_index.xml | 17 ++++++ .../02707_skip_index_with_in.reference | 0 .../0_stateless/02707_skip_index_with_in.sql | 20 +++++++ 4 files changed, 63 insertions(+), 29 deletions(-) create mode 100644 tests/performance/set_disable_skip_index.xml create mode 100644 tests/queries/0_stateless/02707_skip_index_with_in.reference create mode 100644 tests/queries/0_stateless/02707_skip_index_with_in.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 85eacedb8776..09c76472c4eb 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6070,51 +6070,48 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions( } bool MergeTreeData::mayBenefitFromIndexForIn( - const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const + const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const { /// Make sure that the left side of the IN operator contain part of the key. /// If there is a tuple on the left side of the IN operator, at least one item of the tuple - /// must be part of the key (probably wrapped by a chain of some acceptable functions). + /// must be part of the key (probably wrapped by a chain of some acceptable functions). const auto * left_in_operand_tuple = left_in_operand->as(); - const auto & index_wrapper_factory = MergeTreeIndexFactory::instance(); - if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple") + const auto & index_factory = MergeTreeIndexFactory::instance(); + const auto & query_settings = query_context->getSettingsRef(); + + auto check_for_one_argument = [&](const auto & ast) { - for (const auto & item : left_in_operand_tuple->arguments->children) + if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(ast, metadata_snapshot)) + return true; + + if (query_settings.use_skip_indexes) { - if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item, metadata_snapshot)) - return true; for (const auto & index : metadata_snapshot->getSecondaryIndices()) - if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(item)) - return true; - for (const auto & projection : metadata_snapshot->getProjections()) - { - if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(item)) + if (index_factory.get(index)->mayBenefitFromIndexForIn(ast)) return true; - } } - /// The tuple itself may be part of the primary key, so check that as a last resort. - if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot)) - return true; - for (const auto & projection : metadata_snapshot->getProjections()) + + if (query_settings.allow_experimental_projection_optimization) { - if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand)) - return true; + for (const auto & projection : metadata_snapshot->getProjections()) + if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(ast)) + return true; } + return false; - } - else + }; + + if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple") { - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(left_in_operand)) + for (const auto & item : left_in_operand_tuple->arguments->children) + if (check_for_one_argument(item)) return true; - for (const auto & projection : metadata_snapshot->getProjections()) - { - if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand)) - return true; - } - return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot); + /// The tuple itself may be part of the primary key + /// or skip index, so check that as a last resort. } + + return check_for_one_argument(left_in_operand); } using PartitionIdToMaxBlock = std::unordered_map; diff --git a/tests/performance/set_disable_skip_index.xml b/tests/performance/set_disable_skip_index.xml new file mode 100644 index 000000000000..5769f30eac93 --- /dev/null +++ b/tests/performance/set_disable_skip_index.xml @@ -0,0 +1,17 @@ + + + CREATE TABLE test_in_skip_idx + ( + a UInt64, + s String, + INDEX idx s TYPE bloom_filter GRANULARITY 1 + ) + ENGINE = MergeTree() ORDER BY a + + + INSERT INTO test_in_skip_idx SELECT number, number FROM numbers(10000000) + OPTIMIZE TABLE test_in_skip_idx FINAL + + SELECT count() FROM test_in_skip_idx WHERE s IN (SELECT toString(number + 10000000) FROM numbers(100000)) SETTINGS use_skip_indexes = 0 + DROP TABLE IF EXISTS test_in_skip_idx + diff --git a/tests/queries/0_stateless/02707_skip_index_with_in.reference b/tests/queries/0_stateless/02707_skip_index_with_in.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02707_skip_index_with_in.sql b/tests/queries/0_stateless/02707_skip_index_with_in.sql new file mode 100644 index 000000000000..4767619cee19 --- /dev/null +++ b/tests/queries/0_stateless/02707_skip_index_with_in.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS t_skip_index_in; + +CREATE TABLE t_skip_index_in +( + a String, + b String, + c String, + INDEX idx_c c TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY (a, b); + +INSERT INTO t_skip_index_in VALUES ('a', 'b', 'c'); + +-- This query checks that set is not being built if indexes are not used, +-- because with EXPLAIN the set will be built only for analysis of indexes. +EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 0 FORMAT Null; +EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + +DROP TABLE t_skip_index_in;