Merge pull request ClickHouse#48299 from CurtizJ/do-not-build-unneces…

…sary-sets Do not build sets for skip indexes if they are not used
Altinity · Jun 19, 2023 · b9988bb · b9988bb
1 parent 91115cc
commit b9988bb
Show file tree

Hide file tree

Showing 4 changed files with 63 additions and 29 deletions.
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -6070,51 +6070,48 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(
 }
 
 bool MergeTreeData::mayBenefitFromIndexForIn(
-    const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const
+    const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const
 {
     /// Make sure that the left side of the IN operator contain part of the key.
     /// If there is a tuple on the left side of the IN operator, at least one item of the tuple
-    ///  must be part of the key (probably wrapped by a chain of some acceptable functions).
+    /// must be part of the key (probably wrapped by a chain of some acceptable functions).
     const auto * left_in_operand_tuple = left_in_operand->as<ASTFunction>();
-    const auto & index_wrapper_factory = MergeTreeIndexFactory::instance();
-    if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple")
+    const auto & index_factory = MergeTreeIndexFactory::instance();
+    const auto & query_settings = query_context->getSettingsRef();
+
+    auto check_for_one_argument = [&](const auto & ast)
     {
-        for (const auto & item : left_in_operand_tuple->arguments->children)
+        if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(ast, metadata_snapshot))
+            return true;
+
+        if (query_settings.use_skip_indexes)
         {
-            if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item, metadata_snapshot))
-                return true;
             for (const auto & index : metadata_snapshot->getSecondaryIndices())
-                if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(item))
-                    return true;
-            for (const auto & projection : metadata_snapshot->getProjections())
-            {
-                if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(item))
+                if (index_factory.get(index)->mayBenefitFromIndexForIn(ast))
                     return true;
-            }
         }
-        /// The tuple itself may be part of the primary key, so check that as a last resort.
-        if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot))
-            return true;
-        for (const auto & projection : metadata_snapshot->getProjections())
+
+        if (query_settings.allow_experimental_projection_optimization)
         {
-            if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand))
-                return true;
+            for (const auto & projection : metadata_snapshot->getProjections())
+                if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(ast))
+                    return true;
         }
+
         return false;
-    }
-    else
+    };
+
+    if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple")
     {
-        for (const auto & index : metadata_snapshot->getSecondaryIndices())
-            if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(left_in_operand))
+        for (const auto & item : left_in_operand_tuple->arguments->children)
+            if (check_for_one_argument(item))
                 return true;
 
-        for (const auto & projection : metadata_snapshot->getProjections())
-        {
-            if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand))
-                return true;
-        }
-        return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot);
+        /// The tuple itself may be part of the primary key
+        /// or skip index, so check that as a last resort.
     }
+
+    return check_for_one_argument(left_in_operand);
 }
 
 using PartitionIdToMaxBlock = std::unordered_map<String, Int64>;

diff --git a/tests/performance/set_disable_skip_index.xml b/tests/performance/set_disable_skip_index.xml
@@ -0,0 +1,17 @@
+<test>
+    <create_query>
+        CREATE TABLE test_in_skip_idx
+        (
+            a UInt64,
+            s String,
+            INDEX idx s TYPE bloom_filter GRANULARITY 1
+        )
+        ENGINE = MergeTree() ORDER BY a
+    </create_query>
+
+    <fill_query>INSERT INTO test_in_skip_idx SELECT number, number FROM numbers(10000000)</fill_query>
+    <fill_query>OPTIMIZE TABLE test_in_skip_idx FINAL</fill_query>
+
+    <query>SELECT count() FROM test_in_skip_idx WHERE s IN (SELECT toString(number + 10000000) FROM numbers(100000)) SETTINGS use_skip_indexes = 0</query>
+    <drop_query>DROP TABLE IF EXISTS test_in_skip_idx</drop_query>
+</test>
diff --git a/tests/queries/0_stateless/02707_skip_index_with_in.reference b/tests/queries/0_stateless/02707_skip_index_with_in.reference
diff --git a/tests/queries/0_stateless/02707_skip_index_with_in.sql b/tests/queries/0_stateless/02707_skip_index_with_in.sql
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS t_skip_index_in;
+
+CREATE TABLE t_skip_index_in
+(
+    a String,
+    b String,
+    c String,
+    INDEX idx_c c TYPE bloom_filter GRANULARITY 1
+)
+ENGINE = MergeTree
+ORDER BY (a, b);
+
+INSERT INTO t_skip_index_in VALUES ('a', 'b', 'c');
+
+-- This query checks that set is not being built if indexes are not used,
+-- because with EXPLAIN the set will be built only for analysis of indexes.
+EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 0 FORMAT Null;
+EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
+
+DROP TABLE t_skip_index_in;