Skip to content

Commit

Permalink
Merge pull request ClickHouse#48299 from CurtizJ/do-not-build-unneces…
Browse files Browse the repository at this point in the history
…sary-sets

Do not build sets for skip indexes if they are not used
  • Loading branch information
robot-clickhouse-ci-1 authored and Enmk committed Jun 19, 2023
1 parent 91115cc commit b9988bb
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 29 deletions.
55 changes: 26 additions & 29 deletions src/Storages/MergeTree/MergeTreeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6070,51 +6070,48 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(
}

bool MergeTreeData::mayBenefitFromIndexForIn(
const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const
const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const
{
/// Make sure that the left side of the IN operator contain part of the key.
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple
/// must be part of the key (probably wrapped by a chain of some acceptable functions).
/// must be part of the key (probably wrapped by a chain of some acceptable functions).
const auto * left_in_operand_tuple = left_in_operand->as<ASTFunction>();
const auto & index_wrapper_factory = MergeTreeIndexFactory::instance();
if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple")
const auto & index_factory = MergeTreeIndexFactory::instance();
const auto & query_settings = query_context->getSettingsRef();

auto check_for_one_argument = [&](const auto & ast)
{
for (const auto & item : left_in_operand_tuple->arguments->children)
if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(ast, metadata_snapshot))
return true;

if (query_settings.use_skip_indexes)
{
if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item, metadata_snapshot))
return true;
for (const auto & index : metadata_snapshot->getSecondaryIndices())
if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(item))
return true;
for (const auto & projection : metadata_snapshot->getProjections())
{
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(item))
if (index_factory.get(index)->mayBenefitFromIndexForIn(ast))
return true;
}
}
/// The tuple itself may be part of the primary key, so check that as a last resort.
if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot))
return true;
for (const auto & projection : metadata_snapshot->getProjections())

if (query_settings.allow_experimental_projection_optimization)
{
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand))
return true;
for (const auto & projection : metadata_snapshot->getProjections())
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(ast))
return true;
}

return false;
}
else
};

if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple")
{
for (const auto & index : metadata_snapshot->getSecondaryIndices())
if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(left_in_operand))
for (const auto & item : left_in_operand_tuple->arguments->children)
if (check_for_one_argument(item))
return true;

for (const auto & projection : metadata_snapshot->getProjections())
{
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand))
return true;
}
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot);
/// The tuple itself may be part of the primary key
/// or skip index, so check that as a last resort.
}

return check_for_one_argument(left_in_operand);
}

using PartitionIdToMaxBlock = std::unordered_map<String, Int64>;
Expand Down
17 changes: 17 additions & 0 deletions tests/performance/set_disable_skip_index.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<test>
<create_query>
CREATE TABLE test_in_skip_idx
(
a UInt64,
s String,
INDEX idx s TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree() ORDER BY a
</create_query>

<fill_query>INSERT INTO test_in_skip_idx SELECT number, number FROM numbers(10000000)</fill_query>
<fill_query>OPTIMIZE TABLE test_in_skip_idx FINAL</fill_query>

<query>SELECT count() FROM test_in_skip_idx WHERE s IN (SELECT toString(number + 10000000) FROM numbers(100000)) SETTINGS use_skip_indexes = 0</query>
<drop_query>DROP TABLE IF EXISTS test_in_skip_idx</drop_query>
</test>
Empty file.
20 changes: 20 additions & 0 deletions tests/queries/0_stateless/02707_skip_index_with_in.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
DROP TABLE IF EXISTS t_skip_index_in;

CREATE TABLE t_skip_index_in
(
a String,
b String,
c String,
INDEX idx_c c TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY (a, b);

INSERT INTO t_skip_index_in VALUES ('a', 'b', 'c');

-- This query checks that set is not being built if indexes are not used,
-- because with EXPLAIN the set will be built only for analysis of indexes.
EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 0 FORMAT Null;
EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }

DROP TABLE t_skip_index_in;

0 comments on commit b9988bb

Please sign in to comment.