Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

23.3 backport of #48299 Do not build skip indices if not used #276

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 26 additions & 29 deletions src/Storages/MergeTree/MergeTreeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6070,51 +6070,48 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(
}

bool MergeTreeData::mayBenefitFromIndexForIn(
const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const
const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const
{
/// Make sure that the left side of the IN operator contain part of the key.
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple
/// must be part of the key (probably wrapped by a chain of some acceptable functions).
/// must be part of the key (probably wrapped by a chain of some acceptable functions).
const auto * left_in_operand_tuple = left_in_operand->as<ASTFunction>();
const auto & index_wrapper_factory = MergeTreeIndexFactory::instance();
if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple")
const auto & index_factory = MergeTreeIndexFactory::instance();
const auto & query_settings = query_context->getSettingsRef();

auto check_for_one_argument = [&](const auto & ast)
{
for (const auto & item : left_in_operand_tuple->arguments->children)
if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(ast, metadata_snapshot))
return true;

if (query_settings.use_skip_indexes)
{
if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item, metadata_snapshot))
return true;
for (const auto & index : metadata_snapshot->getSecondaryIndices())
if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(item))
return true;
for (const auto & projection : metadata_snapshot->getProjections())
{
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(item))
if (index_factory.get(index)->mayBenefitFromIndexForIn(ast))
return true;
}
}
/// The tuple itself may be part of the primary key, so check that as a last resort.
if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot))
return true;
for (const auto & projection : metadata_snapshot->getProjections())

if (query_settings.allow_experimental_projection_optimization)
{
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand))
return true;
for (const auto & projection : metadata_snapshot->getProjections())
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(ast))
return true;
}

return false;
}
else
};

if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple")
{
for (const auto & index : metadata_snapshot->getSecondaryIndices())
if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(left_in_operand))
for (const auto & item : left_in_operand_tuple->arguments->children)
if (check_for_one_argument(item))
return true;

for (const auto & projection : metadata_snapshot->getProjections())
{
if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand))
return true;
}
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot);
/// The tuple itself may be part of the primary key
/// or skip index, so check that as a last resort.
}

return check_for_one_argument(left_in_operand);
}

using PartitionIdToMaxBlock = std::unordered_map<String, Int64>;
Expand Down
17 changes: 17 additions & 0 deletions tests/performance/set_disable_skip_index.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<test>
<create_query>
CREATE TABLE test_in_skip_idx
(
a UInt64,
s String,
INDEX idx s TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree() ORDER BY a
</create_query>

<fill_query>INSERT INTO test_in_skip_idx SELECT number, number FROM numbers(10000000)</fill_query>
<fill_query>OPTIMIZE TABLE test_in_skip_idx FINAL</fill_query>

<query>SELECT count() FROM test_in_skip_idx WHERE s IN (SELECT toString(number + 10000000) FROM numbers(100000)) SETTINGS use_skip_indexes = 0</query>
<drop_query>DROP TABLE IF EXISTS test_in_skip_idx</drop_query>
</test>
Empty file.
20 changes: 20 additions & 0 deletions tests/queries/0_stateless/02707_skip_index_with_in.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
DROP TABLE IF EXISTS t_skip_index_in;

CREATE TABLE t_skip_index_in
(
a String,
b String,
c String,
INDEX idx_c c TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY (a, b);

INSERT INTO t_skip_index_in VALUES ('a', 'b', 'c');

-- This query checks that set is not being built if indexes are not used,
-- because with EXPLAIN the set will be built only for analysis of indexes.
EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 0 FORMAT Null;
EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }

DROP TABLE t_skip_index_in;