From 6549777b5370f71cfc8a9b8c2a51d7d3beedbc63 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 10 Apr 2023 15:46:59 +0800 Subject: [PATCH] Minor refine of join (#7257) ref pingcap/tiflash#6233 --- dbms/src/Columns/ColumnUtils.cpp | 8 + dbms/src/Columns/ColumnUtils.h | 2 + .../DataStreams/NonJoinedBlockInputStream.cpp | 17 +- dbms/src/Interpreters/Expand.cpp | 13 +- dbms/src/Interpreters/Join.cpp | 124 +--------- dbms/src/Interpreters/Join.h | 8 +- dbms/src/Interpreters/JoinHashMap.cpp | 111 +++++++++ .../{JoinHashTable.h => JoinHashMap.h} | 5 +- dbms/src/Interpreters/JoinPartition.cpp | 222 +++++++++--------- dbms/src/Interpreters/JoinPartition.h | 10 +- 10 files changed, 260 insertions(+), 260 deletions(-) create mode 100644 dbms/src/Interpreters/JoinHashMap.cpp rename dbms/src/Interpreters/{JoinHashTable.h => JoinHashMap.h} (97%) diff --git a/dbms/src/Columns/ColumnUtils.cpp b/dbms/src/Columns/ColumnUtils.cpp index f8b90a9978c..df7e0b69d0e 100644 --- a/dbms/src/Columns/ColumnUtils.cpp +++ b/dbms/src/Columns/ColumnUtils.cpp @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include namespace DB { @@ -30,4 +32,10 @@ bool columnEqual(const ColumnPtr & expected, const ColumnPtr & actual, String & } return true; } +void convertColumnToNullable(ColumnWithTypeAndName & column) +{ + column.type = makeNullable(column.type); + if (column.column) + column.column = makeNullable(column.column); +} } // namespace DB diff --git a/dbms/src/Columns/ColumnUtils.h b/dbms/src/Columns/ColumnUtils.h index 2ef52cf429d..549449683d3 100644 --- a/dbms/src/Columns/ColumnUtils.h +++ b/dbms/src/Columns/ColumnUtils.h @@ -15,8 +15,10 @@ #pragma once #include +#include namespace DB { bool columnEqual(const ColumnPtr & expected, const ColumnPtr & actual, String & unequal_msg); +void convertColumnToNullable(ColumnWithTypeAndName & column); } // namespace DB diff --git a/dbms/src/DataStreams/NonJoinedBlockInputStream.cpp b/dbms/src/DataStreams/NonJoinedBlockInputStream.cpp index cb947c8be06..d46868ef037 100644 --- a/dbms/src/DataStreams/NonJoinedBlockInputStream.cpp +++ b/dbms/src/DataStreams/NonJoinedBlockInputStream.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include @@ -200,12 +201,12 @@ void NonJoinedBlockInputStream::fillColumnsUsingCurrentPartition( } if (parent.strictness == ASTTableJoin::Strictness::Any) { - switch (parent.type) + switch (parent.join_map_method) { -#define M(TYPE) \ - case JoinType::TYPE: \ +#define M(METHOD) \ + case JoinMapMethod::METHOD: \ fillColumns( \ - *partition->maps_any_full.TYPE, \ + *partition->maps_any_full.METHOD, \ num_columns_left, \ mutable_columns_left, \ num_columns_right, \ @@ -221,12 +222,12 @@ void NonJoinedBlockInputStream::fillColumnsUsingCurrentPartition( } else if (parent.strictness == ASTTableJoin::Strictness::All) { - switch (parent.type) + switch (parent.join_map_method) { -#define M(TYPE) \ - case JoinType::TYPE: \ +#define M(METHOD) \ + case JoinMapMethod::METHOD: \ fillColumns( \ - *partition->maps_all_full.TYPE, \ + *partition->maps_all_full.METHOD, \ num_columns_left, \ mutable_columns_left, \ num_columns_right, \ diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 3910efec76a..8a226978f9f 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include +#include #include #include #include @@ -22,16 +21,6 @@ namespace DB { -namespace -{ -void convertColumnToNullable(ColumnWithTypeAndName & column) -{ - column.type = makeNullable(column.type); - if (column.column) - column.column = makeNullable(column.column); -} -} // namespace - Expand::Expand(const DB::GroupingSets & gss) : group_sets_names(gss) { diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 9d948e2a428..e1f08ca161a 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include +#include #include #include #include @@ -111,13 +108,6 @@ ColumnRawPtrs extractAndMaterializeKeyColumns(const Block & block, Columns & mat const std::string Join::match_helper_prefix = "__left-semi-join-match-helper"; const DataTypePtr Join::match_helper_type = makeNullable(std::make_shared()); -void convertColumnToNullable(ColumnWithTypeAndName & column) -{ - column.type = makeNullable(column.type); - if (column.column) - column.column = makeNullable(column.column); -} - Join::Join( const Names & key_names_left_, const Names & key_names_right_, @@ -191,97 +181,11 @@ void Join::meetErrorImpl(const String & error_message_, std::unique_lock(column) - || (column->isColumnConst() && typeid_cast(&static_cast(column)->getDataColumn())); -} - -JoinType Join::chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes_) const -{ - const size_t keys_size = key_columns.size(); - - if (keys_size == 0) - return JoinType::CROSS; - - bool all_fixed = true; - size_t keys_bytes = 0; - key_sizes_.resize(keys_size); - for (size_t j = 0; j < keys_size; ++j) - { - if (!key_columns[j]->isFixedAndContiguous()) - { - all_fixed = false; - break; - } - key_sizes_[j] = key_columns[j]->sizeOfValueIfFixed(); - keys_bytes += key_sizes_[j]; - } - - /// If there is one numeric key that fits in 64 bits - if (keys_size == 1 && key_columns[0]->isNumeric()) - { - size_t size_of_field = key_columns[0]->sizeOfValueIfFixed(); - if (size_of_field == 1) - return JoinType::key8; - if (size_of_field == 2) - return JoinType::key16; - if (size_of_field == 4) - return JoinType::key32; - if (size_of_field == 8) - return JoinType::key64; - if (size_of_field == 16) - return JoinType::keys128; - throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR); - } - - /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys - if (all_fixed && keys_bytes <= 16) - return JoinType::keys128; - if (all_fixed && keys_bytes <= 32) - return JoinType::keys256; - - /// If there is single string key, use hash table of it's values. - if (keys_size == 1 && CanAsColumnString(key_columns[0])) - { - if (collators.empty() || !collators[0]) - return JoinType::key_strbin; - else - { - switch (collators[0]->getCollatorType()) - { - case TiDB::ITiDBCollator::CollatorType::UTF8MB4_BIN: - case TiDB::ITiDBCollator::CollatorType::UTF8_BIN: - case TiDB::ITiDBCollator::CollatorType::LATIN1_BIN: - case TiDB::ITiDBCollator::CollatorType::ASCII_BIN: - { - return JoinType::key_strbinpadding; - } - case TiDB::ITiDBCollator::CollatorType::BINARY: - { - return JoinType::key_strbin; - } - default: - { - // for CI COLLATION, use original way - return JoinType::key_string; - } - } - } - } - - if (keys_size == 1 && typeid_cast(key_columns[0])) - return JoinType::key_fixed_string; - - /// Otherwise, use serialized values as the key. - return JoinType::serialized; -} - size_t Join::getTotalRowCount() const { size_t res = 0; - if (type == JoinType::CROSS) + if (join_map_method == JoinMapMethod::CROSS) { res = total_input_build_rows; } @@ -304,7 +208,7 @@ size_t Join::getTotalByteCount() } else { - if (type == JoinType::CROSS) + if (join_map_method == JoinMapMethod::CROSS) { for (const auto & block : blocks) res += block.bytes(); @@ -344,7 +248,7 @@ void Join::setBuildConcurrencyAndInitJoinPartition(size_t build_concurrency_) partitions.reserve(build_concurrency); for (size_t i = 0; i < getBuildConcurrency(); ++i) { - partitions.push_back(std::make_unique(type, kind, strictness, max_block_size, log)); + partitions.push_back(std::make_unique(join_map_method, kind, strictness, max_block_size, log)); } } @@ -412,13 +316,13 @@ void Join::initBuild(const Block & sample_block, size_t build_concurrency_) if (unlikely(initialized)) throw Exception("Logical error: Join has been initialized", ErrorCodes::LOGICAL_ERROR); initialized = true; - type = chooseMethod(getKeyColumns(key_names_right, sample_block), key_sizes); + join_map_method = chooseJoinMapMethod(getKeyColumns(key_names_right, sample_block), key_sizes, collators); setBuildConcurrencyAndInitJoinPartition(build_concurrency_); build_sample_block = sample_block; build_spiller = std::make_unique(build_spill_config, false, build_concurrency_, build_sample_block, log); if (max_bytes_before_external_join > 0) { - if (type == JoinType::CROSS) + if (join_map_method == JoinMapMethod::CROSS) { /// todo support spill for cross join max_bytes_before_external_join = 0; @@ -1353,24 +1257,10 @@ Block Join::joinBlockNullAware(ProbeProcessInfo & probe_process_info) const { Block block = probe_process_info.block; - size_t keys_size = key_names_left.size(); - ColumnRawPtrs key_columns(keys_size); - /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. /// Note: this variable can't be removed because it will take smart pointers' lifecycle to the end of this function. Columns materialized_columns; - - /// Memoize key columns to work with. - for (size_t i = 0; i < keys_size; ++i) - { - key_columns[i] = block.getByName(key_names_left[i]).column.get(); - - if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns[i] = materialized_columns.back().get(); - } - } + ColumnRawPtrs key_columns = extractAndMaterializeKeyColumns(block, materialized_columns, key_names_left); /// Note that `extractAllKeyNullMap` must be done before `extractNestedColumnsAndNullMap` /// because `extractNestedColumnsAndNullMap` will change the nullable column to its nested column. diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 846dc2a796c..e23ec428508 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -291,9 +291,7 @@ class Join bool has_build_data_in_memory = false; private: - JoinType type = JoinType::EMPTY; - - JoinType chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes) const; + JoinMapMethod join_map_method = JoinMapMethod::EMPTY; Sizes key_sizes; @@ -395,6 +393,4 @@ struct RestoreInfo , probe_stream(probe_stream_){}; }; -void convertColumnToNullable(ColumnWithTypeAndName & column); - } // namespace DB diff --git a/dbms/src/Interpreters/JoinHashMap.cpp b/dbms/src/Interpreters/JoinHashMap.cpp new file mode 100644 index 00000000000..10f95ddbcde --- /dev/null +++ b/dbms/src/Interpreters/JoinHashMap.cpp @@ -0,0 +1,111 @@ +// Copyright 2023 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +namespace DB +{ +namespace +{ +bool canAsColumnString(const IColumn * column) +{ + return typeid_cast(column) + || (column->isColumnConst() && typeid_cast(&static_cast(column)->getDataColumn())); +} +} // namespace + +JoinMapMethod chooseJoinMapMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes, const TiDB::TiDBCollators & collators) +{ + const size_t keys_size = key_columns.size(); + + if (keys_size == 0) + return JoinMapMethod::CROSS; + + bool all_fixed = true; + size_t keys_bytes = 0; + key_sizes.resize(keys_size); + for (size_t j = 0; j < keys_size; ++j) + { + if (!key_columns[j]->isFixedAndContiguous()) + { + all_fixed = false; + break; + } + key_sizes[j] = key_columns[j]->sizeOfValueIfFixed(); + keys_bytes += key_sizes[j]; + } + + /// If there is one numeric key that fits in 64 bits + if (keys_size == 1 && key_columns[0]->isNumeric()) + { + size_t size_of_field = key_columns[0]->sizeOfValueIfFixed(); + if (size_of_field == 1) + return JoinMapMethod::key8; + if (size_of_field == 2) + return JoinMapMethod::key16; + if (size_of_field == 4) + return JoinMapMethod::key32; + if (size_of_field == 8) + return JoinMapMethod::key64; + if (size_of_field == 16) + return JoinMapMethod::keys128; + throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR); + } + + /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys + if (all_fixed && keys_bytes <= 16) + return JoinMapMethod::keys128; + if (all_fixed && keys_bytes <= 32) + return JoinMapMethod::keys256; + + /// If there is single string key, use hash table of it's values. + if (keys_size == 1 && canAsColumnString(key_columns[0])) + { + if (collators.empty() || !collators[0]) + return JoinMapMethod::key_strbin; + else + { + switch (collators[0]->getCollatorType()) + { + case TiDB::ITiDBCollator::CollatorType::UTF8MB4_BIN: + case TiDB::ITiDBCollator::CollatorType::UTF8_BIN: + case TiDB::ITiDBCollator::CollatorType::LATIN1_BIN: + case TiDB::ITiDBCollator::CollatorType::ASCII_BIN: + { + return JoinMapMethod::key_strbinpadding; + } + case TiDB::ITiDBCollator::CollatorType::BINARY: + { + return JoinMapMethod::key_strbin; + } + default: + { + // for CI COLLATION, use original way + return JoinMapMethod::key_string; + } + } + } + } + + if (keys_size == 1 && typeid_cast(key_columns[0])) + return JoinMapMethod::key_fixed_string; + + /// Otherwise, use serialized values as the key. + return JoinMapMethod::serialized; +} +} // namespace DB diff --git a/dbms/src/Interpreters/JoinHashTable.h b/dbms/src/Interpreters/JoinHashMap.h similarity index 97% rename from dbms/src/Interpreters/JoinHashTable.h rename to dbms/src/Interpreters/JoinHashMap.h index ae233f27403..a33129531a7 100644 --- a/dbms/src/Interpreters/JoinHashTable.h +++ b/dbms/src/Interpreters/JoinHashMap.h @@ -19,6 +19,7 @@ namespace DB { +using Sizes = std::vector; /// Reference to the row in block. struct RowRef { @@ -84,7 +85,7 @@ struct WithUsedFlag : Base M(keys256) \ M(serialized) -enum class JoinType +enum class JoinMapMethod { EMPTY, CROSS, @@ -165,4 +166,6 @@ using MapsAny = MapsTemplate>; using MapsAll = MapsTemplate>; using MapsAnyFull = MapsTemplate>; using MapsAllFull = MapsTemplate>; + +JoinMapMethod chooseJoinMapMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes, const TiDB::TiDBCollators & collators); } // namespace DB diff --git a/dbms/src/Interpreters/JoinPartition.cpp b/dbms/src/Interpreters/JoinPartition.cpp index ef731eea570..27aebb9b26a 100644 --- a/dbms/src/Interpreters/JoinPartition.cpp +++ b/dbms/src/Interpreters/JoinPartition.cpp @@ -63,18 +63,18 @@ void insertRowToList(RowRefList * list, RowRefList * elem, Block * stored_block, } template -static void initImpl(Maps & maps, JoinType type) +static void initImpl(Maps & maps, JoinMapMethod method) { - switch (type) + switch (method) { - case JoinType::EMPTY: + case JoinMapMethod::EMPTY: break; - case JoinType::CROSS: + case JoinMapMethod::CROSS: break; -#define M(TYPE) \ - case JoinType::TYPE: \ - maps.TYPE = std::make_unique(); \ +#define M(METHOD) \ + case JoinMapMethod::METHOD: \ + maps.METHOD = std::make_unique(); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -85,18 +85,18 @@ static void initImpl(Maps & maps, JoinType type) } template -static Map & getMapImpl(Maps & maps, JoinType type) +static Map & getMapImpl(Maps & maps, JoinMapMethod method) { void * ret = nullptr; - switch (type) + switch (method) { - case JoinType::EMPTY: - case JoinType::CROSS: + case JoinMapMethod::EMPTY: + case JoinMapMethod::CROSS: throw Exception("Should not reach here"); -#define M(TYPE) \ - case JoinType::TYPE: \ - ret = maps.TYPE.get(); \ +#define M(METHOD) \ + case JoinMapMethod::METHOD: \ + ret = maps.METHOD.get(); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -108,17 +108,17 @@ static Map & getMapImpl(Maps & maps, JoinType type) } template -static size_t getRowCountImpl(const Maps & maps, JoinType type) +static size_t getRowCountImpl(const Maps & maps, JoinMapMethod method) { - switch (type) + switch (method) { - case JoinType::EMPTY: + case JoinMapMethod::EMPTY: return 0; - case JoinType::CROSS: + case JoinMapMethod::CROSS: return 0; -#define M(NAME) \ - case JoinType::NAME: \ +#define M(NAME) \ + case JoinMapMethod::NAME: \ return maps.NAME ? maps.NAME->size() : 0; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -129,17 +129,17 @@ static size_t getRowCountImpl(const Maps & maps, JoinType type) } template -static size_t getByteCountImpl(const Maps & maps, JoinType type) +static size_t getByteCountImpl(const Maps & maps, JoinMapMethod method) { - switch (type) + switch (method) { - case JoinType::EMPTY: + case JoinMapMethod::EMPTY: return 0; - case JoinType::CROSS: + case JoinMapMethod::CROSS: return 0; -#define M(NAME) \ - case JoinType::NAME: \ +#define M(NAME) \ + case JoinMapMethod::NAME: \ return maps.NAME ? maps.NAME->getBufferSizeInBytes() : 0; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -150,17 +150,17 @@ static size_t getByteCountImpl(const Maps & maps, JoinType type) } template -static size_t clearMaps(Maps & maps, JoinType type) +static size_t clearMaps(Maps & maps, JoinMapMethod method) { size_t ret = 0; - switch (type) + switch (method) { - case JoinType::EMPTY: - case JoinType::CROSS: + case JoinMapMethod::EMPTY: + case JoinMapMethod::CROSS: ret = 0; break; #define M(NAME) \ - case JoinType::NAME: \ + case JoinMapMethod::NAME: \ if (maps.NAME) \ { \ ret = maps.NAME->getBufferSizeInBytes(); \ @@ -179,20 +179,20 @@ static size_t clearMaps(Maps & maps, JoinType type) size_t JoinPartition::getRowCount() { size_t ret = 0; - ret += getRowCountImpl(maps_any, join_type); - ret += getRowCountImpl(maps_all, join_type); - ret += getRowCountImpl(maps_any_full, join_type); - ret += getRowCountImpl(maps_all_full, join_type); + ret += getRowCountImpl(maps_any, join_map_method); + ret += getRowCountImpl(maps_all, join_map_method); + ret += getRowCountImpl(maps_any_full, join_map_method); + ret += getRowCountImpl(maps_all_full, join_map_method); return ret; } size_t JoinPartition::getHashMapAndPoolByteCount() { size_t ret = 0; - ret += getByteCountImpl(maps_any, join_type); - ret += getByteCountImpl(maps_all, join_type); - ret += getByteCountImpl(maps_any_full, join_type); - ret += getByteCountImpl(maps_all_full, join_type); + ret += getByteCountImpl(maps_any, join_map_method); + ret += getByteCountImpl(maps_all, join_map_method); + ret += getByteCountImpl(maps_any_full, join_map_method); + ret += getByteCountImpl(maps_all_full, join_map_method); ret += pool->size(); return ret; } @@ -205,16 +205,16 @@ void JoinPartition::initMap() if (!getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) - initImpl(maps_any, join_type); + initImpl(maps_any, join_map_method); else - initImpl(maps_all, join_type); + initImpl(maps_all, join_map_method); } else { if (strictness == ASTTableJoin::Strictness::Any) - initImpl(maps_any_full, join_type); + initImpl(maps_any_full, join_map_method); else - initImpl(maps_all_full, join_type); + initImpl(maps_all_full, join_map_method); } } @@ -264,10 +264,10 @@ void JoinPartition::releasePartitionPoolAndHashMap(std::unique_lock { size_t released_bytes = pool->size(); pool.reset(); - released_bytes += clearMaps(maps_any, join_type); - released_bytes += clearMaps(maps_all, join_type); - released_bytes += clearMaps(maps_any_full, join_type); - released_bytes += clearMaps(maps_all_full, join_type); + released_bytes += clearMaps(maps_any, join_map_method); + released_bytes += clearMaps(maps_all, join_map_method); + released_bytes += clearMaps(maps_any_full, join_map_method); + released_bytes += clearMaps(maps_all_full, join_map_method); subMemoryUsage(released_bytes); } @@ -308,73 +308,73 @@ Blocks JoinPartition::trySpillProbePartition(bool force, size_t max_cached_data_ namespace { /// code for hash map insertion -template +template struct KeyGetterForTypeImpl; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodOneNumber; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodOneNumber; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodOneNumber; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodOneNumber; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodString; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodStringBin; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodStringBin; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodFixedString; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodKeysFixed; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodKeysFixed; }; template -struct KeyGetterForTypeImpl +struct KeyGetterForTypeImpl { using Type = ColumnsHashing::HashMethodSerialized; }; -template +template struct KeyGetterForType { using Value = typename Data::value_type; using Mapped_t = typename Data::mapped_type; using Mapped = std::conditional_t, const Mapped_t, Mapped_t>; - using Type = typename KeyGetterForTypeImpl::Type; + using Type = typename KeyGetterForTypeImpl::Type; }; /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. @@ -616,27 +616,27 @@ void insertBlockIntoMapsImpl( bool enable_fine_grained_shuffle, bool enable_join_spill) { - switch (join_partitions[stream_index]->getJoinType()) + switch (join_partitions[stream_index]->getJoinMapMethod()) { - case JoinType::EMPTY: + case JoinMapMethod::EMPTY: break; - case JoinType::CROSS: + case JoinMapMethod::CROSS: break; /// Do nothing. We have already saved block, and it is enough. -#define M(TYPE) \ - case JoinType::TYPE: \ - insertBlockIntoMapsImplType::Type, typename Maps::TYPE##Type>( \ - join_partitions, \ - rows, \ - key_columns, \ - key_sizes, \ - collators, \ - stored_block, \ - null_map, \ - stream_index, \ - insert_concurrency, \ - enable_fine_grained_shuffle, \ - enable_join_spill); \ +#define M(METHOD) \ + case JoinMapMethod::METHOD: \ + insertBlockIntoMapsImplType::Type, typename Maps::METHOD##Type>( \ + join_partitions, \ + rows, \ + key_columns, \ + key_sizes, \ + collators, \ + stored_block, \ + null_map, \ + stream_index, \ + insert_concurrency, \ + enable_fine_grained_shuffle, \ + enable_join_spill); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -654,16 +654,16 @@ Map & JoinPartition::getHashMap() if (getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) - return getMapImpl(maps_any_full, join_type); + return getMapImpl(maps_any_full, join_map_method); else - return getMapImpl(maps_all_full, join_type); + return getMapImpl(maps_all_full, join_map_method); } else { if (strictness == ASTTableJoin::Strictness::Any) - return getMapImpl(maps_any, join_type); + return getMapImpl(maps_any, join_map_method); else - return getMapImpl(maps_all, join_type); + return getMapImpl(maps_all, join_map_method); } } @@ -1354,25 +1354,25 @@ void JoinPartition::probeBlockImpl( ProbeProcessInfo & probe_process_info) { const auto & current_join_partition = join_partitions[probe_process_info.partition_index]; - auto type = current_join_partition->join_type; - switch (type) + auto method = current_join_partition->join_map_method; + switch (method) { -#define M(TYPE) \ - case JoinType::TYPE: \ - probeBlockImplType::Type, typename Maps::TYPE##Type>( \ - join_partitions, \ - rows, \ - key_columns, \ - key_sizes, \ - added_columns, \ - null_map, \ - filter, \ - current_offset, \ - offsets_to_replicate, \ - right_indexes, \ - collators, \ - join_build_info, \ - probe_process_info); \ +#define M(METHOD) \ + case JoinMapMethod::METHOD: \ + probeBlockImplType::Type, typename Maps::METHOD##Type>( \ + join_partitions, \ + rows, \ + key_columns, \ + key_sizes, \ + added_columns, \ + null_map, \ + filter, \ + current_offset, \ + offsets_to_replicate, \ + right_indexes, \ + collators, \ + join_build_info, \ + probe_process_info); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -1392,18 +1392,18 @@ std::pair>, std::listjoin_type; - switch (type) + auto method = join_partitions[0]->join_map_method; + switch (method) { -#define M(TYPE) \ - case JoinType::TYPE: \ - return probeBlockNullAwareType::Type, typename Maps::TYPE##Type>( \ - join_partitions, \ - block, \ - key_columns, \ - key_sizes, \ - collators, \ - left_side_info, \ +#define M(METHOD) \ + case JoinMapMethod::METHOD: \ + return probeBlockNullAwareType::Type, typename Maps::METHOD##Type>( \ + join_partitions, \ + block, \ + key_columns, \ + key_sizes, \ + collators, \ + left_side_info, \ right_side_info); APPLY_FOR_JOIN_VARIANTS(M) #undef M diff --git a/dbms/src/Interpreters/JoinPartition.h b/dbms/src/Interpreters/JoinPartition.h index 2ca1582920a..53ae98285f2 100644 --- a/dbms/src/Interpreters/JoinPartition.h +++ b/dbms/src/Interpreters/JoinPartition.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -73,10 +73,10 @@ using JoinPartitions = std::vector>; class JoinPartition { public: - JoinPartition(JoinType join_type_, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, size_t max_block_size, const LoggerPtr & log_) + JoinPartition(JoinMapMethod join_map_type_, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, size_t max_block_size, const LoggerPtr & log_) : kind(kind_) , strictness(strictness_) - , join_type(join_type_) + , join_map_method(join_map_type_) , pool(std::make_shared()) , spill(false) , log(log_) @@ -131,7 +131,7 @@ class JoinPartition } bool isSpill() const { return spill; } void markSpill() { spill = true; } - JoinType getJoinType() const { return join_type; } + JoinMapMethod getJoinMapMethod() const { return join_map_method; } ASTTableJoin::Kind getJoinKind() const { return kind; } Block * getLastBuildBlock() { return &build_partition.blocks.back(); } ArenaPtr & getPartitionPool() @@ -212,7 +212,7 @@ class JoinPartition ASTTableJoin::Kind kind; ASTTableJoin::Strictness strictness; - JoinType join_type; + JoinMapMethod join_map_method; MapsAny maps_any; /// For ANY LEFT|INNER JOIN MapsAll maps_all; /// For ALL LEFT|INNER JOIN MapsAnyFull maps_any_full; /// For ANY RIGHT|FULL JOIN