Skip to content

Commit

Permalink
Remove useless code in join (#6673)
Browse files Browse the repository at this point in the history
ref #6528
  • Loading branch information
windtalker authored Jan 21, 2023
1 parent 1f9ec07 commit b88b2aa
Show file tree
Hide file tree
Showing 11 changed files with 15 additions and 270 deletions.
18 changes: 7 additions & 11 deletions dbms/src/DataStreams/NonJoinedBlockInputStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,18 +111,14 @@ NonJoinedBlockInputStream::NonJoinedBlockInputStream(const Join & parent_, const
for (size_t i = 0; i < num_columns_right; ++i)
column_indices_right.push_back(num_columns_left + i);

/// If use_nulls, convert left columns to Nullable.
if (parent.use_nulls)
for (size_t i = 0; i < num_columns_left; ++i)
{
for (size_t i = 0; i < num_columns_left; ++i)
{
const auto & column_with_type_and_name = result_sample_block.getByPosition(column_indices_left[i]);
if (parent.key_names_left.end() == std::find(parent.key_names_left.begin(), parent.key_names_left.end(), column_with_type_and_name.name))
/// if it is not the key, then convert to nullable, if it is key, then just keep the original type
/// actually we don't care about the key column now refer to https://github.com/pingcap/tiflash/blob/v6.5.0/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp#L953
/// for detailed explanation
convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i]));
}
const auto & column_with_type_and_name = result_sample_block.getByPosition(column_indices_left[i]);
if (parent.key_names_left.end() == std::find(parent.key_names_left.begin(), parent.key_names_left.end(), column_with_type_and_name.name))
/// if it is not the key, then convert to nullable, if it is key, then just keep the original type
/// actually we don't care about the key column now refer to https://github.com/pingcap/tiflash/blob/v6.5.0/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp#L953
/// for detailed explanation
convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i]));
}

columns_left.resize(num_columns_left);
Expand Down
1 change: 0 additions & 1 deletion dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline &
JoinPtr join_ptr = std::make_shared<Join>(
probe_key_names,
build_key_names,
true,
tiflash_join.kind,
tiflash_join.strictness,
log->identifier(),
Expand Down
1 change: 0 additions & 1 deletion dbms/src/Flash/Planner/plans/PhysicalJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ PhysicalPlanNodePtr PhysicalJoin::build(
JoinPtr join_ptr = std::make_shared<Join>(
probe_key_names,
build_key_names,
true,
tiflash_join.kind,
tiflash_join.strictness,
log->identifier(),
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Interpreters/ExpressionActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ void ExpressionAction::prepare(Block & sample_block)
/// in case of coprocessor task, the join is always not null, but if the query comes from
/// clickhouse client, the join maybe null, skip updating column type if join is null
// todo find a new way to update the column type so the type can always be updated.
if (join != nullptr && join->getKind() == ASTTableJoin::Kind::Right && join->useNulls())
if (join != nullptr && join->getKind() == ASTTableJoin::Kind::Right)
{
/// update the column type for left block
std::unordered_set<String> keys;
Expand Down
18 changes: 1 addition & 17 deletions dbms/src/Interpreters/ExpressionAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
#include <Poco/String.h>
#include <Poco/Util/Application.h>
#include <Storages/MutableSupport.h>
#include <Storages/StorageJoin.h>
#include <Storages/StorageMemory.h>
#include <Storages/StorageSet.h>

Expand Down Expand Up @@ -2175,28 +2174,13 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
{
auto database_table = getDatabaseAndTableNameFromIdentifier(static_cast<const ASTIdentifier &>(*table_to_join.database_and_table_name));
StoragePtr table = context.tryGetTable(database_table.first, database_table.second);

if (table)
{
auto * storage_join = dynamic_cast<StorageJoin *>(table.get());

if (storage_join)
{
storage_join->assertCompatible(join_params.kind, join_params.strictness);
/// TODO Check the set of keys.

JoinPtr & join = storage_join->getJoin();
subquery_for_set.join = join;
}
}
}

if (!subquery_for_set.join)
{
JoinPtr join = std::make_shared<Join>(
join_key_names_left,
join_key_names_right,
settings.join_use_nulls,
join_params.kind,
join_params.strictness,
"" /*req_id=*/,
Expand Down Expand Up @@ -2580,7 +2564,7 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd
{
joined_columns.insert(col.name);

bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left || table_join.kind == ASTTableJoin::Kind::Cross_Left || table_join.kind == ASTTableJoin::Kind::Full);
bool make_nullable = table_join.kind == ASTTableJoin::Kind::Left || table_join.kind == ASTTableJoin::Kind::Cross_Left || table_join.kind == ASTTableJoin::Kind::Full;
joined_columns_name_type.emplace_back(col.name, make_nullable ? makeNullable(col.type) : col.type);
}
}
Expand Down
15 changes: 5 additions & 10 deletions dbms/src/Interpreters/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ void convertColumnToNullable(ColumnWithTypeAndName & column)
Join::Join(
const Names & key_names_left_,
const Names & key_names_right_,
bool use_nulls_,
ASTTableJoin::Kind kind_,
ASTTableJoin::Strictness strictness_,
const String & req_id,
Expand All @@ -135,7 +134,6 @@ Join::Join(
, strictness(strictness_)
, key_names_left(key_names_left_)
, key_names_right(key_names_right_)
, use_nulls(use_nulls_)
, build_concurrency(0)
, probe_concurrency(0)
, active_probe_concurrency(0)
Expand Down Expand Up @@ -507,7 +505,7 @@ void Join::setSampleBlock(const Block & block)
}

/// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable.
if (use_nulls && (isLeftJoin(kind) || kind == ASTTableJoin::Kind::Full))
if (isLeftJoin(kind) || kind == ASTTableJoin::Kind::Full)
for (size_t i = 0; i < num_columns_to_add; ++i)
convertColumnToNullable(sample_block_with_columns_to_add.getByPosition(i));

Expand Down Expand Up @@ -932,7 +930,7 @@ void Join::insertFromBlockInternal(Block * stored_block, size_t stream_index)
}

/// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable.
if (use_nulls && (isLeftJoin(kind) || kind == ASTTableJoin::Kind::Full))
if (isLeftJoin(kind) || kind == ASTTableJoin::Kind::Full)
{
for (size_t i = getFullness(kind) ? keys_size : 0; i < size; ++i)
{
Expand Down Expand Up @@ -1588,12 +1586,9 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr
if (ColumnPtr converted = col->convertToFullColumnIfConst())
col = converted;

/// If use_nulls, convert left columns (except keys) to Nullable.
if (use_nulls)
{
if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name))
convertColumnToNullable(block.getByPosition(i));
}
/// convert left columns (except keys) to Nullable
if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name))
convertColumnToNullable(block.getByPosition(i));
}
}

Expand Down
11 changes: 1 addition & 10 deletions dbms/src/Interpreters/Join.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,14 @@ struct ProbeProcessInfo;
*
* Default values for outer joins (LEFT, RIGHT, FULL):
*
* Behaviour is controlled by 'join_use_nulls' settings.
* If it is false, we substitute (global) default value for the data type, for non-joined rows
* (zero, empty string, etc. and NULL for Nullable data types).
* If it is true, we always generate Nullable column and substitute NULLs for non-joined rows,
* Always generate Nullable column and substitute NULLs for non-joined rows,
* as in standard SQL.
*/
class Join
{
public:
Join(const Names & key_names_left_,
const Names & key_names_right_,
bool use_nulls_,
ASTTableJoin::Kind kind_,
ASTTableJoin::Strictness strictness_,
const String & req_id,
Expand Down Expand Up @@ -136,7 +132,6 @@ class Join
/** For RIGHT and FULL JOINs.
* A stream that will contain default values from left table, joined with rows from right table, that was not joined before.
* Use only after all calls to joinBlock was done.
* left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside).
*/
BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, size_t index, size_t step, size_t max_block_size) const;

Expand All @@ -149,7 +144,6 @@ class Join

ASTTableJoin::Kind getKind() const { return kind; }

bool useNulls() const { return use_nulls; }
const Names & getLeftJoinKeys() const { return key_names_left; }

size_t getProbeConcurrency() const
Expand Down Expand Up @@ -309,9 +303,6 @@ class Join
/// Names of key columns (columns for equi-JOIN) in "right" table (in the order they appear in USING clause).
const Names key_names_right;

/// Substitute NULLs for non-JOINed rows.
bool use_nulls;

size_t build_concurrency;

mutable std::mutex probe_mutex;
Expand Down
2 changes: 0 additions & 2 deletions dbms/src/Interpreters/Settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@ struct Settings
"values are non-zero, and at least absolute or relative amount of errors is lower than corresponding value, will skip until next " \
"line and continue.") \
\
M(SettingBool, join_use_nulls, 0, "Use NULLs for non-joined rows of outer JOINs. If false, use default value of corresponding columns data type.") \
\
M(SettingUInt64, preferred_block_size_bytes, 1000000, "") \
\
M(SettingUInt64, max_replica_delay_for_distributed_queries, 300, "If set, distributed queries of Replicated tables will choose servers with replication delay in seconds less than the specified " \
Expand Down
147 changes: 0 additions & 147 deletions dbms/src/Storages/StorageJoin.cpp

This file was deleted.

Loading

0 comments on commit b88b2aa

Please sign in to comment.