From 0aa314088dad8f0ced5db6e34c5a52b5c60f548f Mon Sep 17 00:00:00 2001 From: elsa0520 Date: Mon, 30 Oct 2023 13:14:44 +0800 Subject: [PATCH 1/8] [Bug] runtime filter: fix timezone error in runtime filter This pr fix #8222. The rs operator which is parsed by filter should change the timezone to UTF when the column type is timestamp. The tiflash storage layer store the timestamp type value using UTF. So all of literal include runtime filter values should be changed to UTF timezone. --- dbms/src/DataStreams/RuntimeFilter.cpp | 7 ++++++- dbms/src/DataStreams/RuntimeFilter.h | 3 +++ .../Coprocessor/DAGExpressionAnalyzer.cpp | 1 + .../DeltaMerge/FilterParser/FilterParser.cpp | 18 ++++++++++++++---- .../DeltaMerge/FilterParser/FilterParser.h | 3 ++- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/dbms/src/DataStreams/RuntimeFilter.cpp b/dbms/src/DataStreams/RuntimeFilter.cpp index 70d3c0dea4a..b04e4b942d0 100644 --- a/dbms/src/DataStreams/RuntimeFilter.cpp +++ b/dbms/src/DataStreams/RuntimeFilter.cpp @@ -55,6 +55,10 @@ void RuntimeFilter::setINValuesSet(const std::shared_ptr & in_values_set_) in_values_set = in_values_set_; } +void RuntimeFilter::setTimezoneInfo(const TimezoneInfo & timezone_info_) { + timezone_info = timezone_info_; +} + void RuntimeFilter::build() { if (!DM::FilterParser::isRSFilterSupportType(target_expr.field_type().tp())) @@ -210,7 +214,8 @@ DM::RSOperatorPtr RuntimeFilter::parseToRSOperator(DM::ColumnDefines & columns_t rf_type, target_expr, columns_to_read, - in_values_set->getUniqueSetElements()); + in_values_set->getUniqueSetElements(), + timezone_info); case tipb::MIN_MAX: case tipb::BLOOM_FILTER: // TODO diff --git a/dbms/src/DataStreams/RuntimeFilter.h b/dbms/src/DataStreams/RuntimeFilter.h index 6017ee56fc8..8a8ab3a8c96 100644 --- a/dbms/src/DataStreams/RuntimeFilter.h +++ b/dbms/src/DataStreams/RuntimeFilter.h @@ -61,6 +61,8 @@ class RuntimeFilter void setINValuesSet(const std::shared_ptr & in_values_set_); + void setTimezoneInfo(const TimezoneInfo & timezone_info_); + void build(); void updateValues(const ColumnWithTypeAndName & values, const LoggerPtr & log); @@ -85,6 +87,7 @@ class RuntimeFilter tipb::Expr source_expr; tipb::Expr target_expr; const tipb::RuntimeFilterType rf_type; + TimezoneInfo timezone_info; // thread safe std::atomic status = RuntimeFilterStatus::NOT_READY; // used for failed_reason thread safe diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index ae65441bd1c..3e63eee26ba 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1375,6 +1375,7 @@ void DAGExpressionAnalyzer::appendRuntimeFilterProperties(RuntimeFilterPtr & run header.insert(ColumnWithTypeAndName(name_and_type.type->createColumn(), name_and_type.type, "_" + toString(1))); in_values_set->setHeader(header); runtime_filter->setINValuesSet(in_values_set); + runtime_filter->setTimezoneInfo(context.getTimezoneInfo()); break; case tipb::MIN_MAX: case tipb::BLOOM_FILTER: diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp index 339dafe0774..21d112ff1a2 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp @@ -380,7 +380,8 @@ RSOperatorPtr FilterParser::parseRFInExpr( const tipb::RuntimeFilterType rf_type, const tipb::Expr & target_expr, const ColumnDefines & columns_to_read, - const std::set & setElements) + const std::set & setElements, + const TimezoneInfo & timezone_info) { switch (rf_type) { @@ -390,9 +391,18 @@ RSOperatorPtr FilterParser::parseRFInExpr( return createUnsupported(target_expr.ShortDebugString(), "rf target expr is not column expr", false); auto column_define = cop::getColumnDefineForColumnExpr(target_expr, columns_to_read); auto attr = Attr{.col_name = column_define.name, .col_id = column_define.id, .type = column_define.type}; - // FIXME: for timestamp literal, we should convert it to UTC timezone - Fields values(setElements.begin(), setElements.end()); - return createIn(attr, values); + if (target_expr.field_type().tp() == TiDB::TypeTimestamp && !timezone_info.is_utc_timezone) { + Fields values; + std::for_each(setElements.begin(), setElements.end(),[&](Field element) { + // convert literal value from timezone specified in cop request to UTC + cop::convertFieldWithTimezone(element, timezone_info); + values.push_back(element); + }); + return createIn(attr, values); + } else { + Fields values(setElements.begin(), setElements.end()); + return createIn(attr, values); + } } case tipb::MIN_MAX: case tipb::BLOOM_FILTER: diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h index 9c86a25e89d..1a504b48abc 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h @@ -50,7 +50,8 @@ class FilterParser tipb::RuntimeFilterType rf_type, const tipb::Expr & target_expr, const ColumnDefines & columns_to_read, - const std::set & setElements); + const std::set & setElements, + const TimezoneInfo & timezone_info); static bool isRSFilterSupportType(Int32 field_type); From e2e23865e5f832dba432138bd20b2f574d8ce4a9 Mon Sep 17 00:00:00 2001 From: elsa0520 Date: Mon, 30 Oct 2023 13:52:18 +0800 Subject: [PATCH 2/8] format code --- dbms/src/DataStreams/RuntimeFilter.cpp | 3 ++- .../Storages/DeltaMerge/FilterParser/FilterParser.cpp | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dbms/src/DataStreams/RuntimeFilter.cpp b/dbms/src/DataStreams/RuntimeFilter.cpp index b04e4b942d0..ad2eacdc19c 100644 --- a/dbms/src/DataStreams/RuntimeFilter.cpp +++ b/dbms/src/DataStreams/RuntimeFilter.cpp @@ -55,7 +55,8 @@ void RuntimeFilter::setINValuesSet(const std::shared_ptr & in_values_set_) in_values_set = in_values_set_; } -void RuntimeFilter::setTimezoneInfo(const TimezoneInfo & timezone_info_) { +void RuntimeFilter::setTimezoneInfo(const TimezoneInfo & timezone_info_) +{ timezone_info = timezone_info_; } diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp index 21d112ff1a2..58685f48651 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp @@ -391,15 +391,18 @@ RSOperatorPtr FilterParser::parseRFInExpr( return createUnsupported(target_expr.ShortDebugString(), "rf target expr is not column expr", false); auto column_define = cop::getColumnDefineForColumnExpr(target_expr, columns_to_read); auto attr = Attr{.col_name = column_define.name, .col_id = column_define.id, .type = column_define.type}; - if (target_expr.field_type().tp() == TiDB::TypeTimestamp && !timezone_info.is_utc_timezone) { + if (target_expr.field_type().tp() == TiDB::TypeTimestamp && !timezone_info.is_utc_timezone) + { Fields values; - std::for_each(setElements.begin(), setElements.end(),[&](Field element) { + std::for_each(setElements.begin(), setElements.end(), [&](Field element) { // convert literal value from timezone specified in cop request to UTC cop::convertFieldWithTimezone(element, timezone_info); values.push_back(element); }); return createIn(attr, values); - } else { + } + else + { Fields values(setElements.begin(), setElements.end()); return createIn(attr, values); } From 051d2a394cc30b289b3e477625e11aab061c6a7b Mon Sep 17 00:00:00 2001 From: elsa0520 Date: Mon, 30 Oct 2023 14:16:13 +0800 Subject: [PATCH 3/8] add test --- .../DeltaMerge/FilterParser/FilterParser.cpp | 1 + tests/fullstack-test/mpp/runtime_filter.test | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp index 58685f48651..0f81d4c70be 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp @@ -394,6 +394,7 @@ RSOperatorPtr FilterParser::parseRFInExpr( if (target_expr.field_type().tp() == TiDB::TypeTimestamp && !timezone_info.is_utc_timezone) { Fields values; + values.reserve(setElements.size()); std::for_each(setElements.begin(), setElements.end(), [&](Field element) { // convert literal value from timezone specified in cop request to UTC cop::convertFieldWithTimezone(element, timezone_info); diff --git a/tests/fullstack-test/mpp/runtime_filter.test b/tests/fullstack-test/mpp/runtime_filter.test index e561cf502c3..7f97af50b58 100644 --- a/tests/fullstack-test/mpp/runtime_filter.test +++ b/tests/fullstack-test/mpp/runtime_filter.test @@ -30,8 +30,20 @@ mysql> insert into test.t2 values (2,3,3,3,3333333,3, 1, 3, 3.0, 3.00, 3.0); mysql> insert into test.t2 values (3,3,3,3,3333333,3, 1, 3, 3.0, 3.00, 3.0); mysql> insert into test.t2 values (4,null,null,null,null,null, null, null, null, null, null); +mysql> drop table if exists test.t1_timestamp; +mysql> create table test.t1_timestamp (k1 int, k2 timestamp); +mysql> alter table test.t1_timestamp set tiflash replica 1; +mysql> insert into t1_timestamp values (1, "2023-10-20 00:00:00"); + +mysql> drop table if exists test.t2_timestamp; +mysql> create table test.t2_timestamp (k1 int, k2 timestamp); +mysql> alter table test.t2_timestamp set tiflash replica 1; +mysql> insert into t2_timestamp values (1, "2023-10-20 00:00:00"); + func> wait_table test t1 func> wait_table test t2 +func> wait_table test t1_timestamp +func> wait_table test t2_timestamp # inner join mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp = 1; set tidb_runtime_filter_mode="LOCAL"; select t1_tinyint, t2_tinyint from test.t1, test.t2 where t1.t1_tinyint=t2.t2_tinyint; @@ -62,5 +74,15 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp = 1; se | 1 | +------------+ +# test timestamp column type for issue #8222 +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp = 1; set tidb_runtime_filter_mode="LOCAL"; select * from test.t1_timestamp, test.t2_timestamp where t1_timestamp.k2=t2_timestamp.k2; ++------+---------------------+------+---------------------+ +| k1 | k2 | k1 | k2 | ++------+---------------------+------+---------------------+ +| 1 | 2023-10-20 00:00:00 | 1 | 2023-10-20 00:00:00 | ++------+---------------------+------+---------------------+ + mysql> drop table test.t1; mysql> drop table test.t2; +mysql> drop table test.t1_timestamp; +mysql> drop table test.t2_timestamp; From 05bf8e42d15f109c00c47d453b5d6f1ea42cdf87 Mon Sep 17 00:00:00 2001 From: elsa0520 Date: Mon, 30 Oct 2023 15:14:37 +0800 Subject: [PATCH 4/8] fix test --- tests/fullstack-test/mpp/runtime_filter.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/fullstack-test/mpp/runtime_filter.test b/tests/fullstack-test/mpp/runtime_filter.test index 7f97af50b58..b2ab230c882 100644 --- a/tests/fullstack-test/mpp/runtime_filter.test +++ b/tests/fullstack-test/mpp/runtime_filter.test @@ -34,11 +34,13 @@ mysql> drop table if exists test.t1_timestamp; mysql> create table test.t1_timestamp (k1 int, k2 timestamp); mysql> alter table test.t1_timestamp set tiflash replica 1; mysql> insert into t1_timestamp values (1, "2023-10-20 00:00:00"); +mysql> alter table t1_timestamp compact tiflash replica; mysql> drop table if exists test.t2_timestamp; mysql> create table test.t2_timestamp (k1 int, k2 timestamp); mysql> alter table test.t2_timestamp set tiflash replica 1; mysql> insert into t2_timestamp values (1, "2023-10-20 00:00:00"); +mysql> alter table t2_timestamp compact tiflash replica; func> wait_table test t1 func> wait_table test t2 From 45afeaf414d9f3a1840f45a42f88521bafef0c4f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Mon, 30 Oct 2023 15:15:55 +0800 Subject: [PATCH 5/8] Add comments --- dbms/src/DataStreams/RuntimeFilter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/DataStreams/RuntimeFilter.cpp b/dbms/src/DataStreams/RuntimeFilter.cpp index ad2eacdc19c..62ed46e5de5 100644 --- a/dbms/src/DataStreams/RuntimeFilter.cpp +++ b/dbms/src/DataStreams/RuntimeFilter.cpp @@ -211,6 +211,8 @@ DM::RSOperatorPtr RuntimeFilter::parseToRSOperator(DM::ColumnDefines & columns_t switch (rf_type) { case tipb::IN: + // Note that the elements are added from the block read (after timezone casted). + // Take care of them when parsing to rough set filter. return DM::FilterParser::parseRFInExpr( rf_type, target_expr, From d658e9c8e39d23a4f4aa6c60c3a1bd185f8e217e Mon Sep 17 00:00:00 2001 From: elsa0520 Date: Mon, 30 Oct 2023 16:29:54 +0800 Subject: [PATCH 6/8] fix test --- tests/fullstack-test/mpp/runtime_filter.test | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/fullstack-test/mpp/runtime_filter.test b/tests/fullstack-test/mpp/runtime_filter.test index b2ab230c882..97c30e69b0c 100644 --- a/tests/fullstack-test/mpp/runtime_filter.test +++ b/tests/fullstack-test/mpp/runtime_filter.test @@ -34,19 +34,20 @@ mysql> drop table if exists test.t1_timestamp; mysql> create table test.t1_timestamp (k1 int, k2 timestamp); mysql> alter table test.t1_timestamp set tiflash replica 1; mysql> insert into t1_timestamp values (1, "2023-10-20 00:00:00"); -mysql> alter table t1_timestamp compact tiflash replica; mysql> drop table if exists test.t2_timestamp; mysql> create table test.t2_timestamp (k1 int, k2 timestamp); mysql> alter table test.t2_timestamp set tiflash replica 1; mysql> insert into t2_timestamp values (1, "2023-10-20 00:00:00"); -mysql> alter table t2_timestamp compact tiflash replica; func> wait_table test t1 func> wait_table test t2 func> wait_table test t1_timestamp func> wait_table test t2_timestamp +mysql> alter table t1_timestamp compact tiflash replica; +mysql> alter table t2_timestamp compact tiflash replica; + # inner join mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp = 1; set tidb_runtime_filter_mode="LOCAL"; select t1_tinyint, t2_tinyint from test.t1, test.t2 where t1.t1_tinyint=t2.t2_tinyint; +------------+------------+ From 6fd3288031f468f65bb18f4a6fc6ab392ce9daea Mon Sep 17 00:00:00 2001 From: JaySon Date: Mon, 30 Oct 2023 17:38:28 +0800 Subject: [PATCH 7/8] Fix test case --- tests/fullstack-test/mpp/runtime_filter.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fullstack-test/mpp/runtime_filter.test b/tests/fullstack-test/mpp/runtime_filter.test index 97c30e69b0c..af2684525be 100644 --- a/tests/fullstack-test/mpp/runtime_filter.test +++ b/tests/fullstack-test/mpp/runtime_filter.test @@ -33,12 +33,12 @@ mysql> insert into test.t2 values (4,null,null,null,null,null, null, null, null, mysql> drop table if exists test.t1_timestamp; mysql> create table test.t1_timestamp (k1 int, k2 timestamp); mysql> alter table test.t1_timestamp set tiflash replica 1; -mysql> insert into t1_timestamp values (1, "2023-10-20 00:00:00"); +mysql> insert into t1_timestamp values (1, '2023-10-20 00:00:00'); mysql> drop table if exists test.t2_timestamp; mysql> create table test.t2_timestamp (k1 int, k2 timestamp); mysql> alter table test.t2_timestamp set tiflash replica 1; -mysql> insert into t2_timestamp values (1, "2023-10-20 00:00:00"); +mysql> insert into t2_timestamp values (1, '2023-10-20 00:00:00'); func> wait_table test t1 func> wait_table test t2 From ac5f455fc24fcad684089595223ef9f99056ff52 Mon Sep 17 00:00:00 2001 From: JaySon Date: Mon, 30 Oct 2023 18:00:21 +0800 Subject: [PATCH 8/8] Fix test case --- tests/fullstack-test/mpp/runtime_filter.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/fullstack-test/mpp/runtime_filter.test b/tests/fullstack-test/mpp/runtime_filter.test index af2684525be..b06fffffba7 100644 --- a/tests/fullstack-test/mpp/runtime_filter.test +++ b/tests/fullstack-test/mpp/runtime_filter.test @@ -33,20 +33,20 @@ mysql> insert into test.t2 values (4,null,null,null,null,null, null, null, null, mysql> drop table if exists test.t1_timestamp; mysql> create table test.t1_timestamp (k1 int, k2 timestamp); mysql> alter table test.t1_timestamp set tiflash replica 1; -mysql> insert into t1_timestamp values (1, '2023-10-20 00:00:00'); +mysql> insert into test.t1_timestamp values (1, '2023-10-20 00:00:00'); mysql> drop table if exists test.t2_timestamp; mysql> create table test.t2_timestamp (k1 int, k2 timestamp); mysql> alter table test.t2_timestamp set tiflash replica 1; -mysql> insert into t2_timestamp values (1, '2023-10-20 00:00:00'); +mysql> insert into test.t2_timestamp values (1, '2023-10-20 00:00:00'); func> wait_table test t1 func> wait_table test t2 func> wait_table test t1_timestamp func> wait_table test t2_timestamp -mysql> alter table t1_timestamp compact tiflash replica; -mysql> alter table t2_timestamp compact tiflash replica; +mysql> alter table test.t1_timestamp compact tiflash replica; +mysql> alter table test.t2_timestamp compact tiflash replica; # inner join mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp = 1; set tidb_runtime_filter_mode="LOCAL"; select t1_tinyint, t2_tinyint from test.t1, test.t2 where t1.t1_tinyint=t2.t2_tinyint;