From 27452c63ad68362912ef4303199952eb9f78c93e Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Sat, 2 Apr 2022 13:18:29 +0800 Subject: [PATCH 01/79] Remove invalid blob file after BlobStore restored. (#4475) ref pingcap/tiflash#3594 --- dbms/src/Storages/Page/V3/BlobStore.cpp | 134 ++++++++++++++- dbms/src/Storages/Page/V3/BlobStore.h | 1 + .../Page/V3/tests/gtest_blob_store.cpp | 160 ++++++++++++++++++ .../Page/V3/tests/gtest_page_directory.cpp | 22 ++- 4 files changed, 309 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 468cfa01227..bfce007feba 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -50,6 +50,12 @@ namespace PS::V3 { static constexpr bool BLOBSTORE_CHECKSUM_ON_READ = true; +#ifndef NDEBUG +static constexpr bool CHECK_STATS_ALL_IN_DISK = true; +#else +static constexpr bool CHECK_STATS_ALL_IN_DISK = false; +#endif + using BlobStat = BlobStore::BlobStats::BlobStat; using BlobStatPtr = BlobStore::BlobStats::BlobStatPtr; using ChecksumClass = Digest::CRC64; @@ -797,17 +803,143 @@ void BlobStore::BlobStats::restoreByEntry(const PageEntryV3 & entry) stat->restoreSpaceMap(entry.offset, entry.size); } +std::set BlobStore::BlobStats::getBlobIdsFromDisk(String path) const +{ + std::set blob_ids_on_disk; + + Poco::File store_path(path); + if (!store_path.exists()) + { + return blob_ids_on_disk; + } + + + std::vector file_list; + store_path.list(file_list); + + for (const auto & blob_name : file_list) + { + if (!startsWith(blob_name, BlobFile::BLOB_PREFIX_NAME)) + { + LOG_FMT_INFO(log, "Ignore not blob file [dir={}] [file={}]", path, blob_name); + continue; + } + + Strings ss; + boost::split(ss, blob_name, boost::is_any_of("_")); + + if (ss.size() != 2) + { + LOG_FMT_INFO(log, "Ignore unrecognized blob file [dir={}] [file={}]", path, blob_name); + continue; + } + + String err_msg; + try + { + const auto & blob_id = std::stoull(ss[1]); + blob_ids_on_disk.insert(blob_id); + continue; // continue to handle next file + } + catch (std::invalid_argument & e) + { + err_msg = e.what(); + } + catch (std::out_of_range & e) + { + err_msg = e.what(); + } + LOG_FMT_INFO(log, "Ignore unrecognized blob file [dir={}] [file={}] [err={}]", path, blob_name, err_msg); + } + + return blob_ids_on_disk; +} + void BlobStore::BlobStats::restore() { BlobFileId max_restored_file_id = 0; for (auto & [path, stats] : stats_map) { - (void)path; + std::set blob_ids_in_stats; for (const auto & stat : stats) { stat->recalculateSpaceMap(); max_restored_file_id = std::max(stat->id, max_restored_file_id); + blob_ids_in_stats.insert(stat->id); + } + + // If a BlobFile on disk with a valid rate of 0 (but has not been deleted because of some reason), + // then it won't be restored to stats. But we should check and clean up if such files exist. 
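// An illustrative standalone sketch of the cleanup step implemented in the
// hunk below (the simplified free-function signature is an assumption for
// illustration, not the actual BlobStats API): after verifying that every id
// known to stats exists on disk, any id found on disk but absent from stats
// marks a dead BlobFile that is safe to remove.
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <set>

std::set<uint64_t> findInvalidBlobIds(
    const std::set<uint64_t> & blob_ids_on_disk,
    const std::set<uint64_t> & blob_ids_in_stats)
{
    std::set<uint64_t> invalid;
    // std::set iterates in ascending order, which satisfies the sorted-range
    // precondition of std::set_difference.
    std::set_difference(
        blob_ids_on_disk.begin(), blob_ids_on_disk.end(),
        blob_ids_in_stats.begin(), blob_ids_in_stats.end(),
        std::inserter(invalid, invalid.end()));
    return invalid; // callers would then delete the "blob_<id>" files under the path
}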
+ + std::set blob_ids_on_disk = getBlobIdsFromDisk(path); + + if (blob_ids_on_disk.size() < blob_ids_in_stats.size()) + { + FmtBuffer fmt_buf; + fmt_buf.fmtAppend( + "Some of Blob are missing in disk.[path={}] [stats ids: ", + path); + + fmt_buf.joinStr( + blob_ids_in_stats.begin(), + blob_ids_in_stats.end(), + [](const auto arg, FmtBuffer & fb) { + fb.fmtAppend("{}", arg); + }, + ", "); + + fmt_buf.append("]"); + + throw Exception(fmt_buf.toString(), + ErrorCodes::LOGICAL_ERROR); + } + + if constexpr (CHECK_STATS_ALL_IN_DISK) + { + std::vector blob_ids_on_disk_not_in_stats(blob_ids_in_stats.size()); + auto last_check_it = std::set_difference(blob_ids_in_stats.begin(), + blob_ids_in_stats.end(), + blob_ids_on_disk.begin(), + blob_ids_on_disk.end(), + blob_ids_on_disk_not_in_stats.begin()); + + if (last_check_it != blob_ids_on_disk_not_in_stats.begin()) + { + FmtBuffer fmt_buf; + fmt_buf.fmtAppend( + "Some of Blob are missing in disk.[path={}] [stats ids: ", + path); + + fmt_buf.joinStr( + blob_ids_in_stats.begin(), + blob_ids_in_stats.end(), + [](const auto arg, FmtBuffer & fb) { + fb.fmtAppend("{}", arg); + }, + ", "); + + fmt_buf.append("]"); + + throw Exception(fmt_buf.toString(), + ErrorCodes::LOGICAL_ERROR); + } + } + + std::vector invalid_blob_ids; + + std::set_difference(blob_ids_on_disk.begin(), + blob_ids_on_disk.end(), + blob_ids_in_stats.begin(), + blob_ids_in_stats.end(), + std::back_inserter(invalid_blob_ids)); + + for (const auto & invalid_blob_id : invalid_blob_ids) + { + const auto & invalid_blob_path = fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, invalid_blob_id); + LOG_FMT_INFO(log, "Remove invalid blob file [file={}]", invalid_blob_path); + Poco::File invalid_blob(invalid_blob_path); + invalid_blob.remove(); } } diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index aa6928f975c..c91ba90177e 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -180,6 +180,7 @@ class BlobStore : private Allocator #endif void restoreByEntry(const PageEntryV3 & entry); void restore(); + std::set getBlobIdsFromDisk(String path) const; friend class PageDirectoryFactory; #ifndef DBMS_PUBLIC_GTEST diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index d63e74aea8b..3db3b53dd2b 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -70,6 +70,11 @@ try BlobFileId file_id1 = 10; BlobFileId file_id2 = 12; + const auto & path = getTemporaryPath(); + createIfNotExist(path); + Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); + Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); + { stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, @@ -285,6 +290,11 @@ try BlobFileId file_id1 = 10; BlobFileId file_id2 = 12; + const auto & path = getTemporaryPath(); + createIfNotExist(path); + Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); + Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); + { blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, @@ -334,6 +344,156 @@ try } CATCH + +TEST_F(BlobStoreTest, RestoreWithInvalidBlob) +try +{ + const auto file_provider = DB::tests::TiFlashTestEnv::getContext().getFileProvider(); + config.file_limit_size = 1024; + + // Generate blob [1,2,3] + auto 
write_blob_datas = [](BlobStore & blob_store) { + WriteBatch write_batch; + PageId page_id = 55; + size_t buff_size = 1024; + char c_buff[buff_size]; + memset(c_buff, 0x1, buff_size); + + // write blob 1 + write_batch.putPage(page_id, /* tag */ 0, std::make_shared(const_cast(c_buff), buff_size), buff_size); + blob_store.write(write_batch, nullptr); + write_batch.clear(); + + // write blob 2 + write_batch.putPage(page_id + 1, /* tag */ 0, std::make_shared(const_cast(c_buff), buff_size), buff_size); + blob_store.write(write_batch, nullptr); + write_batch.clear(); + + // write blob 3 + write_batch.putPage(page_id + 2, /* tag */ 0, std::make_shared(const_cast(c_buff), buff_size), buff_size); + blob_store.write(write_batch, nullptr); + write_batch.clear(); + }; + + auto check_in_disk_file = [](String parent_path, std::vector exited_blobs) -> bool { + for (const auto blob_id : exited_blobs) + { + Poco::File file(fmt::format("{}/{}{}", parent_path, BlobFile::BLOB_PREFIX_NAME, blob_id)); + if (!file.exists()) + { + return false; + } + } + return true; + }; + + auto restore_blobs = [](BlobStore & blob_store, std::vector blob_ids) { + for (const auto & id : blob_ids) + { + blob_store.blob_stats.restoreByEntry(PageEntryV3{ + .file_id = id, + .size = 1024, + .tag = 0, + .offset = 0, + .checksum = 0x4567, + }); + } + }; + + // Case 1, all of blob been restored + { + auto test_path = getTemporaryPath(); + auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config); + write_blob_datas(blob_store); + + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + + auto blob_store_check = BlobStore(getCurrentTestName(), file_provider, delegator, config); + restore_blobs(blob_store_check, {1, 2, 3}); + + blob_store_check.blob_stats.restore(); + + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + DB::tests::TiFlashTestEnv::tryRemovePath(test_path); + createIfNotExist(test_path); + } + + // Case 2, only recover blob 1 + { + auto test_path = getTemporaryPath(); + auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config); + write_blob_datas(blob_store); + + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + + auto blob_store_check = BlobStore(getCurrentTestName(), file_provider, delegator, config); + restore_blobs(blob_store_check, {1}); + + blob_store_check.blob_stats.restore(); + + ASSERT_TRUE(check_in_disk_file(test_path, {1})); + DB::tests::TiFlashTestEnv::tryRemovePath(test_path); + createIfNotExist(test_path); + } + + // Case 3, only recover blob 2 + { + auto test_path = getTemporaryPath(); + auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config); + write_blob_datas(blob_store); + + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + + auto blob_store_check = BlobStore(getCurrentTestName(), file_provider, delegator, config); + restore_blobs(blob_store_check, {2}); + + blob_store_check.blob_stats.restore(); + + ASSERT_TRUE(check_in_disk_file(test_path, {2})); + DB::tests::TiFlashTestEnv::tryRemovePath(test_path); + createIfNotExist(test_path); + } + + // Case 4, only recover blob 3 + { + auto test_path = getTemporaryPath(); + auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config); + write_blob_datas(blob_store); + + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + + auto blob_store_check = BlobStore(getCurrentTestName(), file_provider, delegator, config); + restore_blobs(blob_store_check, {3}); + + blob_store_check.blob_stats.restore(); + + 
ASSERT_TRUE(check_in_disk_file(test_path, {3})); + DB::tests::TiFlashTestEnv::tryRemovePath(test_path); + createIfNotExist(test_path); + } + + // Case 5, recover a not exist blob + { + auto test_path = getTemporaryPath(); + auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config); + write_blob_datas(blob_store); + + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + + auto blob_store_check = BlobStore(getCurrentTestName(), file_provider, delegator, config); + restore_blobs(blob_store_check, {4}); + ASSERT_THROW(blob_store_check.blob_stats.restore(), DB::Exception); + // Won't remove blob if exception happened. + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + + auto blob_store_check2 = BlobStore(getCurrentTestName(), file_provider, delegator, config); + restore_blobs(blob_store_check2, {1, 2, 3, 4}); + ASSERT_THROW(blob_store_check2.blob_stats.restore(), DB::Exception); + ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + } +} +CATCH + TEST_F(BlobStoreTest, testWriteRead) { const auto file_provider = DB::tests::TiFlashTestEnv::getContext().getFileProvider(); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 30c9c36461e..16c2140964b 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -1959,13 +1959,21 @@ CATCH TEST_F(PageDirectoryGCTest, RestoreWithRef) try { - PageEntryV3 entry_1_v1{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_5_v1{.file_id = 5, .size = 255, .tag = 0, .offset = 0x100, .checksum = 0x4567}; - PageEntryV3 entry_5_v2{.file_id = 5, .size = 255, .tag = 0, .offset = 0x400, .checksum = 0x4567}; + BlobFileId file_id1 = 1; + BlobFileId file_id2 = 5; + + const auto & path = getTemporaryPath(); + createIfNotExist(path); + Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); + Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); + + PageEntryV3 entry_1_v1{.file_id = file_id1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_5_v1{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x100, .checksum = 0x4567}; + PageEntryV3 entry_5_v2{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x400, .checksum = 0x4567}; { PageEntriesEdit edit; - edit.put(1, entry_1_v1); - edit.put(5, entry_5_v1); + edit.put(file_id1, entry_1_v1); + edit.put(file_id2, entry_5_v1); dir->apply(std::move(edit)); } { @@ -1998,9 +2006,9 @@ try EXPECT_SAME_ENTRY(entry_5_v2, restored_dir->get(5, temp_snap).second); // The entry_1_v1 should be restored to stats - auto stat_for_file_1 = stats.blobIdToStat(1, false, false); + auto stat_for_file_1 = stats.blobIdToStat(file_id1, false, false); EXPECT_TRUE(stat_for_file_1->smap->isMarkUsed(entry_1_v1.offset, entry_1_v1.size)); - auto stat_for_file_5 = stats.blobIdToStat(5, false, false); + auto stat_for_file_5 = stats.blobIdToStat(file_id2, false, false); // entry_5_v1 should not be restored to stats EXPECT_FALSE(stat_for_file_5->smap->isMarkUsed(entry_5_v1.offset, entry_5_v1.size)); EXPECT_TRUE(stat_for_file_5->smap->isMarkUsed(entry_5_v2.offset, entry_5_v2.size)); From 759421494fd276282f6d7bc2f1857fdac740c827 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Sat, 2 Apr 2022 16:20:30 +0800 Subject: [PATCH 02/79] Remove redundant use of marco __PRETTY_FUNCTION__ (#4486) close 
pingcap/tiflash#4392 --- dbms/src/Flash/BatchCoprocessorHandler.cpp | 15 ++++---- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 13 ++++--- .../Coprocessor/DAGStorageInterpreter.cpp | 4 +-- dbms/src/Flash/CoprocessorHandler.cpp | 18 +++++----- dbms/src/Flash/FlashService.cpp | 31 ++++++++-------- dbms/src/Flash/Mpp/ExchangeReceiver.cpp | 2 +- dbms/src/Flash/Mpp/MPPTask.cpp | 3 +- dbms/src/Storages/DeltaMerge/File/DMFile.cpp | 30 ++++++++-------- dbms/src/Storages/DeltaMerge/File/DMFile.h | 27 +++++++------- dbms/src/Storages/DeltaMerge/SchemaUpdate.cpp | 22 ++++++------ dbms/src/Storages/Transaction/KVStore.cpp | 5 ++- .../Storages/Transaction/SchemaBuilder.cpp | 35 ++++++++++--------- 12 files changed, 101 insertions(+), 104 deletions(-) diff --git a/dbms/src/Flash/BatchCoprocessorHandler.cpp b/dbms/src/Flash/BatchCoprocessorHandler.cpp index c02b25d44d7..dd40a154fcc 100644 --- a/dbms/src/Flash/BatchCoprocessorHandler.cpp +++ b/dbms/src/Flash/BatchCoprocessorHandler.cpp @@ -60,8 +60,7 @@ grpc::Status BatchCoprocessorHandler::execute() auto tables_regions_info = TablesRegionsInfo::create(cop_request->regions(), cop_request->table_regions(), cop_context.db_context.getTMTContext()); LOG_FMT_DEBUG( log, - "{}: Handling {} regions from {} physical tables in DAG request: {}", - __PRETTY_FUNCTION__, + "Handling {} regions from {} physical tables in DAG request: {}", tables_regions_info.regionCount(), tables_regions_info.tableCount(), dag_request.DebugString()); @@ -76,7 +75,7 @@ grpc::Status BatchCoprocessorHandler::execute() DAGDriver driver(cop_context.db_context, cop_request->start_ts() > 0 ? cop_request->start_ts() : dag_request.start_ts_fallback(), cop_request->schema_ver(), writer); // batch execution; driver.execute(); - LOG_FMT_DEBUG(log, "{}: Handle DAG request done", __PRETTY_FUNCTION__); + LOG_FMT_DEBUG(log, "Handle DAG request done"); break; } case COP_REQ_TYPE_ANALYZE: @@ -89,28 +88,28 @@ grpc::Status BatchCoprocessorHandler::execute() } catch (const TiFlashException & e) { - LOG_FMT_ERROR(log, "{}: TiFlash Exception: {}\n{}", __PRETTY_FUNCTION__, e.displayText(), e.getStackTrace().toString()); + LOG_FMT_ERROR(log, "TiFlash Exception: {}\n{}", e.displayText(), e.getStackTrace().toString()); GET_METRIC(tiflash_coprocessor_request_error, reason_internal_error).Increment(); return recordError(grpc::StatusCode::INTERNAL, e.standardText()); } catch (const Exception & e) { - LOG_FMT_ERROR(log, "{}: DB Exception: {}\n{}", __PRETTY_FUNCTION__, e.message(), e.getStackTrace().toString()); + LOG_FMT_ERROR(log, "DB Exception: {}\n{}", e.message(), e.getStackTrace().toString()); return recordError(tiflashErrorCodeToGrpcStatusCode(e.code()), e.message()); } catch (const pingcap::Exception & e) { - LOG_FMT_ERROR(log, "{}: KV Client Exception: {}", __PRETTY_FUNCTION__, e.message()); + LOG_FMT_ERROR(log, "KV Client Exception: {}", e.message()); return recordError(grpc::StatusCode::INTERNAL, e.message()); } catch (const std::exception & e) { - LOG_FMT_ERROR(log, "{}: std exception: {}", __PRETTY_FUNCTION__, e.what()); + LOG_FMT_ERROR(log, "std exception: {}", e.what()); return recordError(grpc::StatusCode::INTERNAL, e.what()); } catch (...) 
{ - LOG_FMT_ERROR(log, "{}: other exception", __PRETTY_FUNCTION__); + LOG_FMT_ERROR(log, "other exception"); return recordError(grpc::StatusCode::INTERNAL, "other exception"); } } diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 60b4a18ced9..55a2024a8bc 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -164,8 +164,7 @@ try { LOG_FMT_DEBUG( log, - "{}: dag request without encode cost: {} seconds, produce {} rows, {} bytes.", - __PRETTY_FUNCTION__, + "dag request without encode cost: {} seconds, produce {} rows, {} bytes.", p_stream->getProfileInfo().execution_time / (double)1000000000, p_stream->getProfileInfo().rows, p_stream->getProfileInfo().bytes); @@ -190,27 +189,27 @@ catch (const LockException & e) } catch (const TiFlashException & e) { - LOG_FMT_ERROR(log, "{}: {}\n{}", __PRETTY_FUNCTION__, e.standardText(), e.getStackTrace().toString()); + LOG_FMT_ERROR(log, "{}\n{}", e.standardText(), e.getStackTrace().toString()); recordError(grpc::StatusCode::INTERNAL, e.standardText()); } catch (const Exception & e) { - LOG_FMT_ERROR(log, "{}: DB Exception: {}\n{}", __PRETTY_FUNCTION__, e.message(), e.getStackTrace().toString()); + LOG_FMT_ERROR(log, "DB Exception: {}\n{}", e.message(), e.getStackTrace().toString()); recordError(e.code(), e.message()); } catch (const pingcap::Exception & e) { - LOG_FMT_ERROR(log, "{}: KV Client Exception: {}", __PRETTY_FUNCTION__, e.message()); + LOG_FMT_ERROR(log, "KV Client Exception: {}", e.message()); recordError(e.code(), e.message()); } catch (const std::exception & e) { - LOG_FMT_ERROR(log, "{}: std exception: {}", __PRETTY_FUNCTION__, e.what()); + LOG_FMT_ERROR(log, "std exception: {}", e.what()); recordError(ErrorCodes::UNKNOWN_EXCEPTION, e.what()); } catch (...) { - LOG_FMT_ERROR(log, "{}: other exception", __PRETTY_FUNCTION__); + LOG_FMT_ERROR(log, "other exception"); recordError(ErrorCodes::UNKNOWN_EXCEPTION, "other exception"); } diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index 1698baef946..a7a8ca48ef5 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -549,7 +549,7 @@ std::unordered_map DAG auto log_schema_version = [&](const String & result, const std::vector & storage_schema_versions) { FmtBuffer buffer; - buffer.fmtAppend("{} Table {} schema {} Schema version [storage, global, query]: [{}, {}, {}]", __PRETTY_FUNCTION__, logical_table_id, result, storage_schema_versions[0], global_schema_version, query_schema_version); + buffer.fmtAppend("Table {} schema {} Schema version [storage, global, query]: [{}, {}, {}]", logical_table_id, result, storage_schema_versions[0], global_schema_version, query_schema_version); if (table_scan.isPartitionTableScan()) { assert(storage_schema_versions.size() == 1 + table_scan.getPhysicalTableIDs().size()); @@ -567,7 +567,7 @@ std::unordered_map DAG GET_METRIC(tiflash_schema_trigger_count, type_cop_read).Increment(); tmt.getSchemaSyncer()->syncSchemas(context); auto schema_sync_cost = std::chrono::duration_cast(Clock::now() - start_time).count(); - LOG_FMT_INFO(log, "{} Table {} schema sync cost {}ms.", __PRETTY_FUNCTION__, logical_table_id, schema_sync_cost); + LOG_FMT_INFO(log, "Table {} schema sync cost {}ms.", logical_table_id, schema_sync_cost); }; /// Try get storage and lock once. 
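// This patch strips the "{}: " / __PRETTY_FUNCTION__ prefixes from LOG_FMT_*
// call sites across these files. One reason such prefixes tend to be redundant
// is that a call-site logging macro can attach the enclosing function name by
// itself; the macro below is a hypothetical minimal sketch for illustration
// only, not the actual LOG_FMT_* definition in TiFlash.
#include <cstdio>

// __func__ expands inside the function that invokes the macro, so the function
// name is attached automatically instead of being formatted into every message.
#define LOG_WITH_FUNC(msg) std::printf("[%s] %s\n", __func__, (msg))

void handleDAGRequest()
{
    LOG_WITH_FUNC("Handle DAG request done"); // prints "[handleDAGRequest] Handle DAG request done"
}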
diff --git a/dbms/src/Flash/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp index e7b31780b54..98bea6b73f6 100644 --- a/dbms/src/Flash/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -81,7 +81,7 @@ grpc::Status CoprocessorHandler::execute() SCOPE_EXIT({ GET_METRIC(tiflash_coprocessor_handling_request_count, type_cop_dag).Decrement(); }); tipb::DAGRequest dag_request = getDAGRequestFromStringWithRetry(cop_request->data()); - LOG_FMT_DEBUG(log, "{}: Handling DAG request: {}", __PRETTY_FUNCTION__, dag_request.DebugString()); + LOG_FMT_DEBUG(log, "Handling DAG request: {}", dag_request.DebugString()); if (dag_request.has_is_rpn_expr() && dag_request.is_rpn_expr()) throw TiFlashException( "DAG request with rpn expression is not supported in TiFlash", @@ -111,7 +111,7 @@ grpc::Status CoprocessorHandler::execute() DAGDriver driver(cop_context.db_context, cop_request->start_ts() > 0 ? cop_request->start_ts() : dag_request.start_ts_fallback(), cop_request->schema_ver(), &dag_response); driver.execute(); cop_response->set_data(dag_response.SerializeAsString()); - LOG_FMT_DEBUG(log, "{}: Handle DAG request done", __PRETTY_FUNCTION__); + LOG_FMT_DEBUG(log, "Handle DAG request done"); break; } case COP_REQ_TYPE_ANALYZE: @@ -124,13 +124,13 @@ grpc::Status CoprocessorHandler::execute() } catch (const TiFlashException & e) { - LOG_FMT_ERROR(log, "{}:{}\n{}", __PRETTY_FUNCTION__, e.standardText(), e.getStackTrace().toString()); + LOG_FMT_ERROR(log, "{}\n{}", e.standardText(), e.getStackTrace().toString()); GET_METRIC(tiflash_coprocessor_request_error, reason_internal_error).Increment(); return recordError(grpc::StatusCode::INTERNAL, e.standardText()); } catch (LockException & e) { - LOG_FMT_WARNING(log, "{}: LockException: region {}, message: {}", __PRETTY_FUNCTION__, cop_request->context().region_id(), e.message()); + LOG_FMT_WARNING(log, "LockException: region {}, message: {}", cop_request->context().region_id(), e.message()); cop_response->Clear(); GET_METRIC(tiflash_coprocessor_request_error, reason_meet_lock).Increment(); cop_response->set_allocated_locked(e.lock_info.release()); @@ -139,7 +139,7 @@ grpc::Status CoprocessorHandler::execute() } catch (const RegionException & e) { - LOG_FMT_WARNING(log, "{}: RegionException: region {}, message: {}", __PRETTY_FUNCTION__, cop_request->context().region_id(), e.message()); + LOG_FMT_WARNING(log, "RegionException: region {}, message: {}", cop_request->context().region_id(), e.message()); cop_response->Clear(); errorpb::Error * region_err; switch (e.status) @@ -163,25 +163,25 @@ grpc::Status CoprocessorHandler::execute() } catch (const pingcap::Exception & e) { - LOG_FMT_ERROR(log, "{}: KV Client Exception: {}", __PRETTY_FUNCTION__, e.message()); + LOG_FMT_ERROR(log, "KV Client Exception: {}", e.message()); GET_METRIC(tiflash_coprocessor_request_error, reason_kv_client_error).Increment(); return recordError(grpc::StatusCode::INTERNAL, e.message()); } catch (const Exception & e) { - LOG_FMT_ERROR(log, "{}: DB Exception: {}\n{}", __PRETTY_FUNCTION__, e.message(), e.getStackTrace().toString()); + LOG_FMT_ERROR(log, "DB Exception: {}\n{}", e.message(), e.getStackTrace().toString()); GET_METRIC(tiflash_coprocessor_request_error, reason_internal_error).Increment(); return recordError(tiflashErrorCodeToGrpcStatusCode(e.code()), e.message()); } catch (const std::exception & e) { - LOG_FMT_ERROR(log, "{}: std exception: {}", __PRETTY_FUNCTION__, e.what()); + LOG_FMT_ERROR(log, "std exception: {}", e.what()); 
GET_METRIC(tiflash_coprocessor_request_error, reason_other_error).Increment(); return recordError(grpc::StatusCode::INTERNAL, e.what()); } catch (...) { - LOG_FMT_ERROR(log, "{}: other exception", __PRETTY_FUNCTION__); + LOG_FMT_ERROR(log, "other exception"); GET_METRIC(tiflash_coprocessor_request_error, reason_other_error).Increment(); return recordError(grpc::StatusCode::INTERNAL, "other exception"); } diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index e40dbfc14da..167ee238249 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -78,7 +78,7 @@ grpc::Status FlashService::Coprocessor( coprocessor::Response * response) { CPUAffinityManager::getInstance().bindSelfGrpcThread(); - LOG_FMT_DEBUG(log, "{}: Handling coprocessor request: {}", __PRETTY_FUNCTION__, request->DebugString()); + LOG_FMT_DEBUG(log, "Handling coprocessor request: {}", request->DebugString()); if (!security_config.checkGrpcContext(grpc_context)) { @@ -105,14 +105,14 @@ grpc::Status FlashService::Coprocessor( return cop_handler.execute(); }); - LOG_FMT_DEBUG(log, "{}: Handle coprocessor request done: {}, {}", __PRETTY_FUNCTION__, ret.error_code(), ret.error_message()); + LOG_FMT_DEBUG(log, "Handle coprocessor request done: {}, {}", ret.error_code(), ret.error_message()); return ret; } ::grpc::Status FlashService::BatchCoprocessor(::grpc::ServerContext * grpc_context, const ::coprocessor::BatchRequest * request, ::grpc::ServerWriter<::coprocessor::BatchResponse> * writer) { CPUAffinityManager::getInstance().bindSelfGrpcThread(); - LOG_FMT_DEBUG(log, "{}: Handling coprocessor request: {}", __PRETTY_FUNCTION__, request->DebugString()); + LOG_FMT_DEBUG(log, "Handling coprocessor request: {}", request->DebugString()); if (!security_config.checkGrpcContext(grpc_context)) { @@ -139,7 +139,7 @@ ::grpc::Status FlashService::BatchCoprocessor(::grpc::ServerContext * grpc_conte return cop_handler.execute(); }); - LOG_FMT_DEBUG(log, "{}: Handle coprocessor request done: {}, {}", __PRETTY_FUNCTION__, ret.error_code(), ret.error_message()); + LOG_FMT_DEBUG(log, "Handle coprocessor request done: {}, {}", ret.error_code(), ret.error_message()); return ret; } @@ -149,7 +149,7 @@ ::grpc::Status FlashService::DispatchMPPTask( ::mpp::DispatchTaskResponse * response) { CPUAffinityManager::getInstance().bindSelfGrpcThread(); - LOG_FMT_DEBUG(log, "{}: Handling mpp dispatch request: {}", __PRETTY_FUNCTION__, request->DebugString()); + LOG_FMT_DEBUG(log, "Handling mpp dispatch request: {}", request->DebugString()); if (!security_config.checkGrpcContext(grpc_context)) { return grpc::Status(grpc::PERMISSION_DENIED, tls_err_msg); @@ -221,7 +221,7 @@ ::grpc::Status FlashService::EstablishMPPConnectionSyncOrAsync(::grpc::ServerCon CPUAffinityManager::getInstance().bindSelfGrpcThread(); // Establish a pipe for data transferring. The pipes has registered by the task in advance. // We need to find it out and bind the grpc stream with it. 
- LOG_FMT_DEBUG(log, "{}: Handling establish mpp connection request: {}", __PRETTY_FUNCTION__, request->DebugString()); + LOG_FMT_DEBUG(log, "Handling establish mpp connection request: {}", request->DebugString()); if (!security_config.checkGrpcContext(grpc_context)) { @@ -281,7 +281,7 @@ ::grpc::Status FlashService::EstablishMPPConnectionSyncOrAsync(::grpc::ServerCon } else { - LOG_FMT_DEBUG(log, "{}: Write error message failed for unknown reason.", __PRETTY_FUNCTION__); + LOG_FMT_DEBUG(log, "Write error message failed for unknown reason."); return grpc::Status(grpc::StatusCode::UNKNOWN, "Write error message failed for unknown reason."); } } @@ -316,7 +316,7 @@ ::grpc::Status FlashService::CancelMPPTask( { CPUAffinityManager::getInstance().bindSelfGrpcThread(); // CancelMPPTask cancels the query of the task. - LOG_FMT_DEBUG(log, "{}: cancel mpp task request: {}", __PRETTY_FUNCTION__, request->DebugString()); + LOG_FMT_DEBUG(log, "cancel mpp task request: {}", request->DebugString()); if (!security_config.checkGrpcContext(grpc_context)) { @@ -376,7 +376,7 @@ grpc::Status FlashService::BatchCommands( GET_METRIC(tiflash_coprocessor_response_bytes).Increment(response.ByteSizeLong()); }); - LOG_FMT_DEBUG(log, "{}: Handling batch commands: {}", __PRETTY_FUNCTION__, request.DebugString()); + LOG_FMT_DEBUG(log, "Handling batch commands: {}", request.DebugString()); BatchCommandsContext batch_commands_context( *context, @@ -388,8 +388,7 @@ grpc::Status FlashService::BatchCommands( { LOG_FMT_DEBUG( log, - "{}: Handle batch commands request done: {}, {}", - __PRETTY_FUNCTION__, + "Handle batch commands request done: {}, {}", ret.error_code(), ret.error_message()); return ret; @@ -397,11 +396,11 @@ grpc::Status FlashService::BatchCommands( if (!stream->Write(response)) { - LOG_FMT_DEBUG(log, "{}: Write response failed for unknown reason.", __PRETTY_FUNCTION__); + LOG_FMT_DEBUG(log, "Write response failed for unknown reason."); return grpc::Status(grpc::StatusCode::UNKNOWN, "Write response failed for unknown reason."); } - LOG_FMT_DEBUG(log, "{}: Handle batch commands request done: {}, {}", __PRETTY_FUNCTION__, ret.error_code(), ret.error_message()); + LOG_FMT_DEBUG(log, "Handle batch commands request done: {}, {}", ret.error_code(), ret.error_message()); } return grpc::Status::OK; @@ -457,17 +456,17 @@ std::tuple FlashService::createDBContext(const grpc::S } catch (Exception & e) { - LOG_FMT_ERROR(log, "{}: DB Exception: {}", __PRETTY_FUNCTION__, e.message()); + LOG_FMT_ERROR(log, "DB Exception: {}", e.message()); return std::make_tuple(std::make_shared(server.context()), grpc::Status(tiflashErrorCodeToGrpcStatusCode(e.code()), e.message())); } catch (const std::exception & e) { - LOG_FMT_ERROR(log, "{}: std exception: {}", __PRETTY_FUNCTION__, e.what()); + LOG_FMT_ERROR(log, "std exception: {}", e.what()); return std::make_tuple(std::make_shared(server.context()), grpc::Status(grpc::StatusCode::INTERNAL, e.what())); } catch (...) 
{ - LOG_FMT_ERROR(log, "{}: other exception", __PRETTY_FUNCTION__); + LOG_FMT_ERROR(log, "other exception"); return std::make_tuple(std::make_shared(server.context()), grpc::Status(grpc::StatusCode::INTERNAL, "other exception")); } } diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index 3ae7a11bd63..75796af6780 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -115,7 +115,7 @@ class AsyncRequestHandler : public UnaryCallback // handle will be called by ExchangeReceiver::reactor. void handle() { - LOG_FMT_TRACE(log, "Enter {}. stage: {}", __PRETTY_FUNCTION__, stage); + LOG_FMT_TRACE(log, "stage: {}", stage); switch (stage) { case AsyncRequestStage::WAIT_MAKE_READER: diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 707366e1799..0f18ad582b4 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -154,8 +154,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) TablesRegionsInfo tables_regions_info = TablesRegionsInfo::create(task_request.regions(), task_request.table_regions(), tmt_context); LOG_FMT_DEBUG( log, - "{}: Handling {} regions from {} physical tables in MPP task", - __PRETTY_FUNCTION__, + "Handling {} regions from {} physical tables in MPP task", tables_regions_info.regionCount(), tables_regions_info.tableCount()); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp index 9a4cfdcafd3..928bf8a6415 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp @@ -117,7 +117,7 @@ DMFilePtr DMFile::create(UInt64 file_id, const String & parent_path, bool single if (file.exists()) { file.remove(true); - LOG_FMT_WARNING(log, "{}: Existing dmfile, removed: {}", __PRETTY_FUNCTION__, path); + LOG_FMT_WARNING(log, "Existing dmfile, removed: {}", path); } if (single_file_mode) { @@ -207,43 +207,43 @@ bool DMFile::isColIndexExist(const ColId & col_id) const } } -const String DMFile::encryptionBasePath() const +String DMFile::encryptionBasePath() const { return getPathByStatus(parent_path, file_id, DMFile::Status::READABLE); } -const EncryptionPath DMFile::encryptionDataPath(const FileNameBase & file_name_base) const +EncryptionPath DMFile::encryptionDataPath(const FileNameBase & file_name_base) const { return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : file_name_base + details::DATA_FILE_SUFFIX); } -const EncryptionPath DMFile::encryptionIndexPath(const FileNameBase & file_name_base) const +EncryptionPath DMFile::encryptionIndexPath(const FileNameBase & file_name_base) const { return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : file_name_base + details::INDEX_FILE_SUFFIX); } -const EncryptionPath DMFile::encryptionMarkPath(const FileNameBase & file_name_base) const +EncryptionPath DMFile::encryptionMarkPath(const FileNameBase & file_name_base) const { return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : file_name_base + details::MARK_FILE_SUFFIX); } -const EncryptionPath DMFile::encryptionMetaPath() const +EncryptionPath DMFile::encryptionMetaPath() const { return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : metaFileName()); } -const EncryptionPath DMFile::encryptionPackStatPath() const +EncryptionPath DMFile::encryptionPackStatPath() const { return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? 
"" : packStatFileName()); } -const EncryptionPath DMFile::encryptionPackPropertyPath() const +EncryptionPath DMFile::encryptionPackPropertyPath() const { return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : packPropertyFileName()); } -const EncryptionPath DMFile::encryptionConfigurationPath() const +EncryptionPath DMFile::encryptionConfigurationPath() const { return EncryptionPath(encryptionBasePath(), isSingleFileMode() ? "" : configurationFileName()); } @@ -474,7 +474,7 @@ void DMFile::readPackStat(const FileProviderPtr & file_provider, const MetaPackI configuration->getChecksumAlgorithm(), configuration->getChecksumFrameLength()); buf->seek(meta_pack_info.pack_stat_offset); - if (sizeof(PackStat) * packs != buf->readBig((char *)pack_stats.data(), sizeof(PackStat) * packs)) + if (sizeof(PackStat) * packs != buf->readBig(reinterpret_cast(pack_stats.data()), sizeof(PackStat) * packs)) { throw Exception("Cannot read all data", ErrorCodes::CANNOT_READ_ALL_DATA); } @@ -483,7 +483,7 @@ void DMFile::readPackStat(const FileProviderPtr & file_provider, const MetaPackI { auto buf = openForRead(file_provider, path, encryptionPackStatPath(), meta_pack_info.pack_stat_size); buf.seek(meta_pack_info.pack_stat_offset); - if (sizeof(PackStat) * packs != buf.readBig((char *)pack_stats.data(), sizeof(PackStat) * packs)) + if (sizeof(PackStat) * packs != buf.readBig(reinterpret_cast(pack_stats.data()), sizeof(PackStat) * packs)) { throw Exception("Cannot read all data", ErrorCodes::CANNOT_READ_ALL_DATA); } @@ -620,13 +620,13 @@ void DMFile::finalizeForFolderMode(const FileProviderPtr & file_provider, const Poco::File file(new_path); if (file.exists()) { - LOG_FMT_WARNING(log, "{}: Existing dmfile, removing: {}", __PRETTY_FUNCTION__, new_path); + LOG_FMT_WARNING(log, "Existing dmfile, removing: {}", new_path); const String deleted_path = getPathByStatus(parent_path, file_id, Status::DROPPED); // no need to delete the encryption info associated with the dmfile path here. // because this dmfile path is still a valid path and no obsolete encryption info will be left. 
file.renameTo(deleted_path); file.remove(true); - LOG_FMT_WARNING(log, "{}: Existing dmfile, removed: {}", __PRETTY_FUNCTION__, deleted_path); + LOG_FMT_WARNING(log, "Existing dmfile, removed: {}", deleted_path); } old_file.renameTo(new_path); initializeSubFileStatsForFolderMode(); @@ -712,7 +712,7 @@ std::set DMFile::listAllInPath( const auto full_path = parent_path + "/" + name; if (Poco::File temp_file(full_path); temp_file.exists()) temp_file.remove(true); - LOG_FMT_WARNING(log, "{}: Existing temporary dmfile, removed: {}", __PRETTY_FUNCTION__, full_path); + LOG_FMT_WARNING(log, "Existing temporary dmfile, removed: {}", full_path); continue; } else if (startsWith(name, details::FOLDER_PREFIX_DROPPED)) @@ -732,7 +732,7 @@ std::set DMFile::listAllInPath( const auto full_path = parent_path + "/" + name; if (Poco::File del_file(full_path); del_file.exists()) del_file.remove(true); - LOG_FMT_WARNING(log, "{}: Existing dropped dmfile, removed: {}", __PRETTY_FUNCTION__, full_path); + LOG_FMT_WARNING(log, "Existing dropped dmfile, removed: {}", full_path); continue; } } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.h b/dbms/src/Storages/DeltaMerge/File/DMFile.h index ee7cf54b0d8..e74048d0196 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.h @@ -83,7 +83,7 @@ class DMFile : private boost::noncopyable case DROPPED: return "DROPPED"; default: - throw Exception("Unexpected status: " + DB::toString((int)status)); + throw Exception("Unexpected status: " + DB::toString(static_cast(status))); } } @@ -98,7 +98,7 @@ class DMFile : private boost::noncopyable size_t value; public: - ReadMetaMode(size_t value_) + explicit ReadMetaMode(size_t value_) : value(value_) {} @@ -179,8 +179,7 @@ class DMFile : private boost::noncopyable DMSingleFileFormatVersion file_format_version; Footer() - : meta_pack_info() - , sub_file_stat_offset(0) + : sub_file_stat_offset(0) , sub_file_num(0) , file_format_version(DMSingleFileFormatVersion::SINGLE_FILE_VERSION_BASE) {} @@ -227,7 +226,7 @@ class DMFile : private boost::noncopyable size_t getRows() const { size_t rows = 0; - for (auto & s : pack_stats) + for (const auto & s : pack_stats) rows += s.rows; return rows; } @@ -235,7 +234,7 @@ class DMFile : private boost::noncopyable size_t getBytes() const { size_t bytes = 0; - for (auto & s : pack_stats) + for (const auto & s : pack_stats) bytes += s.bytes; return bytes; } @@ -331,14 +330,14 @@ class DMFile : private boost::noncopyable bool isColIndexExist(const ColId & col_id) const; - const String encryptionBasePath() const; - const EncryptionPath encryptionDataPath(const FileNameBase & file_name_base) const; - const EncryptionPath encryptionIndexPath(const FileNameBase & file_name_base) const; - const EncryptionPath encryptionMarkPath(const FileNameBase & file_name_base) const; - const EncryptionPath encryptionMetaPath() const; - const EncryptionPath encryptionPackStatPath() const; - const EncryptionPath encryptionPackPropertyPath() const; - const EncryptionPath encryptionConfigurationPath() const; + String encryptionBasePath() const; + EncryptionPath encryptionDataPath(const FileNameBase & file_name_base) const; + EncryptionPath encryptionIndexPath(const FileNameBase & file_name_base) const; + EncryptionPath encryptionMarkPath(const FileNameBase & file_name_base) const; + EncryptionPath encryptionMetaPath() const; + EncryptionPath encryptionPackStatPath() const; + EncryptionPath encryptionPackPropertyPath() const; + EncryptionPath encryptionConfigurationPath() 
const; static FileNameBase getFileNameBase(ColId col_id, const IDataType::SubstreamPath & substream = {}) { diff --git a/dbms/src/Storages/DeltaMerge/SchemaUpdate.cpp b/dbms/src/Storages/DeltaMerge/SchemaUpdate.cpp index 752d53f0152..0faa601b335 100644 --- a/dbms/src/Storages/DeltaMerge/SchemaUpdate.cpp +++ b/dbms/src/Storages/DeltaMerge/SchemaUpdate.cpp @@ -36,8 +36,8 @@ String astToDebugString(const IAST * const ast) // Useless for production env void setColumnDefineDefaultValue(const AlterCommand & command, ColumnDefine & define) { - std::function castDefaultValue; // for lazy bind - castDefaultValue = [&](const Field & value, const DataTypePtr & type) -> Field { + std::function cast_default_value; // for lazy bind + cast_default_value = [&](const Field & value, const DataTypePtr & type) -> Field { switch (type->getTypeId()) { case TypeIndex::Float32: @@ -93,7 +93,7 @@ void setColumnDefineDefaultValue(const AlterCommand & command, ColumnDefine & de time_t time = 0; ReadBufferFromMemory buf(date.data(), date.size()); readDateTimeText(time, buf); - return toField((Int64)time); + return toField(static_cast(time)); } case TypeIndex::Decimal32: { @@ -138,7 +138,7 @@ void setColumnDefineDefaultValue(const AlterCommand & command, ColumnDefine & de return value; auto nullable = std::dynamic_pointer_cast(type); DataTypePtr nested_type = nullable->getNestedType(); - return castDefaultValue(value, nested_type); // Recursive call on nested type + return cast_default_value(value, nested_type); // Recursive call on nested type } default: throw Exception("Unsupported to setColumnDefineDefaultValue with data type: " + type->getName() @@ -153,12 +153,12 @@ void setColumnDefineDefaultValue(const AlterCommand & command, ColumnDefine & de // a cast function // change column_define.default_value - if (auto default_literal = typeid_cast(command.default_expression.get()); + if (const auto * default_literal = typeid_cast(command.default_expression.get()); default_literal && default_literal->value.getType() == Field::Types::String) { define.default_value = default_literal->value; } - else if (auto default_cast_expr = typeid_cast(command.default_expression.get()); + else if (const auto * default_cast_expr = typeid_cast(command.default_expression.get()); default_cast_expr && default_cast_expr->name == "CAST" /* ParserCastExpression::name */) { // eg. 
CAST('1.234' AS Float32); CAST(999 AS Int32) @@ -167,10 +167,10 @@ void setColumnDefineDefaultValue(const AlterCommand & command, ColumnDefine & de throw Exception("Unknown CAST expression in default expr", ErrorCodes::NOT_IMPLEMENTED); } - auto default_literal_in_cast = typeid_cast(default_cast_expr->arguments->children[0].get()); + const auto * default_literal_in_cast = typeid_cast(default_cast_expr->arguments->children[0].get()); if (default_literal_in_cast) { - Field default_value = castDefaultValue(default_literal_in_cast->value, define.type); + Field default_value = cast_default_value(default_literal_in_cast->value, define.type); define.default_value = default_value; } else @@ -192,14 +192,14 @@ void setColumnDefineDefaultValue(const AlterCommand & command, ColumnDefine & de { DB::Exception ex(e); ex.addMessage(fmt::format("(in setColumnDefineDefaultValue for default_expression: {})", astToDebugString(command.default_expression.get()))); - throw ex; + throw ex; // NOLINT } catch (std::exception & e) { DB::Exception ex( fmt::format("std::exception: {} (in setColumnDefineDefaultValue for default_expression: {})", e.what(), astToDebugString(command.default_expression.get())), ErrorCodes::LOGICAL_ERROR); - throw ex; + throw ex; // NOLINT } } } @@ -311,7 +311,7 @@ void applyAlter(ColumnDefines & table_columns, } else { - LOG_FMT_WARNING(log, "{} receive unknown alter command, type: {}", __PRETTY_FUNCTION__, static_cast(command.type)); + LOG_FMT_WARNING(log, "receive unknown alter command, type: {}", static_cast(command.type)); } } diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp index 265bba6b065..8a7df489f20 100644 --- a/dbms/src/Storages/Transaction/KVStore.cpp +++ b/dbms/src/Storages/Transaction/KVStore.cpp @@ -297,7 +297,7 @@ void KVStore::handleDestroy(UInt64 region_id, TMTContext & tmt, const KVStoreTas const auto region = getRegion(region_id); if (region == nullptr) { - LOG_FMT_INFO(log, "{}: [region {}] is not found, might be removed already", __PRETTY_FUNCTION__, region_id); + LOG_FMT_INFO(log, "[region {}] is not found, might be removed already", region_id); return; } LOG_FMT_INFO(log, "Handle destroy {}", region->toString()); @@ -423,8 +423,7 @@ EngineStoreApplyRes KVStore::handleAdminRaftCmd(raft_cmdpb::AdminRequest && requ if (curr_region_ptr == nullptr) { LOG_FMT_WARNING(log, - "{}: [region {}] is not found at [term {}, index {}, cmd {}], might be removed already", - __PRETTY_FUNCTION__, + "[region {}] is not found at [term {}, index {}, cmd {}], might be removed already", curr_region_id, term, index, diff --git a/dbms/src/Storages/Transaction/SchemaBuilder.cpp b/dbms/src/Storages/Transaction/SchemaBuilder.cpp index 088c9d40d53..1ed8b33d415 100644 --- a/dbms/src/Storages/Transaction/SchemaBuilder.cpp +++ b/dbms/src/Storages/Transaction/SchemaBuilder.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include #include #include @@ -331,22 +332,24 @@ void SchemaBuilder::applyAlterPhysicalTable(DBInfoPtr db_inf return; } - std::stringstream ss; - ss << "Detected schema changes: " << name_mapper.debugCanonicalName(*db_info, *table_info) << ": "; - for (const auto & schema_change : schema_changes) - for (const auto & command : schema_change.first) - { - if (command.type == AlterCommand::ADD_COLUMN) - ss << "ADD COLUMN " << command.column_name << " " << command.data_type->getName() << ", "; - else if (command.type == AlterCommand::DROP_COLUMN) - ss << "DROP COLUMN " << command.column_name << ", "; - else if (command.type == AlterCommand::MODIFY_COLUMN) - ss << "MODIFY COLUMN " << command.column_name << " " << command.data_type->getName() << ", "; - else if (command.type == AlterCommand::RENAME_COLUMN) - ss << "RENAME COLUMN from " << command.column_name << " to " << command.new_column_name << ", "; - } - - LOG_FMT_DEBUG(log, "{} : {}", __PRETTY_FUNCTION__, ss.str()); + auto log_str = [&]() { + FmtBuffer fmt_buf; + fmt_buf.fmtAppend("Detected schema changes: {}: ", name_mapper.debugCanonicalName(*db_info, *table_info)); + for (const auto & schema_change : schema_changes) + for (const auto & command : schema_change.first) + { + if (command.type == AlterCommand::ADD_COLUMN) + fmt_buf.fmtAppend("ADD COLUMN {} {},", command.column_name, command.data_type->getName()); + else if (command.type == AlterCommand::DROP_COLUMN) + fmt_buf.fmtAppend("DROP COLUMN {}, ", command.column_name); + else if (command.type == AlterCommand::MODIFY_COLUMN) + fmt_buf.fmtAppend("MODIFY COLUMN {} {}, ", command.column_name, command.data_type->getName()); + else if (command.type == AlterCommand::RENAME_COLUMN) + fmt_buf.fmtAppend("RENAME COLUMN from {} to {}, ", command.column_name, command.new_column_name); + } + return fmt_buf.toString(); + }; + LOG_DEBUG(log, log_str()); /// Update metadata, through calling alterFromTiDB. // Using original table info with updated columns instead of using new_table_info directly, From 661d205314c1465470fbdde4baff095e640faf88 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Sat, 2 Apr 2022 17:46:30 +0800 Subject: [PATCH 03/79] Remove useless TODO in `dbms/src/Storages/Page/V3/tests/gtest_wal_log.cpp` (#4572) close pingcap/tiflash#4571 --- dbms/src/Storages/Page/V3/tests/gtest_wal_log.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_log.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_log.cpp index fff43f2681f..0f1406f57fc 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_log.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_log.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// TODO: Add copyright for PingCAP // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License From e235089ac6fc748f5af7864e4d3a6d607b2e1972 Mon Sep 17 00:00:00 2001 From: Fu Zhe Date: Mon, 4 Apr 2022 11:02:30 +0800 Subject: [PATCH 04/79] Agg: replace the usage of `ThreadPool` by `ThreadPoolManager`. 
(#4582) close pingcap/tiflash#4581 --- dbms/src/Interpreters/Aggregator.cpp | 26 +++++++++++++++----------- dbms/src/Interpreters/Aggregator.h | 11 ++++++++--- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index b640a7c8a96..ed640ce5d08 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1122,10 +1122,14 @@ Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_v } -BlocksList Aggregator::prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const +BlocksList Aggregator::prepareBlocksAndFillTwoLevel( + AggregatedDataVariants & data_variants, + bool final, + ThreadPoolManager * thread_pool, + size_t max_threads) const { #define M(NAME) \ - else if (data_variants.type == AggregatedDataVariants::Type::NAME) return prepareBlocksAndFillTwoLevelImpl(data_variants, *data_variants.NAME, final, thread_pool); + else if (data_variants.type == AggregatedDataVariants::Type::NAME) return prepareBlocksAndFillTwoLevelImpl(data_variants, *data_variants.NAME, final, thread_pool, max_threads); if (false) // NOLINT { @@ -1141,9 +1145,9 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( AggregatedDataVariants & data_variants, Method & method, bool final, - ThreadPool * thread_pool) const + ThreadPoolManager * thread_pool, + size_t max_threads) const { - size_t max_threads = thread_pool ? thread_pool->size() : 1; if (max_threads > data_variants.aggregates_pools.size()) for (size_t i = data_variants.aggregates_pools.size(); i < max_threads; ++i) data_variants.aggregates_pools.push_back(std::make_shared()); @@ -1181,7 +1185,7 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( [thread_id, &converter] { return converter(thread_id); }); if (thread_pool) - thread_pool->schedule(wrapInvocable(true, [thread_id, &tasks] { tasks[thread_id](); })); + thread_pool->schedule(true, [thread_id, &tasks] { tasks[thread_id](); }); else tasks[thread_id](); } @@ -1227,10 +1231,10 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b if (data_variants.empty()) return blocks; - std::unique_ptr thread_pool; + std::shared_ptr thread_pool; if (max_threads > 1 && data_variants.sizeWithoutOverflowRow() > 100000 /// TODO Make a custom threshold. && data_variants.isTwoLevel()) /// TODO Use the shared thread pool with the `merge` function. - thread_pool = std::make_unique(max_threads); + thread_pool = newThreadPoolManager(max_threads); if (isCancelled()) return BlocksList(); @@ -1249,7 +1253,7 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b if (!data_variants.isTwoLevel()) blocks.emplace_back(prepareBlockAndFillSingleLevel(data_variants, final)); else - blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, thread_pool.get())); + blocks.splice(blocks.end(), prepareBlocksAndFillTwoLevel(data_variants, final, thread_pool.get(), max_threads)); } if (!final) @@ -2002,10 +2006,10 @@ void Aggregator::mergeStream(const BlockInputStreamPtr & stream, AggregatedDataV } }; - std::unique_ptr thread_pool; + std::shared_ptr thread_pool; if (max_threads > 1 && total_input_rows > 100000 /// TODO Make a custom threshold. 
&& has_two_level) - thread_pool = std::make_unique(max_threads); + thread_pool = newThreadPoolManager(max_threads); for (const auto & bucket_blocks : bucket_to_blocks) { @@ -2022,7 +2026,7 @@ void Aggregator::mergeStream(const BlockInputStreamPtr & stream, AggregatedDataV }; if (thread_pool) - thread_pool->schedule(wrapInvocable(true, task)); + thread_pool->schedule(true, task); else task(); } diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index f78a5bb139d..672a0951465 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include #include @@ -1048,14 +1048,19 @@ class Aggregator Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; Block prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const; - BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const; + BlocksList prepareBlocksAndFillTwoLevel( + AggregatedDataVariants & data_variants, + bool final, + ThreadPoolManager * thread_pool, + size_t max_threads) const; template BlocksList prepareBlocksAndFillTwoLevelImpl( AggregatedDataVariants & data_variants, Method & method, bool final, - ThreadPool * thread_pool) const; + ThreadPoolManager * thread_pool, + size_t max_threads) const; template void mergeStreamsImplCase( From b0121627f42e7195271807f3a600fe2c36cae26f Mon Sep 17 00:00:00 2001 From: SeaRise Date: Wed, 6 Apr 2022 18:46:32 +0800 Subject: [PATCH 05/79] Tiflash crash when scanning partition table with `time` column (#4563) close pingcap/tiflash#4526 --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 4 +- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 3 +- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 11 ++- .../Coprocessor/DAGQueryBlockInterpreter.h | 6 +- .../query/mpp/partition_table.test | 93 +++++++++++-------- .../mpp/partition_table_with_time.test | 35 +++++++ 6 files changed, 106 insertions(+), 46 deletions(-) create mode 100644 tests/fullstack-test/mpp/partition_table_with_time.test diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 594f5a47158..95e3655d9d7 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -616,11 +616,11 @@ bool DAGExpressionAnalyzer::buildExtraCastsAfterTS( bool DAGExpressionAnalyzer::appendExtraCastsAfterTS( ExpressionActionsChain & chain, const std::vector & need_cast_column, - const tipb::TableScan & table_scan) + const TiDBTableScan & table_scan) { auto & step = initAndGetLastStep(chain); - bool has_cast = buildExtraCastsAfterTS(step.actions, need_cast_column, table_scan.columns()); + bool has_cast = buildExtraCastsAfterTS(step.actions, need_cast_column, table_scan.getColumns()); for (auto & col : source_columns) step.required_output.push_back(col.name); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 52729176c66..eec05d1427f 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +123,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable bool 
appendExtraCastsAfterTS( ExpressionActionsChain & chain, const std::vector & need_cast_column, - const tipb::TableScan & table_scan); + const TiDBTableScan & table_scan); /// return true if some actions is needed bool appendJoinKeyAndJoinFilters( diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index fe798020f69..6c3c6700577 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -93,7 +93,7 @@ bool addExtraCastsAfterTs( DAGExpressionAnalyzer & analyzer, const std::vector & need_cast_column, ExpressionActionsChain & chain, - const tipb::TableScan & table_scan) + const TiDBTableScan & table_scan) { bool has_need_cast_column = false; for (auto b : need_cast_column) @@ -309,7 +309,11 @@ void DAGQueryBlockInterpreter::handleTableScan(const TiDBTableScan & table_scan, FAIL_POINT_PAUSE(FailPoints::pause_after_copr_streams_acquired); /// handle timezone/duration cast for local and remote table scan. - executeCastAfterTableScan(storage_interpreter.is_need_add_cast_column, remote_read_streams_start_index, pipeline); + executeCastAfterTableScan( + table_scan, + storage_interpreter.is_need_add_cast_column, + remote_read_streams_start_index, + pipeline); recordProfileStreams(pipeline, query_block.source_name); /// handle pushed down filter for local and remote table scan. @@ -356,6 +360,7 @@ void DAGQueryBlockInterpreter::executePushedDownFilter( } void DAGQueryBlockInterpreter::executeCastAfterTableScan( + const TiDBTableScan & table_scan, const std::vector & is_need_add_cast_column, size_t remote_read_streams_start_index, DAGPipeline & pipeline) @@ -366,7 +371,7 @@ void DAGQueryBlockInterpreter::executeCastAfterTableScan( analyzer->initChain(chain, original_source_columns); // execute timezone cast or duration cast if needed for local table scan - if (addExtraCastsAfterTs(*analyzer, is_need_add_cast_column, chain, query_block.source->tbl_scan())) + if (addExtraCastsAfterTs(*analyzer, is_need_add_cast_column, chain, table_scan)) { ExpressionActionsPtr extra_cast = chain.getLastActions(); chain.finalize(); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 0e3b7597e7e..5325b76eec6 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -58,7 +58,11 @@ class DAGQueryBlockInterpreter private: void executeImpl(DAGPipeline & pipeline); void handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); - void executeCastAfterTableScan(const std::vector & is_need_add_cast_column, size_t remote_read_streams_start_index, DAGPipeline & pipeline); + void executeCastAfterTableScan( + const TiDBTableScan & table_scan, + const std::vector & is_need_add_cast_column, + size_t remote_read_streams_start_index, + DAGPipeline & pipeline); void executePushedDownFilter(const std::vector & conditions, size_t remote_read_streams_start_index, DAGPipeline & pipeline); void handleJoin(const tipb::Join & join, DAGPipeline & pipeline, SubqueryForSet & right_query); void prepareJoin( diff --git a/tests/delta-merge-test/query/mpp/partition_table.test b/tests/delta-merge-test/query/mpp/partition_table.test index 6364d6a10ef..6ac6de5fb5e 100644 --- a/tests/delta-merge-test/query/mpp/partition_table.test +++ b/tests/delta-merge-test/query/mpp/partition_table.test @@ -24,7 +24,7 @@ => DBGInvoke 
__set_flush_threshold(1000000, 1000000) # Data. -=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64') +=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64, col_3 MyDatetime') => DBGInvoke __mock_tidb_partition(default, test, 9997) => DBGInvoke __mock_tidb_partition(default, test, 9998) => DBGInvoke __mock_tidb_partition(default, test, 9999) @@ -38,21 +38,22 @@ # query on empty table => DBGInvoke tidb_query('select count(col_1) from default.test group by col_2', 4,'mpp_query:true,mpp_partition_num:3') +=> DBGInvoke tidb_query('select count(col_1) from default.test group by col_3', 4,'mpp_query:true,mpp_partition_num:3') -=> DBGInvoke __raft_insert_row(default, test, 1, 50, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 1, 51, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 1, 52, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 1, 53, 'test4', 888) +=> DBGInvoke __raft_insert_row(default, test, 1, 50, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 1, 51, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 1, 52, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 1, 53, 'test4', 888, '2022-03-31 11:33:33') -=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 4, 51, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 4, 52, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 4, 53, 'test4', 888) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 4, 51, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 4, 52, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 4, 53, 'test4', 888, '2022-03-31 11:33:33') -=> DBGInvoke __raft_insert_row(default, test, 7, 50, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 7, 51, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 7, 52, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 7, 53, 'test4', 888) +=> DBGInvoke __raft_insert_row(default, test, 7, 50, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 7, 51, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 7, 52, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 7, 53, 'test4', 888, '2022-03-31 11:33:33') # query on table that some partition does not contains region => DBGInvoke tidb_query('select count(col_1), col_2 from default.test group by col_2', 4,'mpp_query:true,mpp_partition_num:3') @@ -62,6 +63,13 @@ │ 3 │ 888 │ └─────────────────────┴─────────────────────┘ +=> DBGInvoke tidb_query('select count(col_1), col_3 from default.test group by col_3', 4,'mpp_query:true,mpp_partition_num:3') +┌─exchange_receiver_0─┬─exchange_receiver_1─┐ +│ 6 │ 2022-03-31 11:11:11 │ +│ 3 │ 2022-03-31 11:22:22 │ +│ 3 │ 2022-03-31 11:33:33 │ +└─────────────────────┴─────────────────────┘ + # add more regions => DBGInvoke __put_region(2, 100, 200, default, test, 9997) => DBGInvoke __put_region(3, 200, 300, default, test, 9997) @@ -72,32 +80,32 @@ => DBGInvoke __put_region(8, 100, 200, default, test, 9999) => DBGInvoke __put_region(9, 200, 300, default, test, 9999) -=> DBGInvoke __raft_insert_row(default, test, 2, 150, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 2, 
151, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 2, 152, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 2, 153, 'test4', 888) -=> DBGInvoke __raft_insert_row(default, test, 3, 250, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 3, 251, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 3, 252, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 3, 253, 'test4', 888) - -=> DBGInvoke __raft_insert_row(default, test, 5, 150, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 5, 151, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 5, 152, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 5, 153, 'test4', 888) -=> DBGInvoke __raft_insert_row(default, test, 6, 250, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 6, 251, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 6, 252, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 6, 253, 'test4', 888) - -=> DBGInvoke __raft_insert_row(default, test, 8, 150, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 8, 151, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 8, 152, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 8, 153, 'test4', 888) -=> DBGInvoke __raft_insert_row(default, test, 9, 250, 'test1', 666) -=> DBGInvoke __raft_insert_row(default, test, 9, 251, 'test2', 666) -=> DBGInvoke __raft_insert_row(default, test, 9, 252, 'test3', 777) -=> DBGInvoke __raft_insert_row(default, test, 9, 253, 'test4', 888) +=> DBGInvoke __raft_insert_row(default, test, 2, 150, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 2, 151, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 2, 152, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 2, 153, 'test4', 888, '2022-03-31 11:33:33') +=> DBGInvoke __raft_insert_row(default, test, 3, 250, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 3, 251, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 3, 252, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 3, 253, 'test4', 888, '2022-03-31 11:33:33') + +=> DBGInvoke __raft_insert_row(default, test, 5, 150, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 5, 151, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 5, 152, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 5, 153, 'test4', 888, '2022-03-31 11:33:33') +=> DBGInvoke __raft_insert_row(default, test, 6, 250, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 6, 251, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 6, 252, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 6, 253, 'test4', 888, '2022-03-31 11:33:33') + +=> DBGInvoke __raft_insert_row(default, test, 8, 150, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 8, 151, 'test2', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 8, 152, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 8, 153, 'test4', 888, '2022-03-31 11:33:33') +=> DBGInvoke __raft_insert_row(default, test, 9, 250, 'test1', 666, '2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 9, 251, 'test2', 666, 
'2022-03-31 11:11:11') +=> DBGInvoke __raft_insert_row(default, test, 9, 252, 'test3', 777, '2022-03-31 11:22:22') +=> DBGInvoke __raft_insert_row(default, test, 9, 253, 'test4', 888, '2022-03-31 11:33:33') # query on table that every partition contains region => DBGInvoke tidb_query('select count(col_1), col_2 from default.test group by col_2', 4,'mpp_query:true,mpp_partition_num:3') @@ -107,6 +115,13 @@ │ 9 │ 888 │ └─────────────────────┴─────────────────────┘ +=> DBGInvoke tidb_query('select count(col_1), col_3 from default.test group by col_3', 4,'mpp_query:true,mpp_partition_num:3') +┌─exchange_receiver_0─┬─exchange_receiver_1─┐ +│ 18 │ 2022-03-31 11:11:11 │ +│ 9 │ 2022-03-31 11:22:22 │ +│ 9 │ 2022-03-31 11:33:33 │ +└─────────────────────┴─────────────────────┘ + # Clean up. => DBGInvoke __drop_tidb_table(default, test) => drop table if exists default.test diff --git a/tests/fullstack-test/mpp/partition_table_with_time.test b/tests/fullstack-test/mpp/partition_table_with_time.test new file mode 100644 index 00000000000..0130931322b --- /dev/null +++ b/tests/fullstack-test/mpp/partition_table_with_time.test @@ -0,0 +1,35 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Preparation. +=> DBGInvoke __init_fail_point() + +mysql> drop table if exists test.t; +mysql> create table test.t (col1 smallint(28) unsigned DEFAULT 0, col2 time DEFAULT NULL) PARTITION BY RANGE (col1) ( PARTITION p0 VALUES LESS THAN (2), PARTITION p3 VALUES LESS THAN MAXVALUE); +mysql> insert into test.t values (1, 111111), (2, 222222), (3, 333333); +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +mysql> set tidb_allow_mpp=1; set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; set tidb_partition_prune_mode= dynamic; select col2 from test.t order by col1; ++----------+ +| col2 | ++----------+ +| 11:11:11 | +| 22:22:22 | +| 33:33:33 | ++----------+ + +# Clean up. +mysql> drop table if exists test.t From 467ffa48f27af807a732dedf1579e17977d31746 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Wed, 6 Apr 2022 19:24:31 +0800 Subject: [PATCH 06/79] doc: add instructions for runing unit tests (#4576) close pingcap/tiflash#4574 --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a222cd66d8..05def1eb76c 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,16 @@ cmake $WORKSPACE/tiflash -DCMAKE_BUILD_TYPE=DEBUG ## Run Unit Tests -TBD. +To run unit tests, you need to build with `-DCMAKE_BUILD_TYPE=DEBUG`: + +```shell +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=DEBUG +ninja gtests_dbms +ninja gtests_libcommon +ninja gtests_libdaemon +``` +And the unit-test executables are at `$BUILD/dbms/gtests_dbms`, `$BUILD/libs/libcommon/src/tests/gtests_libcommon` and `$BUILD/libs/libdaemon/src/tests/gtests_libdaemon`. 
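These are ordinary Google Test binaries, so a single suite or case can be selected with `--gtest_filter`, for example `$BUILD/dbms/gtests_dbms --gtest_filter='BlobStoreTest.*'`. As a minimal sketch (the suite and case names are invented for illustration), any test compiled into the `dbms` test target follows the usual gtest shape and is picked up by `gtests_dbms` automatically:

```cpp
#include <gtest/gtest.h>

namespace DB::tests
{
// Illustrative only: once linked into gtests_dbms, this case is runnable via
// `gtests_dbms --gtest_filter='ExampleSuite.AdditionWorks'`.
TEST(ExampleSuite, AdditionWorks)
{
    ASSERT_EQ(1 + 1, 2);
}
} // namespace DB::tests
```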
## Run Integration Tests

From 924ea6d67f8931a36523cb1db6cbcbbe4161c740 Mon Sep 17 00:00:00 2001
From: jiaqizho
Date: Wed, 6 Apr 2022 20:10:31 +0800
Subject: [PATCH 07/79] Add a stale snapshots check, also add some logs (#4537)

ref pingcap/tiflash#3594
---
 dbms/src/Common/Stopwatch.h                   | 23 +++++
 dbms/src/Storages/Page/V3/BlobStore.cpp       | 85 ++++++++++++++++++-
 dbms/src/Storages/Page/V3/PageDirectory.cpp   | 65 +++++++++++++-
 dbms/src/Storages/Page/V3/PageDirectory.h     |  7 +-
 dbms/src/Storages/Page/V3/PageStorageImpl.cpp | 46 +++++++++-
 dbms/src/Storages/Page/V3/WALStore.cpp        | 61 ++++++++-----
 6 files changed, 249 insertions(+), 38 deletions(-)

diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h
index 9de9d03addf..aced9fced11 100644
--- a/dbms/src/Common/Stopwatch.h
+++ b/dbms/src/Common/Stopwatch.h
@@ -57,17 +57,21 @@ class Stopwatch
     void start()
     {
         start_ns = nanoseconds();
+        last_ns = start_ns;
         is_running = true;
     }
+
     void stop()
     {
         stop_ns = nanoseconds();
         is_running = false;
     }
+
     void reset()
     {
         start_ns = 0;
         stop_ns = 0;
+        last_ns = 0;
         is_running = false;
     }
     void restart() { start(); }
@@ -75,9 +79,28 @@
     UInt64 elapsedMilliseconds() const { return elapsed() / 1000000UL; }
     double elapsedSeconds() const { return static_cast<double>(elapsed()) / 1000000000ULL; }
 
+    UInt64 elapsedFromLastTime()
+    {
+        const auto now_ns = nanoseconds();
+        if (is_running)
+        {
+            auto rc = now_ns - last_ns;
+            last_ns = now_ns;
+            return rc;
+        }
+        else
+        {
+            return stop_ns - last_ns;
+        }
+    }
+
+    UInt64 elapsedMillisecondsFromLastTime() { return elapsedFromLastTime() / 1000000UL; }
+    UInt64 elapsedSecondsFromLastTime() { return elapsedFromLastTime() / 1000000000UL; }
+
 private:
     UInt64 start_ns = 0;
     UInt64 stop_ns = 0;
+    UInt64 last_ns = 0;
     clockid_t clock_type;
     bool is_running = false;

diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp
index bfce007feba..18f5b017e1b 100644
--- a/dbms/src/Storages/Page/V3/BlobStore.cpp
+++ b/dbms/src/Storages/Page/V3/BlobStore.cpp
@@ -251,9 +251,17 @@ void BlobStore::remove(const PageEntriesV3 & del_entries)
         // So if we can't use id find blob, just ignore it.
         if (stat)
         {
-            LOG_FMT_TRACE(log, "Blob begin to recalculate capability [blob_id={}]", blob_id);
-            auto lock = stat->lock();
-            stat->recalculateCapacity();
+            {
+                auto lock = stat->lock();
+                stat->recalculateCapacity();
+            }
+            LOG_FMT_TRACE(log, "Blob recalculated capacity [blob_id={}] [max_cap={}] "
+                          "[total_size={}] [valid_size={}] [valid_rate={}]",
+                          blob_id,
+                          stat->sm_max_caps,
+                          stat->sm_total_size,
+                          stat->sm_valid_size,
+                          stat->sm_valid_rate);
         }
     }
 }
@@ -565,10 +573,74 @@ BlobFilePtr BlobStore::read(BlobFileId blob_id, BlobFileOffset offset, char * bu
 }
 
+struct BlobStoreGCInfo
+{
+    String toString() const
+    {
+        return fmt::format("{}. {}. {}. {}. 
", + toTypeString("Read-Only Blob", 0), + toTypeString("No GC Blob", 1), + toTypeString("Full GC Blob", 2), + toTypeString("Truncated Blob", 3)); + } + + void appendToReadOnlyBlob(const BlobFileId blob_id, double valid_rate) + { + blob_gc_info[0].emplace_back(std::make_pair(blob_id, valid_rate)); + } + + void appendToNoNeedGCBlob(const BlobFileId blob_id, double valid_rate) + { + blob_gc_info[1].emplace_back(std::make_pair(blob_id, valid_rate)); + } + + void appendToNeedGCBlob(const BlobFileId blob_id, double valid_rate) + { + blob_gc_info[2].emplace_back(std::make_pair(blob_id, valid_rate)); + } + + void appendToTruncatedBlob(const BlobFileId blob_id, double valid_rate) + { + blob_gc_info[3].emplace_back(std::make_pair(blob_id, valid_rate)); + } + +private: + // 1. read only blob + // 2. no need gc blob + // 3. full gc blob + // 4. need truncate blob + std::vector> blob_gc_info[4]; + + String toTypeString(const std::string_view prefix, const size_t index) const + { + FmtBuffer fmt_buf; + + if (blob_gc_info[index].empty()) + { + fmt_buf.fmtAppend("{}: [null]", prefix); + } + else + { + fmt_buf.fmtAppend("{}: [", prefix); + fmt_buf.joinStr( + blob_gc_info[index].begin(), + blob_gc_info[index].end(), + [](const auto arg, FmtBuffer & fb) { + fb.fmtAppend("{}/{:.2f}", arg.first, arg.second); + }, + ", "); + fmt_buf.append("]"); + } + + return fmt_buf.toString(); + } +}; + std::vector BlobStore::getGCStats() { const auto stats_list = blob_stats.getStats(); std::vector blob_need_gc; + BlobStoreGCInfo blobstore_gc_info; for (const auto & [path, stats] : stats_list) { @@ -577,6 +649,7 @@ std::vector BlobStore::getGCStats() { if (stat->isReadOnly()) { + blobstore_gc_info.appendToReadOnlyBlob(stat->id, stat->sm_valid_rate); LOG_FMT_TRACE(log, "Current [blob_id={}] is read-only", stat->id); continue; } @@ -615,9 +688,11 @@ std::vector BlobStore::getGCStats() // Change current stat to read only stat->changeToReadOnly(); + blobstore_gc_info.appendToNeedGCBlob(stat->id, stat->sm_valid_rate); } else { + blobstore_gc_info.appendToNoNeedGCBlob(stat->id, stat->sm_valid_rate); LOG_FMT_TRACE(log, "Current [blob_id={}] valid rate is {:.2f}, No need to GC.", stat->id, stat->sm_valid_rate); } @@ -627,10 +702,14 @@ std::vector BlobStore::getGCStats() LOG_FMT_TRACE(log, "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", stat->id, stat->sm_total_size, right_margin); blobfile->truncate(right_margin); stat->sm_total_size = right_margin; + stat->sm_valid_rate = stat->sm_valid_size * 1.0 / stat->sm_total_size; + blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate); } } } + LOG_FMT_INFO(log, "BlobStore gc get status done. gc info: {}", blobstore_gc_info.toString()); + return blob_need_gc; } diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index e680bb850be..de9ec0cc03e 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -1045,6 +1045,8 @@ void PageDirectory::gcApply(PageEntriesEdit && migrated_edit, const WriteLimiter const auto & versioned_entries = iter->second; versioned_entries->createNewEntry(record.version, record.entry); } + + LOG_FMT_INFO(log, "GC apply done. 
[edit size={}]", migrated_edit.size()); } std::set PageDirectory::getAliveExternalIds(NamespaceId ns_id) const @@ -1086,13 +1088,19 @@ PageDirectory::getEntriesByBlobIds(const std::vector & blob_ids) con return {blob_versioned_entries, total_page_size}; } + UInt64 total_page_nums = 0; while (true) { // `iter` is an iter that won't be invalid cause by `apply`/`gcApply`. // do scan on the version list without lock on `mvcc_table_directory`. auto page_id = iter->first; const auto & version_entries = iter->second; - total_page_size += version_entries->getEntriesByBlobIds(blob_id_set, page_id, blob_versioned_entries); + auto single_page_size = version_entries->getEntriesByBlobIds(blob_id_set, page_id, blob_versioned_entries); + total_page_size += single_page_size; + if (single_page_size != 0) + { + total_page_nums++; + } { std::shared_lock read_lock(table_rw_mutex); @@ -1108,6 +1116,10 @@ PageDirectory::getEntriesByBlobIds(const std::vector & blob_ids) con throw Exception(fmt::format("Can't get any entries from [blob_id={}]", blob_id)); } } + + LOG_FMT_INFO(log, "Get entries by Blob ids done. [total_page_size={}] [total_page_nums={}]", // + total_page_size, // + total_page_nums); return std::make_pair(std::move(blob_versioned_entries), total_page_size); } @@ -1129,17 +1141,39 @@ PageEntriesV3 PageDirectory::gcInMemEntries() { UInt64 lowest_seq = sequence.load(); + UInt64 invalid_snapshot_nums = 0; + UInt64 valid_snapshot_nums = 0; + UInt64 longest_alive_snapshot_time = 0; + UInt64 longest_alive_snapshot_seq = 0; + UInt64 stale_snapshot_nums = 0; { // Cleanup released snapshots std::lock_guard lock(snapshots_mutex); for (auto iter = snapshots.begin(); iter != snapshots.end(); /* empty */) { if (auto snap = iter->lock(); snap == nullptr) + { iter = snapshots.erase(iter); + invalid_snapshot_nums++; + } else { lowest_seq = std::min(lowest_seq, snap->sequence); ++iter; + valid_snapshot_nums++; + const auto alive_time_seconds = snap->elapsedSeconds(); + + if (alive_time_seconds > 10 * 60) // TODO: Make `10 * 60` as a configuration + { + LOG_FMT_WARNING(log, "Meet a stale snapshot [thread id={}] [tracing id={}] [seq={}] [alive time(s)={}]", snap->create_thread, snap->tracing_id, snap->sequence, alive_time_seconds); + stale_snapshot_nums++; + } + + if (longest_alive_snapshot_time < alive_time_seconds) + { + longest_alive_snapshot_time = alive_time_seconds; + longest_alive_snapshot_seq = snap->sequence; + } } } } @@ -1153,6 +1187,9 @@ PageEntriesV3 PageDirectory::gcInMemEntries() return all_del_entries; } + UInt64 invalid_page_nums = 0; + UInt64 valid_page_nums = 0; + // The page_id that we need to decrease ref count // { id_0: , id_1: <...>, ... } std::map> normal_entries_to_deref; @@ -1172,9 +1209,11 @@ PageEntriesV3 PageDirectory::gcInMemEntries() if (all_deleted) { iter = mvcc_table_directory.erase(iter); + invalid_page_nums++; } else { + valid_page_nums++; iter++; } @@ -1183,6 +1222,8 @@ PageEntriesV3 PageDirectory::gcInMemEntries() } } + UInt64 total_deref_counter = 0; + // Iterate all page_id that need to decrease ref count of specified version. for (const auto & [page_id, deref_counter] : normal_entries_to_deref) { @@ -1205,9 +1246,28 @@ PageEntriesV3 PageDirectory::gcInMemEntries() { std::unique_lock write_lock(table_rw_mutex); mvcc_table_directory.erase(iter); + invalid_page_nums++; + valid_page_nums--; } } + LOG_FMT_INFO(log, "After MVCC gc in memory [lowest_seq={}] " + "clean [invalid_snapshot_nums={}] [invalid_page_nums={}] " + "[total_deref_counter={}] [all_del_entries={}]. 
" + "Still exist [snapshot_nums={}], [page_nums={}]. " + "Longest alive snapshot: [longest_alive_snapshot_time={}] " + "[longest_alive_snapshot_seq={}] [stale_snapshot_nums={}]", + lowest_seq, + invalid_snapshot_nums, + invalid_page_nums, + total_deref_counter, + all_del_entries.size(), + valid_snapshot_nums, + valid_page_nums, + longest_alive_snapshot_time, + longest_alive_snapshot_seq, + stale_snapshot_nums); + return all_del_entries; } @@ -1237,7 +1297,8 @@ PageEntriesEdit PageDirectory::dumpSnapshotToEdit(PageDirectorySnapshotPtr snap) break; } } - // TODO: log down the sequence and time elapsed + + LOG_FMT_INFO(log, "Dumped snapshot to edits.[sequence={}]", snap->sequence); return edit; } diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index 1934ac7f817..d0cc6ffd313 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -65,13 +65,8 @@ class PageDirectorySnapshot : public DB::PageStorageSnapshot return diff.count(); } - unsigned getTid() const - { - return create_thread; - } - public: - UInt64 sequence; + const UInt64 sequence; const unsigned create_thread; const String tracing_id; diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index e771f74a41d..cefb20a4736 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -205,10 +205,10 @@ bool PageStorageImpl::gcImpl(bool /*not_skip*/, const WriteLimiterPtr & write_li if (!gc_is_running.compare_exchange_strong(v, true)) return false; - Stopwatch watch; + Stopwatch gc_watch; SCOPE_EXIT({ GET_METRIC(tiflash_storage_page_gc_count, type_v3).Increment(); - GET_METRIC(tiflash_storage_page_gc_duration_seconds, type_v3).Observe(watch.elapsedSeconds()); + GET_METRIC(tiflash_storage_page_gc_duration_seconds, type_v3).Observe(gc_watch.elapsedSeconds()); bool is_running = true; gc_is_running.compare_exchange_strong(is_running, false); }); @@ -226,26 +226,37 @@ bool PageStorageImpl::gcImpl(bool /*not_skip*/, const WriteLimiterPtr & write_li } }; - // 1. Do the MVCC gc, clean up expired snapshot. // And get the expired entries. if (page_directory->tryDumpSnapshot(write_limiter)) { GET_METRIC(tiflash_storage_page_gc_count, type_v3_mvcc_dumped).Increment(); } + const auto dump_snapshots_ms = gc_watch.elapsedMillisecondsFromLastTime(); + const auto & del_entries = page_directory->gcInMemEntries(); - LOG_FMT_DEBUG(log, "Remove entries from memory [num_entries={}]", del_entries.size()); + const auto gc_in_mem_entries_ms = gc_watch.elapsedMillisecondsFromLastTime(); // 2. Remove the expired entries in BlobStore. // It won't delete the data on the disk. // It will only update the SpaceMap which in memory. blob_store.remove(del_entries); + const auto blobstore_remove_entries_ms = gc_watch.elapsedMillisecondsFromLastTime(); // 3. Analyze the status of each Blob in order to obtain the Blobs that need to do `heavy GC`. // Blobs that do not need to do heavy GC will also do ftruncate to reduce space enlargement. const auto & blob_need_gc = blob_store.getGCStats(); + const auto blobstore_get_gc_stats_ms = gc_watch.elapsedMillisecondsFromLastTime(); if (blob_need_gc.empty()) { + LOG_FMT_INFO(log, "GC finished without any blob need full gc. 
[total time(ms)={}]" + " [dump snapshots(ms)={}] [gc in mem entries(ms)={}]" + " [blobstore remove entries(ms)={}] [blobstore get status(ms)={}]", + gc_watch.elapsedMilliseconds(), + dump_snapshots_ms, + gc_in_mem_entries_ms, + blobstore_remove_entries_ms, + blobstore_get_gc_stats_ms); clean_external_page(); return false; } @@ -259,8 +270,20 @@ bool PageStorageImpl::gcImpl(bool /*not_skip*/, const WriteLimiterPtr & write_li // We also need to filter the version of the entry. // So that the `gc_apply` can proceed smoothly. auto [blob_gc_info, total_page_size] = page_directory->getEntriesByBlobIds(blob_need_gc); + const auto gc_get_entries_ms = gc_watch.elapsedMillisecondsFromLastTime(); if (blob_gc_info.empty()) { + LOG_FMT_INFO(log, "GC finished without any entry need be moved. [total time(ms)={}]" + " [dump snapshots(ms)={}] [in mem entries(ms)={}]" + " [blobstore remove entries(ms)={}] [blobstore get status(ms)={}]" + " [get entries(ms)={}]", + gc_watch.elapsedMilliseconds(), + dump_snapshots_ms, + gc_in_mem_entries_ms, + blobstore_remove_entries_ms, + blobstore_get_gc_stats_ms, + gc_get_entries_ms); + clean_external_page(); return false; } @@ -269,6 +292,7 @@ bool PageStorageImpl::gcImpl(bool /*not_skip*/, const WriteLimiterPtr & write_li // After BlobStore GC, these entries will be migrated to a new blob. // Then we should notify MVCC apply the change. PageEntriesEdit gc_edit = blob_store.gc(blob_gc_info, total_page_size, write_limiter, read_limiter); + const auto blobstore_full_gc_ms = gc_watch.elapsedMillisecondsFromLastTime(); if (gc_edit.empty()) { throw Exception("Something wrong after BlobStore GC.", ErrorCodes::LOGICAL_ERROR); @@ -282,6 +306,20 @@ bool PageStorageImpl::gcImpl(bool /*not_skip*/, const WriteLimiterPtr & write_li // will be remained as "read-only" files while entries in them are useless in actual. // Those BlobFiles should be cleaned during next restore. page_directory->gcApply(std::move(gc_edit), write_limiter); + const auto gc_apply_ms = gc_watch.elapsedMillisecondsFromLastTime(); + LOG_FMT_INFO(log, "GC finished. [total time(ms)={}]" + " [dump snapshots(ms)={}] [gc in mem entries(ms)={}]" + " [blobstore remove entries(ms)={}] [blobstore get status(ms)={}]" + " [get gc entries(ms)={}] [blobstore full gc(ms)={}]" + " [gc apply(ms)={}]", + gc_watch.elapsedMilliseconds(), + dump_snapshots_ms, + gc_in_mem_entries_ms, + blobstore_remove_entries_ms, + blobstore_get_gc_stats_ms, + gc_get_entries_ms, + blobstore_full_gc_ms, + gc_apply_ms); clean_external_page(); diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 99056554fe0..8164ea21944 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -179,27 +179,26 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire return false; LOG_FMT_INFO(logger, "Saving directory snapshot"); - { - // Use {largest_log_num + 1, 1} to save the `edit` - const auto log_num = files_snap.persisted_log_files.rbegin()->log_num; - // Create a temporary file for saving directory snapshot - auto [compact_log, log_filename] = createLogWriter({log_num, 1}, /*manual_flush*/ true); - { - const String serialized = ser::serializeTo(directory_snap); - ReadBufferFromString payload(serialized); - compact_log->addRecord(payload, serialized.size()); - } - compact_log->flush(write_limiter); - compact_log.reset(); // close fd explicitly before renaming file. - - // Rename it to be a normal log file. 
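Both the removed block above and its replacement below follow the write-to-temporary-then-rename idiom. A minimal standalone sketch of that idiom (the names are illustrative, only Poco is assumed):

```cpp
#include <Poco/File.h>
#include <string>

// Write the full payload to "<final>.temp", close the fd, then rename.
// A rename within a single filesystem is atomic, so readers observe either
// the complete file or nothing; stray *.temp files left by a crash are
// simply discarded on the next restore.
void publishFileAtomically(const std::string & final_fullname)
{
    const std::string temp_fullname = final_fullname + ".temp";
    // ... serialize and flush the snapshot into temp_fullname here ...
    Poco::File(temp_fullname).renameTo(final_fullname);
}
```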
- const auto temp_fullname = log_filename.fullname(LogFileStage::Temporary); - const auto normal_fullname = log_filename.fullname(LogFileStage::Normal); - LOG_FMT_INFO(logger, "Renaming log file to be normal [fullname={}]", temp_fullname); - auto f = Poco::File{temp_fullname}; - f.renameTo(normal_fullname); - LOG_FMT_INFO(logger, "Rename log file to normal done [fullname={}]", normal_fullname); - } + + // Use {largest_log_num + 1, 1} to save the `edit` + const auto log_num = files_snap.persisted_log_files.rbegin()->log_num; + // Create a temporary file for saving directory snapshot + auto [compact_log, log_filename] = createLogWriter({log_num, 1}, /*manual_flush*/ true); + + const String serialized = ser::serializeTo(directory_snap); + ReadBufferFromString payload(serialized); + + compact_log->addRecord(payload, serialized.size()); + compact_log->flush(write_limiter); + compact_log.reset(); // close fd explicitly before renaming file. + + // Rename it to be a normal log file. + const auto temp_fullname = log_filename.fullname(LogFileStage::Temporary); + const auto normal_fullname = log_filename.fullname(LogFileStage::Normal); + LOG_FMT_INFO(logger, "Renaming log file to be normal [fullname={}]", temp_fullname); + auto f = Poco::File{temp_fullname}; + f.renameTo(normal_fullname); + LOG_FMT_INFO(logger, "Rename log file to normal done [fullname={}]", normal_fullname); // #define ARCHIVE_COMPACTED_LOGS // keep for debug @@ -221,8 +220,24 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire #endif } } - // TODO: Log more information. duration, num entries, size of compact log file... - LOG_FMT_INFO(logger, "Save directory snapshot to log file done [num_compacts={}]", files_snap.persisted_log_files.size()); + + FmtBuffer fmt_buf; + fmt_buf.append("Dumped directory snapshot to log file done. 
[files_snapshot="); + + fmt_buf.joinStr( + files_snap.persisted_log_files.begin(), + files_snap.persisted_log_files.end(), + [](const auto & arg, FmtBuffer & fb) { + fb.fmtAppend("{}", arg.filename(arg.stage)); + }, + ", "); + fmt_buf.fmtAppend("] [num of records={}] [file={}] [size={}].", + directory_snap.size(), + normal_fullname, + serialized.size()); + + LOG_INFO(logger, fmt_buf.toString()); + return true; } From dec116f1e9c0caa870b704fad650a5dc1469d6ef Mon Sep 17 00:00:00 2001 From: JaySon Date: Fri, 8 Apr 2022 15:10:32 +0800 Subject: [PATCH 08/79] Tracing: Add read_tso as tracing id to PageStorage snapshot (#4288) close pingcap/tiflash#4287 --- dbms/src/Common/Logger.h | 28 ++++++++- .../Coprocessor/DAGStorageInterpreter.cpp | 9 +-- dbms/src/Server/tests/gtest_dttool.cpp | 3 +- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 6 +- dbms/src/Storages/DeltaMerge/DMContext.h | 8 ++- .../DMVersionFilterBlockInputStream.h | 22 ++++--- .../Storages/DeltaMerge/Delta/Snapshot.cpp | 3 +- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 48 +++++++++------ .../src/Storages/DeltaMerge/DeltaMergeStore.h | 9 +-- .../File/DMFileBlockInputStream.cpp | 4 +- .../DeltaMerge/File/DMFileBlockInputStream.h | 6 +- .../DeltaMerge/File/DMFilePackFilter.h | 11 ++-- .../Storages/DeltaMerge/File/DMFileReader.cpp | 6 +- .../Storages/DeltaMerge/File/DMFileReader.h | 6 +- .../DeltaMerge/FilterParser/FilterParser.cpp | 8 +-- .../DeltaMerge/FilterParser/FilterParser.h | 2 +- .../Storages/DeltaMerge/RowKeyRangeUtils.cpp | 7 ++- .../Storages/DeltaMerge/RowKeyRangeUtils.h | 8 ++- dbms/src/Storages/DeltaMerge/Segment.cpp | 14 +++-- .../Storages/DeltaMerge/SegmentReadTaskPool.h | 2 +- .../Storages/DeltaMerge/StableValueSpace.cpp | 34 ++++++----- .../tests/gtest_dm_delta_merge_store.cpp | 58 +++++++++++++++++-- .../tests/gtest_dm_minmax_index.cpp | 2 +- .../DeltaMerge/tools/workload/DTWorkload.cpp | 2 +- dbms/src/Storages/SelectQueryInfo.cpp | 27 +++++---- dbms/src/Storages/SelectQueryInfo.h | 12 ++-- dbms/src/Storages/StorageDeltaMerge.cpp | 29 ++++++---- dbms/src/Storages/StorageDeltaMerge.h | 2 +- dbms/src/Storages/StorageDeltaMergeHelpers.h | 9 ++- .../Storages/System/StorageSystemDTTables.cpp | 18 ++++-- .../Storages/tests/gtest_filter_parser.cpp | 6 +- 31 files changed, 273 insertions(+), 136 deletions(-) diff --git a/dbms/src/Common/Logger.h b/dbms/src/Common/Logger.h index 677723628ac..02aaa4d8cbe 100644 --- a/dbms/src/Common/Logger.h +++ b/dbms/src/Common/Logger.h @@ -50,8 +50,20 @@ class Logger : private boost::noncopyable return getInternal(source, buf, std::forward(first_identifier), std::forward(rest)...); } + template + static LoggerPtr get(Poco::Logger * source_log, T && first_identifier, Args &&... rest) + { + FmtBuffer buf; + return getInternal(source_log, buf, std::forward(first_identifier), std::forward(rest)...); + } + Logger(const std::string & source, const std::string & identifier) - : logger(&Poco::Logger::get(source)) + : Logger(&Poco::Logger::get(source), identifier) + { + } + + Logger(Poco::Logger * source_log, const std::string & identifier) + : logger(source_log) , id(identifier) { } @@ -114,6 +126,20 @@ class Logger : private boost::noncopyable return std::make_shared(source, buf.toString()); } + template + static LoggerPtr getInternal(Poco::Logger * source_log, FmtBuffer & buf, T && first, Args &&... 
args)
+    {
+        buf.fmtAppend("{} ", std::forward<T>(first));
+        return getInternal(source_log, buf, std::forward<Args>(args)...);
+    }
+
+    template <typename T>
+    static LoggerPtr getInternal(Poco::Logger * source_log, FmtBuffer & buf, T && identifier)
+    {
+        buf.fmtAppend("{}", std::forward<T>(identifier));
+        return std::make_shared<Logger>(source_log, buf.toString());
+    }
+
     std::string wrapMsg(const std::string & msg) const
     {
         return fmt::format("{} {}", id, msg);
diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp
index a7a8ca48ef5..0a3e6396ece 100644
--- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp
@@ -261,7 +261,7 @@ LearnerReadSnapshot DAGStorageInterpreter::doBatchCopLearnerRead()
 std::unordered_map<TableID, SelectQueryInfo> DAGStorageInterpreter::generateSelectQueryInfos()
 {
     std::unordered_map<TableID, SelectQueryInfo> ret;
-    auto create_query_info = [&]() -> SelectQueryInfo {
+    auto create_query_info = [&](Int64 table_id) -> SelectQueryInfo {
         SelectQueryInfo query_info;
         /// to avoid null point exception
         query_info.query = makeDummyQuery();
@@ -270,13 +270,14 @@ std::unordered_map<TableID, SelectQueryInfo> DAGStorageInterpreter::generateSele
             analyzer->getPreparedSets(),
             analyzer->getCurrentInputColumns(),
             context.getTimezoneInfo());
+        query_info.req_id = fmt::format("{} Table<{}>", log->identifier(), table_id);
         return query_info;
     };
     if (table_scan.isPartitionTableScan())
     {
         for (const auto physical_table_id : table_scan.getPhysicalTableIDs())
         {
-            SelectQueryInfo query_info = create_query_info();
+            SelectQueryInfo query_info = create_query_info(physical_table_id);
             query_info.mvcc_query_info = std::make_unique<MvccQueryInfo>(mvcc_query_info->resolve_locks, mvcc_query_info->read_tso);
             ret.emplace(physical_table_id, std::move(query_info));
         }
@@ -292,8 +293,8 @@
     }
     else
     {
-        TableID table_id = logical_table_id;
-        SelectQueryInfo query_info = create_query_info();
+        const TableID table_id = logical_table_id;
+        SelectQueryInfo query_info = create_query_info(table_id);
         query_info.mvcc_query_info = std::move(mvcc_query_info);
         ret.emplace(table_id, std::move(query_info));
     }
diff --git a/dbms/src/Server/tests/gtest_dttool.cpp b/dbms/src/Server/tests/gtest_dttool.cpp
index 8499f2f863c..4ea163f0507 100644
--- a/dbms/src/Server/tests/gtest_dttool.cpp
+++ b/dbms/src/Server/tests/gtest_dttool.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.
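To make the identifier chaining above concrete: every extra argument to `Logger::get` is folded into one space-separated identifier that prefixes each message, which is what lets a `read_tso` act as a grep-able trace id. A hedged usage sketch (the identifier values are invented):

```cpp
// Every message from this logger carries the prefix "db_1.t_100 read_tso=400",
// so all lines belonging to one query can be filtered together.
auto logger = DB::Logger::get("DeltaMergeStore", "db_1.t_100", "read_tso=400");
LOG_FMT_INFO(logger, "Read create segment snapshot done");
```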
#include +#include #include #include #include @@ -287,4 +288,4 @@ TEST_F(DTToolTest, BlockwiseInvariant) EXPECT_EQ(new_prop_iter, refreshed_file->getPackProperties().property().end()); stream->readSuffix(); } -} \ No newline at end of file +} diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 8fe9e3ff825..9fe72839628 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -43,7 +43,7 @@ void ColumnFileBig::calculateStat(const DMContext & context) {}, context.db_context.getFileProvider(), context.getReadLimiter(), - /*tracing_logger*/ nullptr); + /*tracing_id*/ context.tracing_id); std::tie(valid_rows, valid_bytes) = pack_filter.validRowsAndBytes(); } @@ -87,7 +87,9 @@ void ColumnFileBigReader::initStream() return; DMFileBlockInputStreamBuilder builder(context.db_context); - file_stream = builder.build(column_file.getFile(), *col_defs, RowKeyRanges{column_file.segment_range}); + file_stream = builder + .setTracingID(context.tracing_id) + .build(column_file.getFile(), *col_defs, RowKeyRanges{column_file.segment_range}); // If we only need to read pk and version columns, then cache columns data in memory. if (pk_ver_only) diff --git a/dbms/src/Storages/DeltaMerge/DMContext.h b/dbms/src/Storages/DeltaMerge/DMContext.h index 30ae4c723d5..1caeb5a87a8 100644 --- a/dbms/src/Storages/DeltaMerge/DMContext.h +++ b/dbms/src/Storages/DeltaMerge/DMContext.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -24,6 +25,7 @@ namespace DB { class StoragePathPool; + namespace DM { class StoragePool; @@ -81,7 +83,7 @@ struct DMContext : private boost::noncopyable const bool enable_relevant_place; const bool enable_skippable_place; - const String query_id; + String tracing_id; public: DMContext(const Context & db_context_, @@ -93,7 +95,7 @@ struct DMContext : private boost::noncopyable bool is_common_handle_, size_t rowkey_column_size_, const DB::Settings & settings, - const String & query_id_ = "") + const String & tracing_id_ = "") : db_context(db_context_) , path_pool(path_pool_) , storage_pool(storage_pool_) @@ -117,7 +119,7 @@ struct DMContext : private boost::noncopyable , read_stable_only(settings.dt_read_stable_only) , enable_relevant_place(settings.dt_enable_relevant_place) , enable_skippable_place(settings.dt_enable_skippable_place) - , query_id(query_id_) + , tracing_id(tracing_id_) { } diff --git a/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h b/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h index 7cf83f08ab3..d512f140109 100644 --- a/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h @@ -15,6 +15,8 @@ #pragma once #include +#include +#include #include #include #include @@ -36,17 +38,20 @@ class DMVersionFilterBlockInputStream : public IBlockInputStream { static_assert(MODE == DM_VERSION_FILTER_MODE_MVCC || MODE == DM_VERSION_FILTER_MODE_COMPACT); + constexpr static const char * MVCC_FILTER_NAME = "DMVersionFilterBlockInputStream"; + constexpr static const char * COMPACT_FILTER_NAME = "DMVersionFilterBlockInputStream"; + public: DMVersionFilterBlockInputStream(const BlockInputStreamPtr & input, const ColumnDefines & read_columns, UInt64 version_limit_, bool is_common_handle_, - const String & query_id_ = "") + const String & tracing_id = "") : version_limit(version_limit_) , 
is_common_handle(is_common_handle_) , header(toEmptyBlock(read_columns)) - , query_id(query_id_) - , log(&Poco::Logger::get("DMVersionFilterBlockInputStream<" + String(MODE == DM_VERSION_FILTER_MODE_MVCC ? "MVCC" : "COMPACT") + ">")) + , log(Logger::get((MODE == DM_VERSION_FILTER_MODE_MVCC ? MVCC_FILTER_NAME : COMPACT_FILTER_NAME), + tracing_id)) { children.push_back(input); @@ -60,15 +65,17 @@ class DMVersionFilterBlockInputStream : public IBlockInputStream ~DMVersionFilterBlockInputStream() { LOG_FMT_DEBUG(log, - "Total rows: {}, pass: {:.2f}%, complete pass: {:.2f}%, complete not pass: {:.2f}%, not clean: {:.2f}%, effective: {:.2f}%, read tso: {}, query id: {}", + "Total rows: {}, pass: {:.2f}%" + ", complete pass: {:.2f}%, complete not pass: {:.2f}%" + ", not clean: {:.2f}%, effective: {:.2f}%" + ", read tso: {}", total_rows, passed_rows * 100.0 / total_rows, complete_passed * 100.0 / total_blocks, complete_not_passed * 100.0 / total_blocks, not_clean_rows * 100.0 / passed_rows, effective_num_rows * 100.0 / passed_rows, - version_limit, - (query_id.empty() ? "" : query_id)); + version_limit); } void readPrefix() override; @@ -192,7 +199,6 @@ class DMVersionFilterBlockInputStream : public IBlockInputStream const UInt64 version_limit; const bool is_common_handle; const Block header; - const String query_id; size_t handle_col_pos; size_t version_col_pos; @@ -230,7 +236,7 @@ class DMVersionFilterBlockInputStream : public IBlockInputStream size_t not_clean_rows = 0; size_t effective_num_rows = 0; - Poco::Logger * const log; + const LoggerPtr log; }; } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp b/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp index af1fdfec94f..cc161d0f4d9 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp @@ -38,8 +38,7 @@ DeltaSnapshotPtr DeltaValueSpace::createSnapshot(const DMContext & context, bool snap->is_update = for_update; snap->_delta = this->shared_from_this(); - // TODO: Add tracing_id from mpp task or background tasks - auto storage_snap = std::make_shared(context.storage_pool, context.getReadLimiter(), /*tracing_id*/ "", true); + auto storage_snap = std::make_shared(context.storage_pool, context.getReadLimiter(), context.tracing_id, /*snapshot_read*/ true); snap->persisted_files_snap = persisted_file_set->createSnapshot(storage_snap); snap->shared_delta_index = delta_index; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 25abdbbb07d..11ec13f25dd 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -102,7 +102,7 @@ namespace DM // MergeDeltaTaskPool // ================================================ -std::pair DeltaMergeStore::MergeDeltaTaskPool::tryAddTask(const BackgroundTask & task, const ThreadType & whom, const size_t max_task_num, Poco::Logger * log_) +std::pair DeltaMergeStore::MergeDeltaTaskPool::tryAddTask(const BackgroundTask & task, const ThreadType & whom, const size_t max_task_num, const LoggerPtr & log_) { std::scoped_lock lock(mutex); if (light_tasks.size() + heavy_tasks.size() >= max_task_num) @@ -142,7 +142,7 @@ std::pair DeltaMergeStore::MergeDeltaTaskPool::tryAddTask(const Back return std::make_pair(true, is_heavy); } -DeltaMergeStore::BackgroundTask DeltaMergeStore::MergeDeltaTaskPool::nextTask(bool is_heavy, Poco::Logger * log_) +DeltaMergeStore::BackgroundTask 
DeltaMergeStore::MergeDeltaTaskPool::nextTask(bool is_heavy, const LoggerPtr & log_) { std::scoped_lock lock(mutex); @@ -207,7 +207,7 @@ DeltaMergeStore::DeltaMergeStore(Context & db_context, , blockable_background_pool(db_context.getBlockableBackgroundPool()) , next_gc_check_key(is_common_handle ? RowKeyValue::COMMON_HANDLE_MIN_KEY : RowKeyValue::INT_HANDLE_MIN_KEY) , hash_salt(++DELTA_MERGE_STORE_HASH_SALT) - , log(&Poco::Logger::get("DeltaMergeStore[" + db_name + "." + table_name + "]")) + , log(Logger::get("DeltaMergeStore", fmt::format("{}.{}", db_name, table_name))) { LOG_FMT_INFO(log, "Restore DeltaMerge Store start [{}.{}]", db_name, table_name); @@ -442,7 +442,7 @@ void DeltaMergeStore::shutdown() LOG_FMT_TRACE(log, "Shutdown DeltaMerge end [{}.{}]", db_name, table_name); } -DMContextPtr DeltaMergeStore::newDMContext(const Context & db_context, const DB::Settings & db_settings, const String & query_id) +DMContextPtr DeltaMergeStore::newDMContext(const Context & db_context, const DB::Settings & db_settings, const String & tracing_id) { std::shared_lock lock(read_write_mutex); @@ -458,7 +458,7 @@ DMContextPtr DeltaMergeStore::newDMContext(const Context & db_context, const DB: is_common_handle, rowkey_column_size, db_settings, - query_id); + tracing_id); return DMContextPtr(ctx); } @@ -529,7 +529,7 @@ void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_ if (rows == 0) return; - auto dm_context = newDMContext(db_context, db_settings); + auto dm_context = newDMContext(db_context, db_settings, "write"); const auto bytes = block.bytes(); @@ -588,8 +588,8 @@ void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_ auto alloc_bytes = block.bytes(offset, limit); bool is_small = limit < dm_context->delta_cache_limit_rows / 4 && alloc_bytes < dm_context->delta_cache_limit_bytes / 4; - // Small column fies are appended to Delta Cache, then flushed later. - // While large column fies are directly written to PageStorage. + // Small column files are appended to Delta Cache, then flushed later. + // While large column files are directly written to PageStorage. if (is_small) { if (segment->writeToCache(*dm_context, block, offset, limit)) @@ -641,6 +641,7 @@ void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_ throw Exception("Fail point random_exception_after_dt_write_done is triggered.", ErrorCodes::FAIL_POINT_ERROR); }); + // TODO: Update the tracing_id before checkSegmentUpdate for (auto & segment : updated_segments) checkSegmentUpdate(dm_context, segment, ThreadType::Write); } @@ -854,6 +855,7 @@ void DeltaMergeStore::ingestFiles( flushCache(dm_context, range); + // TODO: Update the tracing_id before checkSegmentUpdate? for (auto & segment : updated_segments) checkSegmentUpdate(dm_context, segment, ThreadType::Write); } @@ -867,7 +869,7 @@ void DeltaMergeStore::deleteRange(const Context & db_context, const DB::Settings if (delete_range.none()) return; - auto dm_context = newDMContext(db_context, db_settings); + auto dm_context = newDMContext(db_context, db_settings, "delete_range"); Segments updated_segments; @@ -912,6 +914,7 @@ void DeltaMergeStore::deleteRange(const Context & db_context, const DB::Settings cur_range.setEnd(delete_range.end); } + // TODO: Update the tracing_id before checkSegmentUpdate? 
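The TODO above marks the pattern this commit applies throughout: the caller's id rides inside `DMContext`, and a per-query logger is derived from it at each log site. Schematically (a simplified sketch, not a verbatim excerpt of this patch):

```cpp
// Thread the id in once when the context is created...
auto dm_context = newDMContext(db_context, db_settings, /*tracing_id*/ "delete_range");
// ...then re-attach it wherever correlated log output is wanted.
auto tracing_logger = Logger::get(log->name(), dm_context->tracing_id);
LOG_FMT_DEBUG(tracing_logger, "Delete range done");
```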
for (auto & segment : updated_segments) checkSegmentUpdate(dm_context, segment, ThreadType::Write); } @@ -954,7 +957,7 @@ void DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRa void DeltaMergeStore::mergeDeltaAll(const Context & context) { - auto dm_context = newDMContext(context, context.getSettingsRef()); + auto dm_context = newDMContext(context, context.getSettingsRef(), /*tracing_id*/ "mergeDeltaAll"); std::vector all_segments; { @@ -974,7 +977,7 @@ void DeltaMergeStore::mergeDeltaAll(const Context & context) void DeltaMergeStore::compact(const Context & db_context, const RowKeyRange & range) { - auto dm_context = newDMContext(db_context, db_context.getSettingsRef()); + auto dm_context = newDMContext(db_context, db_context.getSettingsRef(), /*tracing_id*/ "compact"); RowKeyRange cur_range = range; while (!cur_range.none()) @@ -1009,6 +1012,8 @@ void DeltaMergeStore::compact(const Context & db_context, const RowKeyRange & ra } } +// Read data without mvcc filtering && delete-range filtering. +// just for debug BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, const DB::Settings & db_settings, const ColumnDefines & columns_to_read, @@ -1018,7 +1023,7 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, { SegmentReadTasks tasks; - auto dm_context = newDMContext(db_context, db_settings, db_context.getCurrentQueryId()); + auto dm_context = newDMContext(db_context, db_settings, fmt::format("read_raw_{}", db_context.getCurrentQueryId())); { std::shared_lock lock(read_write_mutex); @@ -1073,17 +1078,21 @@ BlockInputStreams DeltaMergeStore::read(const Context & db_context, size_t num_streams, UInt64 max_version, const RSOperatorPtr & filter, + const String & tracing_id, size_t expected_block_size, const SegmentIdSet & read_segments, size_t extra_table_id_index) { - auto dm_context = newDMContext(db_context, db_settings, db_context.getCurrentQueryId()); + // Use the id from MPP/Coprocessor level as tracing_id + auto dm_context = newDMContext(db_context, db_settings, tracing_id); SegmentReadTasks tasks = getReadTasksByRanges(*dm_context, sorted_ranges, num_streams, read_segments); - LOG_FMT_DEBUG(log, "Read create segment snapshot done"); + auto tracing_logger = Logger::get(log->name(), dm_context->tracing_id); + LOG_FMT_DEBUG(tracing_logger, "Read create segment snapshot done"); auto after_segment_read = [&](const DMContextPtr & dm_context_, const SegmentPtr & segment_) { + // TODO: Update the tracing_id before checkSegmentUpdate? this->checkSegmentUpdate(dm_context_, segment_, ThreadType::Read); }; @@ -1113,7 +1122,7 @@ BlockInputStreams DeltaMergeStore::read(const Context & db_context, res.push_back(stream); } - LOG_FMT_DEBUG(log, "Read create stream done"); + LOG_FMT_DEBUG(tracing_logger, "Read create stream done"); return res; } @@ -1530,7 +1539,7 @@ namespace GC { // Returns true if it needs gc. // This is for optimization purpose, does not mean to be accurate. -bool shouldCompactStable(const SegmentPtr & seg, DB::Timestamp gc_safepoint, double ratio_threshold, Poco::Logger * log) +bool shouldCompactStable(const SegmentPtr & seg, DB::Timestamp gc_safepoint, double ratio_threshold, const LoggerPtr & log) { // Always GC. 
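    // A ratio_threshold below 1.0 turns the heuristic off: every segment is reported as needing compaction.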
if (ratio_threshold < 1.0) @@ -1550,7 +1559,7 @@ bool shouldCompactStable(const SegmentPtr & seg, DB::Timestamp gc_safepoint, dou return false; } -bool shouldCompactDeltaWithStable(const DMContext & context, const SegmentSnapshotPtr & snap, const RowKeyRange & segment_range, double ratio_threshold, Poco::Logger * log) +bool shouldCompactDeltaWithStable(const DMContext & context, const SegmentSnapshotPtr & snap, const RowKeyRange & segment_range, double ratio_threshold, const LoggerPtr & log) { auto actual_delete_range = snap->delta->getSquashDeleteRange().shrink(segment_range); if (actual_delete_range.none()) @@ -1610,7 +1619,7 @@ UInt64 DeltaMergeStore::onSyncGc(Int64 limit) if (shutdown_called.load(std::memory_order_relaxed)) break; - auto dm_context = newDMContext(global_context, global_context.getSettingsRef()); + auto dm_context = newDMContext(global_context, global_context.getSettingsRef(), "onSyncGc"); SegmentPtr segment; SegmentSnapshotPtr segment_snap; { @@ -2501,8 +2510,9 @@ SegmentReadTasks DeltaMergeStore::getReadTasksByRanges( total_ranges += task->ranges.size(); } + auto tracing_logger = Logger::get(log->name(), dm_context.tracing_id); LOG_FMT_DEBUG( - log, + tracing_logger, "[sorted_ranges: {}] [tasks before split: {}] [tasks final: {}] [ranges final: {}]", sorted_ranges.size(), tasks.size(), diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 51e50a255dc..4f831ddfe0e 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -287,9 +287,9 @@ class DeltaMergeStore : private boost::noncopyable // first element of return value means whether task is added or not // second element of return value means whether task is heavy or not - std::pair tryAddTask(const BackgroundTask & task, const ThreadType & whom, size_t max_task_num, Poco::Logger * log_); + std::pair tryAddTask(const BackgroundTask & task, const ThreadType & whom, size_t max_task_num, const LoggerPtr & log_); - BackgroundTask nextTask(bool is_heavy, Poco::Logger * log_); + BackgroundTask nextTask(bool is_heavy, const LoggerPtr & log_); }; DeltaMergeStore(Context & db_context, // @@ -358,6 +358,7 @@ class DeltaMergeStore : private boost::noncopyable size_t num_streams, UInt64 max_version, const RSOperatorPtr & filter, + const String & tracing_id, size_t expected_block_size = DEFAULT_BLOCK_SIZE, const SegmentIdSet & read_segments = {}, size_t extra_table_id_index = InvalidColumnID); @@ -414,7 +415,7 @@ class DeltaMergeStore : private boost::noncopyable private: #endif - DMContextPtr newDMContext(const Context & db_context, const DB::Settings & db_settings, const String & query_id = ""); + DMContextPtr newDMContext(const Context & db_context, const DB::Settings & db_settings, const String & tracing_id = ""); static bool pkIsHandle(const ColumnDefine & handle_define) { return handle_define.id != EXTRA_HANDLE_COLUMN_ID; } @@ -496,7 +497,7 @@ class DeltaMergeStore : private boost::noncopyable UInt64 hash_salt; - Poco::Logger * log; + LoggerPtr log; }; // namespace DM using DeltaMergeStorePtr = std::shared_ptr; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 749423b0bfa..c9212c4b81e 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -54,7 +54,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build(const 
DMFilePtr & read_packs, file_provider, read_limiter, - tracing_logger); + tracing_id); DMFileReader reader( dmfile, @@ -72,7 +72,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build(const DMFilePtr & read_limiter, rows_threshold_per_read, read_one_pack_every_time, - tracing_logger); + tracing_id); return std::make_shared(std::move(reader)); } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h index 9f166acd5e1..a36bf50a937 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h @@ -111,9 +111,9 @@ class DMFileBlockInputStreamBuilder return *this; } - DMFileBlockInputStreamBuilder & setTracingLogger(const DB::LoggerPtr & logger) + DMFileBlockInputStreamBuilder & setTracingID(const String & tracing_id_) { - tracing_logger = logger; + tracing_id = tracing_id_; return *this; } @@ -155,7 +155,7 @@ class DMFileBlockInputStreamBuilder size_t rows_threshold_per_read = DMFILE_READ_ROWS_THRESHOLD; bool read_one_pack_every_time = false; - DB::LoggerPtr tracing_logger; + String tracing_id; }; /** diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index ee59fb7bc1c..ea0c3265757 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -49,9 +50,9 @@ class DMFilePackFilter const IdSetPtr & read_packs, const FileProviderPtr & file_provider, const ReadLimiterPtr & read_limiter, - const DB::LoggerPtr & tracing_logger) + const String & tracing_id) { - auto pack_filter = DMFilePackFilter(dmfile, index_cache, set_cache_if_miss, rowkey_ranges, filter, read_packs, file_provider, read_limiter, tracing_logger); + auto pack_filter = DMFilePackFilter(dmfile, index_cache, set_cache_if_miss, rowkey_ranges, filter, read_packs, file_provider, read_limiter, tracing_id); pack_filter.init(); return pack_filter; } @@ -109,7 +110,7 @@ class DMFilePackFilter const IdSetPtr & read_packs_, // filter by pack index const FileProviderPtr & file_provider_, const ReadLimiterPtr & read_limiter_, - const DB::LoggerPtr & tracing_logger) + const String & tracing_id) : dmfile(dmfile_) , index_cache(index_cache_) , set_cache_if_miss(set_cache_if_miss_) @@ -119,7 +120,7 @@ class DMFilePackFilter , file_provider(file_provider_) , handle_res(dmfile->getPacks(), RSResult::All) , use_packs(dmfile->getPacks()) - , log(tracing_logger ? 
tracing_logger : DB::Logger::get("DMFilePackFilter")) + , log(Logger::get("DMFilePackFilter", tracing_id)) , read_limiter(read_limiter_) { } @@ -299,7 +300,7 @@ class DMFilePackFilter std::vector handle_res; std::vector use_packs; - DB::LoggerPtr log; + LoggerPtr log; ReadLimiterPtr read_limiter; }; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 2ffc75c938f..423d8d4d031 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -44,7 +44,7 @@ DMFileReader::Stream::Stream( const String & file_name_base, size_t aio_threshold, size_t max_read_buffer_size, - const DB::LoggerPtr & log, + const LoggerPtr & log, const ReadLimiterPtr & read_limiter) : single_file_mode(reader.single_file_mode) , avg_size_hint(reader.dmfile->getColumnStat(col_id).avg_size) @@ -223,7 +223,7 @@ DMFileReader::DMFileReader( const ReadLimiterPtr & read_limiter, size_t rows_threshold_per_read_, bool read_one_pack_every_time_, - const DB::LoggerPtr & tracing_logger) + const String & tracing_id_) : dmfile(dmfile_) , read_columns(read_columns_) , is_common_handle(is_common_handle_) @@ -238,7 +238,7 @@ DMFileReader::DMFileReader( , column_cache(column_cache_) , rows_threshold_per_read(rows_threshold_per_read_) , file_provider(file_provider_) - , log(tracing_logger ? tracing_logger : DB::Logger::get("DMFileReader")) + , log(Logger::get("DMFileReader", tracing_id_)) { for (const auto & cd : read_columns) { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h index e844df64cfd..9211918c2d0 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h @@ -44,7 +44,7 @@ class DMFileReader const String & file_name_base, size_t aio_threshold, size_t max_read_buffer_size, - const DB::LoggerPtr & log, + const LoggerPtr & log, const ReadLimiterPtr & read_limiter); const bool single_file_mode; @@ -90,7 +90,7 @@ class DMFileReader const ReadLimiterPtr & read_limiter, size_t rows_threshold_per_read_, bool read_one_pack_every_time_, - const DB::LoggerPtr & tracing_logger); + const String & tracing_id_); Block getHeader() const { return toEmptyBlock(read_columns); } @@ -142,7 +142,7 @@ class DMFileReader FileProviderPtr file_provider; - DB::LoggerPtr log; + LoggerPtr log; }; } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp index 3db5fb8610c..e90eab156a4 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp @@ -106,7 +106,7 @@ inline RSOperatorPtr parseTiCompareExpr( // const ColumnDefines & columns_to_read, const FilterParser::AttrCreatorByColumnID & creator, const TimezoneInfo & timezone_info, - Poco::Logger * /* log */) + const LoggerPtr & /*log*/) { if (unlikely(expr.children_size() != 2)) return createUnsupported(expr.ShortDebugString(), @@ -247,7 +247,7 @@ RSOperatorPtr parseTiExpr(const tipb::Expr & expr, const ColumnDefines & columns_to_read, const FilterParser::AttrCreatorByColumnID & creator, const TimezoneInfo & timezone_info, - Poco::Logger * log) + const LoggerPtr & log) { assert(isFunctionExpr(expr)); @@ -331,7 +331,7 @@ inline RSOperatorPtr tryParse(const tipb::Expr & filter, const ColumnDefines & columns_to_read, const FilterParser::AttrCreatorByColumnID & creator, const TimezoneInfo & 
timezone_info, - Poco::Logger * log) + const LoggerPtr & log) { if (isFunctionExpr(filter)) return cop::parseTiExpr(filter, columns_to_read, creator, timezone_info, log); @@ -345,7 +345,7 @@ inline RSOperatorPtr tryParse(const tipb::Expr & filter, RSOperatorPtr FilterParser::parseDAGQuery(const DAGQueryInfo & dag_info, const ColumnDefines & columns_to_read, FilterParser::AttrCreatorByColumnID && creator, - Poco::Logger * log) + const LoggerPtr & log) { RSOperatorPtr op = EMPTY_FILTER; if (dag_info.filters.empty()) diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h index e9428a3b0ba..79d11f82d4f 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.h @@ -47,7 +47,7 @@ class FilterParser const DAGQueryInfo & dag_info, const ColumnDefines & columns_to_read, AttrCreatorByColumnID && creator, - Poco::Logger * log); + const LoggerPtr & log); /// Some helper structure diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.cpp b/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.cpp index dc1fae350f9..a2d992e0e8b 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.cpp +++ b/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include +#include namespace DB { @@ -44,7 +46,7 @@ class MergeRangeHelper public: explicit MergeRangeHelper(RowKeyRanges && sorted_ranges_) - : sorted_ranges(std::move(sorted_ranges_)) // + : sorted_ranges(std::move(sorted_ranges_)) { genMergeStats(); } @@ -156,7 +158,7 @@ void sortRangesByStartEdge(RowKeyRanges & ranges) }); } -RowKeyRanges tryMergeRanges(RowKeyRanges && sorted_ranges, size_t expected_ranges_count, Poco::Logger * log) +RowKeyRanges tryMergeRanges(RowKeyRanges && sorted_ranges, size_t expected_ranges_count, const LoggerPtr & log) { if (sorted_ranges.size() <= 1) return std::move(sorted_ranges); @@ -170,7 +172,6 @@ RowKeyRanges tryMergeRanges(RowKeyRanges && sorted_ranges, size_t expected_range /// Try to make the number of merged_ranges result larger or equal to expected_ranges_count. 
do_merge_ranges.trySplit(expected_ranges_count); - if (log) LOG_FMT_TRACE(log, "[original ranges: {}] [expected ranges: {}] [after merged ranges: {}] [final ranges: {}]", ori_size, expected_ranges_count, after_merge_count, do_merge_ranges.currentRangesCount()); diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.h b/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.h index 8331c11993b..59b2c46d45b 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRangeUtils.h @@ -15,15 +15,17 @@ #pragma once #include -#include namespace DB { +class Logger; +using LoggerPtr = std::shared_ptr<Logger>; + namespace DM { void sortRangesByStartEdge(RowKeyRanges & ranges); -RowKeyRanges tryMergeRanges(RowKeyRanges && ranges, size_t expected_ranges_count, Poco::Logger * log = nullptr); +RowKeyRanges tryMergeRanges(RowKeyRanges && ranges, size_t expected_ranges_count, const LoggerPtr & log = nullptr); } // namespace DM -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index a38aa202954..ac192ff6082 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -382,7 +382,7 @@ BlockInputStreamPtr Segment::getInputStream(const DMContext & dm_context, BlockInputStreamPtr stream; if (dm_context.read_delta_only) { - throw Exception("Unsupported"); + throw Exception("Unsupported for read_delta_only"); } else if (dm_context.read_stable_only) { @@ -430,10 +430,10 @@ BlockInputStreamPtr Segment::getInputStream(const DMContext & dm_context, columns_to_read, max_version, is_common_handle, - dm_context.query_id); + dm_context.tracing_id); LOG_FMT_TRACE( - log, + Logger::get(log, dm_context.tracing_id), "Segment [{}] is read by max_version: {}, {} ranges: {}", segment_id, max_version, @@ -727,6 +727,7 @@ std::optional<RowKeyValue> Segment::getSplitPointFast(DMContext & dm_context, co auto stream = builder .setColumnCache(stable_snap->getColumnCaches()[file_index]) .setReadPacks(read_pack) + .setTracingID(fmt::format("{}-getSplitPointFast", dm_context.tracing_id)) .build( read_file, /*read_columns=*/{getExtraHandleColumnDefine(is_common_handle)}, @@ -1347,7 +1348,8 @@ Segment::ReadInfo Segment::getReadInfo(const DMContext & dm_context, const RowKeyRanges & read_ranges, UInt64 max_version) const { - LOG_FMT_DEBUG(log, "Segment[{}] getReadInfo start", segment_id); + auto tracing_logger = Logger::get(log, dm_context.tracing_id); + LOG_FMT_DEBUG(tracing_logger, "Segment[{}] [epoch={}] getReadInfo start", segment_id, epoch); auto new_read_columns = arrangeReadColumns(getExtraHandleColumnDefine(is_common_handle), read_columns); auto pk_ver_col_defs @@ -1362,14 +1364,14 @@ Segment::ReadInfo Segment::getReadInfo(const DMContext & dm_context, // Hold compacted_index reference, to prevent it from deallocated. delta_reader->setDeltaIndex(compacted_index); - LOG_FMT_DEBUG(log, "Segment[{}] getReadInfo end", segment_id); + LOG_FMT_DEBUG(tracing_logger, "Segment[{}] [epoch={}] getReadInfo end", segment_id, epoch); if (fully_indexed) { // Try update shared index, if my_delta_index is more advanced. bool ok = segment_snap->delta->getSharedDeltaIndex()->updateIfAdvanced(*my_delta_index); if (ok) - LOG_FMT_DEBUG(log, "{} Updated delta index", simpleInfo()); + LOG_FMT_DEBUG(tracing_logger, "{} Updated delta index", simpleInfo()); } // Refresh the reference in DeltaIndexManager, so that the index can be properly managed. 
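The hunks above replace the `tracing_logger` plumbing with a plain `tracing_id` string: each component now builds its own logger via `Logger::get(name, tracing_id)`, or derives one from an existing logger plus the id, so the query trace follows the read path without threading a logger pointer through every signature. As a rough illustration of that pattern, here is a minimal, self-contained sketch; `TracingLogger` and its identifier format are invented for the example and are not TiFlash's actual `DB::Logger` API.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <utility>

// Hypothetical stand-in for DB::Logger: it only demonstrates how a fixed
// source name and a per-query tracing id can be fused into one identifier
// that is prepended to every log line.
class TracingLogger
{
public:
    // Start a fresh identifier from a component name, e.g. "DMFilePackFilter".
    static std::shared_ptr<TracingLogger> get(const std::string & name, const std::string & tracing_id)
    {
        // An empty tracing id collapses to the bare component name.
        auto identifier = tracing_id.empty() ? name : name + " " + tracing_id;
        return std::make_shared<TracingLogger>(std::move(identifier));
    }

    // Derive a child logger that keeps the parent's identifier and appends a
    // new tracing id, similar in spirit to Logger::get(log, tracing_id) above.
    static std::shared_ptr<TracingLogger> get(const std::shared_ptr<TracingLogger> & parent, const std::string & tracing_id)
    {
        return std::make_shared<TracingLogger>(parent->identifier_ + " " + tracing_id);
    }

    explicit TracingLogger(std::string identifier)
        : identifier_(std::move(identifier))
    {}

    const std::string & identifier() const { return identifier_; }

    void trace(const std::string & msg) const { std::cout << "[" << identifier_ << "] " << msg << "\n"; }

private:
    std::string identifier_;
};

int main()
{
    auto log = TracingLogger::get("DMFilePackFilter", "mpp-query-1-getSplitPointFast");
    log->trace("after filter, pack_count=42");
    auto child = TracingLogger::get(log, "sub-step");
    child->trace("derived logger keeps the parent identifier");
}
```

The point of the two `get` overloads is that a call site can either start a fresh identifier from a component name or extend a parent's identifier, which is what `Segment::getReadInfo` does with `Logger::get(log, dm_context.tracing_id)`.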
diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h index fa0a1b93b22..dc14716af80 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h @@ -60,7 +60,7 @@ struct SegmentReadTask class SegmentReadTaskPool : private boost::noncopyable { public: - SegmentReadTaskPool(SegmentReadTasks && tasks_) + explicit SegmentReadTaskPool(SegmentReadTasks && tasks_) : tasks(std::move(tasks_)) {} diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 01cfdccfc38..648ffd4f084 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -60,7 +60,7 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang {}, dm_context->db_context.getFileProvider(), dm_context->getReadLimiter(), - /*tracing_logger*/ nullptr); + dm_context->tracing_id); auto [file_valid_rows, file_valid_bytes] = pack_filter.validRowsAndBytes(); rows += file_valid_rows; bytes += file_valid_bytes; @@ -130,7 +130,7 @@ size_t StableValueSpace::getBytes() const size_t StableValueSpace::getBytesOnDisk() const { - // If this stable value space is logical splited, some file may not used, + // If this stable value space is logically split, some files may not be used, // and this will return more bytes than actual used. size_t bytes = 0; for (const auto & file : files) @@ -184,7 +184,7 @@ void StableValueSpace::calculateStableProperty(const DMContext & context, const const auto & pack_properties = file->getPackProperties(); if (pack_stats.empty()) continue; - // if PackPropertys of this DMFile is empty, this must be an old format file generated by previous version. + // if PackProperties of this DMFile is empty, this must be an old format file generated by a previous version. // so we need to create file property for this file. // but to keep dmfile immutable, we just cache the result in memory. 
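The comment above describes a small immutability trick: when an old-format DMFile lacks PackProperties, the derived properties are computed once and cached on the side instead of rewriting the file. Below is a minimal, self-contained sketch of that compute-once-and-cache shape; `ImmutableFile` and `PropertyCache` are invented names for illustration, not the real DMFile types.

```cpp
#include <map>
#include <memory>
#include <string>
#include <utility>

// Hypothetical derived metadata for one file.
struct PackProperty
{
    size_t num_rows = 0;
};

// Stands in for a file whose on-disk format must never change.
struct ImmutableFile
{
    explicit ImmutableFile(std::string id_)
        : id(std::move(id_))
    {}
    const std::string id;
};

class PropertyCache
{
public:
    // Derive the property once and remember it, keyed by file id, so the
    // (old-format) file itself never has to be rewritten.
    const PackProperty & getOrCompute(const ImmutableFile & file)
    {
        auto it = cache.find(file.id);
        if (it != cache.end())
            return it->second; // already derived earlier
        PackProperty computed;
        computed.num_rows = 42; // stand-in for scanning the file's packs
        return cache.emplace(file.id, computed).first->second;
    }

private:
    std::map<std::string, PackProperty> cache;
};

int main()
{
    ImmutableFile f("dmf_1");
    PropertyCache cache;
    const auto & p = cache.getOrCompute(f); // computed on first use, cached afterwards
    return p.num_rows == 42 ? 0 : 1;
}
```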
// @@ -209,6 +209,7 @@ void StableValueSpace::calculateStableProperty(const DMContext & context, const BlockInputStreamPtr data_stream = builder .setRowsThreshold(std::numeric_limits::max()) // because we just read one pack at a time .onlyReadOnePackEveryTime() + .setTracingID(fmt::format("{}-calculateStableProperty", context.tracing_id)) .build(file, read_columns, RowKeyRanges{rowkey_range}); auto mvcc_stream = std::make_shared>( data_stream, @@ -243,7 +244,7 @@ void StableValueSpace::calculateStableProperty(const DMContext & context, const {}, context.db_context.getFileProvider(), context.getReadLimiter(), - /*tracing_logger*/ nullptr); + context.tracing_id); const auto & use_packs = pack_filter.getUsePacks(); size_t new_pack_properties_index = 0; bool use_new_pack_properties = pack_properties.property_size() == 0; @@ -257,9 +258,9 @@ void StableValueSpace::calculateStableProperty(const DMContext & context, const } if (unlikely((size_t)new_pack_properties.property_size() != use_packs_count)) { - throw Exception("new_pack_propertys size " + std::to_string(new_pack_properties.property_size()) - + " doesn't match use packs size " + std::to_string(use_packs_count), - ErrorCodes::LOGICAL_ERROR); + throw Exception( + fmt::format("size doesn't match [new_pack_properties_size={}] [use_packs_size={}]", new_pack_properties.property_size(), use_packs_count), + ErrorCodes::LOGICAL_ERROR); } } for (size_t pack_id = 0; pack_id < use_packs.size(); pack_id++) @@ -319,13 +320,15 @@ void StableValueSpace::drop(const FileProviderPtr & file_provider) } } -SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream(const DMContext & context, // - const ColumnDefines & read_columns, - const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, - UInt64 max_data_version, - size_t expected_block_size, - bool enable_clean_read) +SkippableBlockInputStreamPtr +StableValueSpace::Snapshot::getInputStream( + const DMContext & context, + const ColumnDefines & read_columns, + const RowKeyRanges & rowkey_ranges, + const RSOperatorPtr & filter, + UInt64 max_data_version, + size_t expected_block_size, + bool enable_clean_read) { LOG_FMT_DEBUG(log, "max_data_version: {}, enable_clean_read: {}", max_data_version, enable_clean_read); SkippableBlockInputStreams streams; @@ -337,6 +340,7 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream(const DM .enableCleanRead(enable_clean_read, max_data_version) .setRSOperator(filter) .setColumnCache(column_caches[i]) + .setTracingID(context.tracing_id) .setRowsThreshold(expected_block_size); streams.push_back(builder.build(stable->files[i], read_columns, rowkey_ranges)); } @@ -366,7 +370,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & IdSetPtr{}, context.db_context.getFileProvider(), context.getReadLimiter(), - /*tracing_logger*/ nullptr); + context.tracing_id); const auto & pack_stats = f->getPackStats(); const auto & use_packs = filter.getUsePacks(); for (size_t i = 0; i < pack_stats.size(); ++i) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 9ebe1cc7bb7..bb0e47bddbd 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include +#include #include #include #include @@ -206,6 +208,8 @@ class DeltaMergeStoreRWTest protected: TestMode mode; DeltaMergeStorePtr store; + + constexpr static const char * TRACING_NAME = "DeltaMergeStoreRWTest"; }; TEST_F(DeltaMergeStoreTest, Create) @@ -414,6 +418,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -519,6 +524,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -555,6 +561,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -639,6 +646,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -723,6 +731,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -753,6 +762,7 @@ try /* num_streams= */ 1, /* max_version= */ UInt64(1), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -807,6 +817,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -845,6 +856,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; // block_num represents index of current segment @@ -903,6 +915,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -925,6 +938,7 @@ try /* num_streams= */ 1, /* max_version= */ tso2, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -947,6 +961,7 @@ try /* num_streams= */ 1, /* max_version= */ tso1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -969,6 +984,7 @@ try /* num_streams= */ 1, /* max_version= */ tso1 - 1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -1029,6 +1045,7 @@ try /* num_streams= */ 1, /* max_version= */ tso1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -1065,6 +1082,7 @@ try /* num_streams= */ 1, /* max_version= */ tso2 - 1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -1102,6 +1120,7 @@ try /* num_streams= */ 1, /* max_version= */ tso3 - 1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -1126,6 +1145,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 
1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -1150,6 +1170,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -1210,6 +1231,7 @@ try /* num_streams= */ 1, /* max_version= */ tso1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; @@ -1246,6 +1268,7 @@ try /* num_streams= */ 1, /* max_version= */ tso2 - 1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; @@ -1283,6 +1306,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; @@ -1338,6 +1362,7 @@ try /* num_streams= */ 1, /* max_version= */ tso1, EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; @@ -1374,6 +1399,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; @@ -1459,6 +1485,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -1566,6 +1593,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr & in = ins[0]; @@ -1672,6 +1700,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr & in = ins[0]; @@ -1759,6 +1788,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr & in = ins[0]; @@ -1861,6 +1891,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -1936,6 +1967,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -2011,6 +2043,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -2086,6 +2119,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -2161,6 +2195,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -2234,6 +2269,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -2306,6 +2342,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -2395,6 +2432,7 @@ try /* num_streams= */ 1, /* max_version= */ 
std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr & in = ins[0]; @@ -2528,6 +2566,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr & in = ins[0]; @@ -2588,6 +2627,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr & in = ins[0]; @@ -2686,6 +2726,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -2738,6 +2779,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; in->readPrefix(); @@ -2853,6 +2895,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; @@ -2981,6 +3024,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -3054,6 +3098,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -3084,6 +3129,7 @@ try /* num_streams= */ 1, /* max_version= */ UInt64(1), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -3141,6 +3187,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -3190,6 +3237,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024)[0]; size_t num_rows_read = 0; while (Block block = in->read()) @@ -3262,6 +3310,7 @@ try /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, + TRACING_NAME, /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; @@ -3305,10 +3354,11 @@ try } CATCH -INSTANTIATE_TEST_CASE_P(TestMode, // - DeltaMergeStoreRWTest, - testing::Values(TestMode::V1_BlockOnly, TestMode::V2_BlockOnly, TestMode::V2_FileOnly, TestMode::V2_Mix), - testModeToString); +INSTANTIATE_TEST_CASE_P( + TestMode, + DeltaMergeStoreRWTest, + testing::Values(TestMode::V1_BlockOnly, TestMode::V2_BlockOnly, TestMode::V2_FileOnly, TestMode::V2_Mix), + testModeToString); } // namespace tests } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index bbb8aaf8206..31fd99faf01 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -124,7 +124,7 @@ bool checkMatch( store->mergeDeltaAll(context); const ColumnDefine & col_to_read = check_pk ? 
getExtraHandleColumnDefine(is_common_handle) : cd; - auto streams = store->read(context, context.getSettingsRef(), {col_to_read}, {all_range}, 1, std::numeric_limits::max(), filter); + auto streams = store->read(context, context.getSettingsRef(), {col_to_read}, {all_range}, 1, std::numeric_limits::max(), filter, name); streams[0]->readPrefix(); auto rows = streams[0]->read().rows(); streams[0]->readSuffix(); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp index 5f99cbcd33f..e5c7fd30f40 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp @@ -191,7 +191,7 @@ void DTWorkload::read(const ColumnDefines & columns, int stream_count, T func) auto filter = EMPTY_FILTER; int excepted_block_size = 1024; uint64_t read_ts = ts_gen->get(); - auto streams = store->read(*context, context->getSettingsRef(), columns, ranges, stream_count, read_ts, filter, excepted_block_size); + auto streams = store->read(*context, context->getSettingsRef(), columns, ranges, stream_count, read_ts, filter, "DTWorkload", excepted_block_size); std::vector threads; threads.reserve(streams.size()); for (auto & stream : streams) diff --git a/dbms/src/Storages/SelectQueryInfo.cpp b/dbms/src/Storages/SelectQueryInfo.cpp index 631f707e21d..075c0ad0631 100644 --- a/dbms/src/Storages/SelectQueryInfo.cpp +++ b/dbms/src/Storages/SelectQueryInfo.cpp @@ -12,28 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - #include #include +#include namespace DB { SelectQueryInfo::SelectQueryInfo() = default; -SelectQueryInfo::SelectQueryInfo(const SelectQueryInfo & query_info_) - : query(query_info_.query), - sets(query_info_.sets), - mvcc_query_info(query_info_.mvcc_query_info != nullptr ? std::make_unique(*query_info_.mvcc_query_info) : nullptr), - dag_query(query_info_.dag_query != nullptr ? std::make_unique(*query_info_.dag_query) : nullptr) -{} +SelectQueryInfo::~SelectQueryInfo() = default; -SelectQueryInfo::SelectQueryInfo(SelectQueryInfo && query_info_) - : query(query_info_.query), sets(query_info_.sets), mvcc_query_info(std::move(query_info_.mvcc_query_info)), - dag_query(std::move(query_info_.dag_query)) +SelectQueryInfo::SelectQueryInfo(const SelectQueryInfo & rhs) + : query(rhs.query) + , sets(rhs.sets) + , mvcc_query_info(rhs.mvcc_query_info != nullptr ? std::make_unique(*rhs.mvcc_query_info) : nullptr) + , dag_query(rhs.dag_query != nullptr ? std::make_unique(*rhs.dag_query) : nullptr) + , req_id(rhs.req_id) {} -SelectQueryInfo::~SelectQueryInfo() = default; +SelectQueryInfo::SelectQueryInfo(SelectQueryInfo && rhs) noexcept + : query(std::move(rhs.query)) + , sets(std::move(rhs.sets)) + , mvcc_query_info(std::move(rhs.mvcc_query_info)) + , dag_query(std::move(rhs.dag_query)) + , req_id(std::move(rhs.req_id)) +{} } // namespace DB diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h index bed607ce0fe..d49d4c831d1 100644 --- a/dbms/src/Storages/SelectQueryInfo.h +++ b/dbms/src/Storages/SelectQueryInfo.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include namespace DB @@ -32,6 +33,7 @@ using PreparedSets = std::unordered_map; struct MvccQueryInfo; struct DAGQueryInfo; + /** Query along with some additional data, * that can be used during query processing * inside storage engines. 
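The SelectQueryInfo change above has to spell out the copy and move constructors by hand because the struct owns `mvcc_query_info` and `dag_query` through `std::unique_ptr`, and the new `req_id` string must travel with both. Below is a minimal sketch of the same ownership pattern; `QueryInfoSketch` and `SubQueryInfo` are hypothetical stand-ins, not the real types.

```cpp
#include <memory>
#include <string>
#include <utility>

// Hypothetical stand-in for MvccQueryInfo / DAGQueryInfo, just to make the
// ownership pattern compile on its own.
struct SubQueryInfo
{
    int payload = 0;
};

struct QueryInfoSketch
{
    std::unique_ptr<SubQueryInfo> sub;
    std::string req_id;

    QueryInfoSketch() = default;
    ~QueryInfoSketch() = default;

    // unique_ptr is not copyable, so a copy constructor must clone the pointee
    // (or leave it null), mirroring the make_unique(*rhs.mvcc_query_info) calls above.
    QueryInfoSketch(const QueryInfoSketch & rhs)
        : sub(rhs.sub ? std::make_unique<SubQueryInfo>(*rhs.sub) : nullptr)
        , req_id(rhs.req_id)
    {}

    // Moves only steal the pointer and the string buffer.
    QueryInfoSketch(QueryInfoSketch && rhs) noexcept
        : sub(std::move(rhs.sub))
        , req_id(std::move(rhs.req_id))
    {}
};

int main()
{
    QueryInfoSketch a;
    a.sub = std::make_unique<SubQueryInfo>();
    a.req_id = "req-1";

    QueryInfoSketch b = a;            // deep copy: b.sub points at its own SubQueryInfo
    QueryInfoSketch c = std::move(a); // move: a.sub is now null
    (void)b;
    (void)c;
}
```

Marking the move constructor `noexcept`, as the patch does, also lets standard containers move these objects during reallocation instead of falling back to the copying path.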
@@ -48,14 +50,16 @@ struct SelectQueryInfo std::unique_ptr dag_query; - SelectQueryInfo(); - - SelectQueryInfo(const SelectQueryInfo & query_info_); + std::string req_id; - SelectQueryInfo(SelectQueryInfo && query_info_); + SelectQueryInfo(); ~SelectQueryInfo(); + // support copying and moving + SelectQueryInfo(const SelectQueryInfo & rhs); + SelectQueryInfo(SelectQueryInfo && rhs) noexcept; + bool fromAST() const { return dag_query == nullptr; }; }; diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 224500cd69d..1f496ade671 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -80,7 +81,7 @@ StorageDeltaMerge::StorageDeltaMerge( , store_inited(false) , max_column_id_used(0) , global_context(global_context_.getGlobalContext()) - , log(&Poco::Logger::get("StorageDeltaMerge")) + , log(Logger::get("StorageDeltaMerge", fmt::format("{}.{}", db_name_, table_name_))) { if (primary_expr_ast_->children.empty()) throw Exception("No primary key"); @@ -626,8 +627,9 @@ BlockInputStreams StorageDeltaMerge::read( throw Exception("TMTContext is not initialized", ErrorCodes::LOGICAL_ERROR); const auto & mvcc_query_info = *query_info.mvcc_query_info; + auto tracing_logger = Logger::get("StorageDeltaMerge", log->identifier(), query_info.req_id); - LOG_FMT_DEBUG(log, "Read with tso: {}", mvcc_query_info.read_tso); + LOG_FMT_DEBUG(tracing_logger, "Read with tso: {}", mvcc_query_info.read_tso); // Check whether tso is smaller than TiDB GcSafePoint const auto check_read_tso = [&tmt, &context, this](UInt64 read_tso) { @@ -652,7 +654,7 @@ BlockInputStreams StorageDeltaMerge::read( check_read_tso(mvcc_query_info.read_tso); FmtBuffer fmt_buf; - if (unlikely(log->trace())) + if (unlikely(tracing_logger->trace())) { fmt_buf.append("orig, "); fmt_buf.joinStr( @@ -685,9 +687,9 @@ BlockInputStreams StorageDeltaMerge::read( is_common_handle, rowkey_column_size, /*expected_ranges_count*/ num_streams, - log); + tracing_logger); - if (unlikely(log->trace())) + if (unlikely(tracing_logger->trace())) { fmt_buf.append(" merged, "); fmt_buf.joinStr( @@ -697,7 +699,7 @@ BlockInputStreams StorageDeltaMerge::read( fb.append(range.toDebugString()); }, ","); - LOG_FMT_TRACE(log, "reading ranges: {}", fmt_buf.toString()); + LOG_FMT_TRACE(tracing_logger, "reading ranges: {}", fmt_buf.toString()); } /// Get Rough set filter from query @@ -722,10 +724,10 @@ BlockInputStreams StorageDeltaMerge::read( rs_operator = FilterParser::parseDAGQuery(*query_info.dag_query, columns_to_read, std::move(create_attr_by_column_id), log); } if (likely(rs_operator != DM::EMPTY_FILTER)) - LOG_FMT_DEBUG(log, "Rough set filter: {}", rs_operator->toDebugString()); + LOG_FMT_DEBUG(tracing_logger, "Rough set filter: {}", rs_operator->toDebugString()); } else - LOG_FMT_DEBUG(log, "Rough set filter is disabled."); + LOG_FMT_DEBUG(tracing_logger, "Rough set filter is disabled."); auto streams = store->read( context, @@ -735,6 +737,7 @@ BlockInputStreams StorageDeltaMerge::read( num_streams, /*max_version=*/mvcc_query_info.read_tso, rs_operator, + query_info.req_id, max_block_size, parseSegmentSet(select_query.segment_expression_list), extra_table_id_index); @@ -742,7 +745,7 @@ BlockInputStreams StorageDeltaMerge::read( /// Ensure read_tso info after read. 
check_read_tso(mvcc_query_info.read_tso); - LOG_FMT_TRACE(log, "[ranges: {}] [streams: {}]", ranges.size(), streams.size()); + LOG_FMT_TRACE(tracing_logger, "[ranges: {}] [streams: {}]", ranges.size(), streams.size()); return streams; } @@ -797,6 +800,7 @@ UInt64 StorageDeltaMerge::onSyncGc(Int64 limit) return 0; } +// just for testing size_t getRows(DM::DeltaMergeStorePtr & store, const Context & context, const DM::RowKeyRange & range) { size_t rows = 0; @@ -809,7 +813,8 @@ size_t getRows(DM::DeltaMergeStorePtr & store, const Context & context, const DM {range}, 1, std::numeric_limits::max(), - EMPTY_FILTER)[0]; + EMPTY_FILTER, + /*tracing_id*/ "getRows")[0]; stream->readPrefix(); Block block; while ((block = stream->read())) @@ -819,6 +824,7 @@ size_t getRows(DM::DeltaMergeStorePtr & store, const Context & context, const DM return rows; } +// just for testing DM::RowKeyRange getRange(DM::DeltaMergeStorePtr & store, const Context & context, size_t total_rows, size_t delete_rows) { auto start_index = rand() % (total_rows - delete_rows + 1); // NOLINT(cert-msc50-cpp) @@ -832,7 +838,8 @@ DM::RowKeyRange getRange(DM::DeltaMergeStorePtr & store, const Context & context {DM::RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, 1, std::numeric_limits::max(), - EMPTY_FILTER)[0]; + EMPTY_FILTER, + /*tracing_id*/ "getRange")[0]; stream->readPrefix(); Block block; size_t index = 0; diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index f5513063d3e..84ae387ecee 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -235,7 +235,7 @@ class StorageDeltaMerge Context & global_context; - Poco::Logger * log; + LoggerPtr log; }; diff --git a/dbms/src/Storages/StorageDeltaMergeHelpers.h b/dbms/src/Storages/StorageDeltaMergeHelpers.h index 96b3424b836..18337f879de 100644 --- a/dbms/src/Storages/StorageDeltaMergeHelpers.h +++ b/dbms/src/Storages/StorageDeltaMergeHelpers.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -30,7 +31,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -inline DM::RowKeyRanges getQueryRanges(const DB::MvccQueryInfo::RegionsQueryInfo & regions, TableID table_id, bool is_common_handle, size_t rowkey_column_size, size_t expected_ranges_count = 1, Poco::Logger * log = nullptr) +inline DM::RowKeyRanges getQueryRanges( + const DB::MvccQueryInfo::RegionsQueryInfo & regions, + TableID table_id, + bool is_common_handle, + size_t rowkey_column_size, + size_t expected_ranges_count = 1, + const LoggerPtr & log = nullptr) { // todo check table id in DecodedTiKVKey??? 
DM::RowKeyRanges ranges; diff --git a/dbms/src/Storages/System/StorageSystemDTTables.cpp b/dbms/src/Storages/System/StorageSystemDTTables.cpp index 9523aff1f44..b3f9cf5b29e 100644 --- a/dbms/src/Storages/System/StorageSystemDTTables.cpp +++ b/dbms/src/Storages/System/StorageSystemDTTables.cpp @@ -28,7 +28,8 @@ namespace DB { -StorageSystemDTTables::StorageSystemDTTables(const std::string & name_) : name(name_) +StorageSystemDTTables::StorageSystemDTTables(const std::string & name_) + : name(name_) { setColumns(ColumnsDescription({ {"database", std::make_shared()}, @@ -85,6 +86,7 @@ StorageSystemDTTables::StorageSystemDTTables(const std::string & name_) : name(n {"storage_stable_num_snapshots", std::make_shared()}, {"storage_stable_oldest_snapshot_lifetime", std::make_shared()}, {"storage_stable_oldest_snapshot_thread_id", std::make_shared()}, + {"storage_stable_oldest_snapshot_tracing_id", std::make_shared()}, {"storage_stable_num_pages", std::make_shared()}, {"storage_stable_num_normal_pages", std::make_shared()}, {"storage_stable_max_page_id", std::make_shared()}, @@ -92,6 +94,7 @@ StorageSystemDTTables::StorageSystemDTTables(const std::string & name_) : name(n {"storage_delta_num_snapshots", std::make_shared()}, {"storage_delta_oldest_snapshot_lifetime", std::make_shared()}, {"storage_delta_oldest_snapshot_thread_id", std::make_shared()}, + {"storage_delta_oldest_snapshot_tracing_id", std::make_shared()}, {"storage_delta_num_pages", std::make_shared()}, {"storage_delta_num_normal_pages", std::make_shared()}, {"storage_delta_max_page_id", std::make_shared()}, @@ -99,6 +102,7 @@ StorageSystemDTTables::StorageSystemDTTables(const std::string & name_) : name(n {"storage_meta_num_snapshots", std::make_shared()}, {"storage_meta_oldest_snapshot_lifetime", std::make_shared()}, {"storage_meta_oldest_snapshot_thread_id", std::make_shared()}, + {"storage_meta_oldest_snapshot_tracing_id", std::make_shared()}, {"storage_meta_num_pages", std::make_shared()}, {"storage_meta_num_normal_pages", std::make_shared()}, {"storage_meta_max_page_id", std::make_shared()}, @@ -108,7 +112,8 @@ StorageSystemDTTables::StorageSystemDTTables(const std::string & name_) : name(n } -BlockInputStreams StorageSystemDTTables::read(const Names & column_names, +BlockInputStreams StorageSystemDTTables::read( + const Names & column_names, const SelectQueryInfo &, const Context & context, QueryProcessingStage::Enum & processed_stage, @@ -126,19 +131,19 @@ BlockInputStreams StorageSystemDTTables::read(const Names & column_names, for (const auto & d : databases) { String database_name = d.first; - auto & database = d.second; + const auto & database = d.second; const DatabaseTiFlash * db_tiflash = typeid_cast(database.get()); auto it = database->getIterator(context); for (; it->isValid(); it->next()) { - auto & table_name = it->name(); + const auto & table_name = it->name(); auto & storage = it->table(); if (storage->getName() != MutableSupport::delta_tree_storage_name) continue; auto dm_storage = std::dynamic_pointer_cast(storage); - auto & table_info = dm_storage->getTableInfo(); + const auto & table_info = dm_storage->getTableInfo(); auto table_id = table_info.id; auto stat = dm_storage->getStore()->getStat(); @@ -201,6 +206,7 @@ BlockInputStreams StorageSystemDTTables::read(const Names & column_names, res_columns[j++]->insert(stat.storage_stable_num_snapshots); res_columns[j++]->insert(stat.storage_stable_oldest_snapshot_lifetime); res_columns[j++]->insert(stat.storage_stable_oldest_snapshot_thread_id); + 
res_columns[j++]->insert(stat.storage_stable_oldest_snapshot_tracing_id); res_columns[j++]->insert(stat.storage_stable_num_pages); res_columns[j++]->insert(stat.storage_stable_num_normal_pages); res_columns[j++]->insert(stat.storage_stable_max_page_id); @@ -208,6 +214,7 @@ BlockInputStreams StorageSystemDTTables::read(const Names & column_names, res_columns[j++]->insert(stat.storage_delta_num_snapshots); res_columns[j++]->insert(stat.storage_delta_oldest_snapshot_lifetime); res_columns[j++]->insert(stat.storage_delta_oldest_snapshot_thread_id); + res_columns[j++]->insert(stat.storage_delta_oldest_snapshot_tracing_id); res_columns[j++]->insert(stat.storage_delta_num_pages); res_columns[j++]->insert(stat.storage_delta_num_normal_pages); res_columns[j++]->insert(stat.storage_delta_max_page_id); @@ -215,6 +222,7 @@ BlockInputStreams StorageSystemDTTables::read(const Names & column_names, res_columns[j++]->insert(stat.storage_meta_num_snapshots); res_columns[j++]->insert(stat.storage_meta_oldest_snapshot_lifetime); res_columns[j++]->insert(stat.storage_meta_oldest_snapshot_thread_id); + res_columns[j++]->insert(stat.storage_meta_oldest_snapshot_tracing_id); res_columns[j++]->insert(stat.storage_meta_num_pages); res_columns[j++]->insert(stat.storage_meta_num_normal_pages); res_columns[j++]->insert(stat.storage_meta_max_page_id); diff --git a/dbms/src/Storages/tests/gtest_filter_parser.cpp b/dbms/src/Storages/tests/gtest_filter_parser.cpp index 52f9efe0f1e..a027ea71cfc 100644 --- a/dbms/src/Storages/tests/gtest_filter_parser.cpp +++ b/dbms/src/Storages/tests/gtest_filter_parser.cpp @@ -54,14 +54,14 @@ class FilterParserTest : public ::testing::Test } FilterParserTest() - : log(&Poco::Logger::get("FilterParserTest")) + : log(Logger::get("FilterParserTest")) , ctx(TiFlashTestEnv::getContext()) { default_timezone_info = ctx.getTimezoneInfo(); } protected: - Poco::Logger * log; + LoggerPtr log; Context ctx; static TimezoneInfo default_timezone_info; DM::RSOperatorPtr generateRsOperator(String table_info_json, const String & query, TimezoneInfo & timezone_info); @@ -98,7 +98,7 @@ DM::RSOperatorPtr FilterParserTest::generateRsOperator(const String table_info_j DM::ColumnDefines columns_to_read; { NamesAndTypes source_columns; - std::tie(source_columns, std::ignore) = parseColumnsFromTableInfo(table_info, log); + std::tie(source_columns, std::ignore) = parseColumnsFromTableInfo(table_info, log->getLog()); dag_query = std::make_unique( conditions, DAGPreparedSets(), From dae5d3179d3c191827006bda226e331bad280cb5 Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Mon, 11 Apr 2022 12:02:34 +0800 Subject: [PATCH 09/79] Inhibit warning messages in tiflash-proxy (#4613) close pingcap/tiflash#4616 --- contrib/tiflash-proxy-cmake/CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index 5021782dfad..e243ecba37c 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -3,6 +3,9 @@ set(_TIFLASH_PROXY_LIBRARY "${_TIFLASH_PROXY_SOURCE_DIR}/target/release/${CMAKE_ file(GLOB_RECURSE _TIFLASH_PROXY_SRCS "${_TIFLASH_PROXY_SOURCE_DIR}/*.rs") list(FILTER _TIFLASH_PROXY_SRCS EXCLUDE REGEX ${_TIFLASH_PROXY_SOURCE_DIR}/target/.*) +# use `CFLAGS=-w CXXFLAGS=-w` to inhibit warning messages. 
+set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} CFLAGS=-w CXXFLAGS=-w) + if(TIFLASH_LLVM_TOOLCHAIN AND USE_LIBCXX) set(TIFLASH_RUST_LINKER ${CMAKE_CURRENT_BINARY_DIR}/tiflash-linker) set(TIFLASH_RUST_LINKER_TMP ${CMAKE_CURRENT_BINARY_DIR}/tmp/tiflash-linker) @@ -18,10 +21,12 @@ if(TIFLASH_LLVM_TOOLCHAIN AND USE_LIBCXX) if(LINKER_NAME) set(TIFLASH_RUSTFLAGS "-C link-arg=-fuse-ld=${LINKER_NAME} ${TIFLASH_RUSTFLAGS}") endif() - set(TIFLASH_RUST_ENV CXXSTDLIB=c++ CMAKE=${CMAKE_COMMAND} CFLAGS=-w CXXFLAGS=-w RUSTFLAGS=${TIFLASH_RUSTFLAGS} PROTOC=${Protobuf_PROTOC_EXECUTABLE} PROTOC_INCLUDE=${Protobuf_INCLUDE_DIR}) - message(STATUS "enforce LLVM toolchain for rust: ${TIFLASH_RUST_ENV}") + set(TIFLASH_RUST_ENV CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXSTDLIB=c++ ${TIFLASH_RUST_ENV} RUSTFLAGS=${TIFLASH_RUSTFLAGS} PROTOC=${Protobuf_PROTOC_EXECUTABLE} PROTOC_INCLUDE=${Protobuf_INCLUDE_DIR}) + message(STATUS "Enforce LLVM toolchain for rust") endif() +message(STATUS "Using rust env for tiflash-proxy: ${TIFLASH_RUST_ENV}") + add_custom_command(OUTPUT ${_TIFLASH_PROXY_LIBRARY} COMMENT "Building tiflash proxy" # `ENGINE_LABEL_VALUE` is used in proxy for copying `libraftstore_proxy.xx` to `lib${ENGINE_LABEL_VALUE}_proxy.xx` From 9b8c999f1f6055afc3d7775df143dd56534399fb Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Tue, 12 Apr 2022 12:46:35 +0800 Subject: [PATCH 10/79] Split AstToExecutor from dbgFuncCoprocessor. (#4610) ref pingcap/tiflash#4609 --- dbms/src/Debug/DAGProperties.h | 34 + dbms/src/Debug/astToExecutor.cpp | 1531 ++++++++++++++++++++++ dbms/src/Debug/astToExecutor.h | 287 +++++ dbms/src/Debug/dbgFuncCoprocessor.cpp | 1675 +------------------------ dbms/src/Debug/dbgFuncCoprocessor.h | 14 +- 5 files changed, 1859 insertions(+), 1682 deletions(-) create mode 100644 dbms/src/Debug/DAGProperties.h create mode 100644 dbms/src/Debug/astToExecutor.cpp create mode 100644 dbms/src/Debug/astToExecutor.h diff --git a/dbms/src/Debug/DAGProperties.h b/dbms/src/Debug/DAGProperties.h new file mode 100644 index 00000000000..bcb4170c9ac --- /dev/null +++ b/dbms/src/Debug/DAGProperties.h @@ -0,0 +1,34 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace DB +{ +struct DAGProperties +{ + String encode_type; + Int64 tz_offset = 0; + String tz_name; + Int32 collator = 0; + bool is_mpp_query = false; + bool use_broadcast_join = false; + Int32 mpp_partition_num = 1; + Timestamp start_ts = DEFAULT_MAX_READ_TSO; + Int32 mpp_timeout = 10; +}; +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp new file mode 100644 index 00000000000..5f7567f0eff --- /dev/null +++ b/dbms/src/Debug/astToExecutor.cpp @@ -0,0 +1,1531 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ +namespace +{ +std::unordered_map<String, tipb::ScalarFuncSig> func_name_to_sig({ + {"equals", tipb::ScalarFuncSig::EQInt}, + {"notEquals", tipb::ScalarFuncSig::NEInt}, + {"and", tipb::ScalarFuncSig::LogicalAnd}, + {"or", tipb::ScalarFuncSig::LogicalOr}, + {"xor", tipb::ScalarFuncSig::LogicalXor}, + {"not", tipb::ScalarFuncSig::UnaryNotInt}, + {"greater", tipb::ScalarFuncSig::GTInt}, + {"greaterorequals", tipb::ScalarFuncSig::GEInt}, + {"less", tipb::ScalarFuncSig::LTInt}, + {"lessorequals", tipb::ScalarFuncSig::LEInt}, + {"in", tipb::ScalarFuncSig::InInt}, + {"notin", tipb::ScalarFuncSig::InInt}, + {"date_format", tipb::ScalarFuncSig::DateFormatSig}, + {"if", tipb::ScalarFuncSig::IfInt}, + {"from_unixtime", tipb::ScalarFuncSig::FromUnixTime2Arg}, + /// bit_and/bit_or/bit_xor are aggregate functions in clickhouse/mysql + {"bitand", tipb::ScalarFuncSig::BitAndSig}, + {"bitor", tipb::ScalarFuncSig::BitOrSig}, + {"bitxor", tipb::ScalarFuncSig::BitXorSig}, + {"bitnot", tipb::ScalarFuncSig::BitNegSig}, + {"notequals", tipb::ScalarFuncSig::NEInt}, + {"like", tipb::ScalarFuncSig::LikeSig}, + {"cast_int_int", tipb::ScalarFuncSig::CastIntAsInt}, + {"cast_int_real", tipb::ScalarFuncSig::CastIntAsReal}, + {"cast_real_int", tipb::ScalarFuncSig::CastRealAsInt}, + {"cast_real_real", tipb::ScalarFuncSig::CastRealAsReal}, + {"cast_decimal_int", tipb::ScalarFuncSig::CastDecimalAsInt}, + {"cast_time_int", tipb::ScalarFuncSig::CastTimeAsInt}, + {"cast_string_int", tipb::ScalarFuncSig::CastStringAsInt}, + {"cast_int_decimal", tipb::ScalarFuncSig::CastIntAsDecimal}, + {"cast_real_decimal", tipb::ScalarFuncSig::CastRealAsDecimal}, + {"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal}, + {"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal}, + {"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal}, + {"cast_int_string", tipb::ScalarFuncSig::CastIntAsString}, + {"cast_real_string", tipb::ScalarFuncSig::CastRealAsString}, + {"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString}, + {"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString}, + {"cast_string_string", tipb::ScalarFuncSig::CastStringAsString}, + {"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime}, + {"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, + {"round_int", tipb::ScalarFuncSig::RoundInt}, + {"round_uint", tipb::ScalarFuncSig::RoundInt}, + {"round_dec", tipb::ScalarFuncSig::RoundDec}, + {"round_real", 
tipb::ScalarFuncSig::RoundReal}, + {"round_with_frac_int", tipb::ScalarFuncSig::RoundWithFracInt}, + {"round_with_frac_uint", tipb::ScalarFuncSig::RoundWithFracInt}, + {"round_with_frac_dec", tipb::ScalarFuncSig::RoundWithFracDec}, + {"round_with_frac_real", tipb::ScalarFuncSig::RoundWithFracReal}, +}); + +std::unordered_map<String, tipb::ExprType> agg_func_name_to_sig({ + {"min", tipb::ExprType::Min}, + {"max", tipb::ExprType::Max}, + {"count", tipb::ExprType::Count}, + {"sum", tipb::ExprType::Sum}, + {"first_row", tipb::ExprType::First}, + {"uniqRawRes", tipb::ExprType::ApproxCountDistinct}, + {"group_concat", tipb::ExprType::GroupConcat}, +}); + +DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input) +{ + DAGColumnInfo output = input; + output.second.clearNotNullFlag(); + return output; +} + +void literalToPB(tipb::Expr * expr, const Field & value, uint32_t collator_id) +{ + WriteBufferFromOwnString ss; + switch (value.getType()) + { + case Field::Types::Which::Null: + { + expr->set_tp(tipb::Null); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeNull); + ft->set_collate(collator_id); + // Null literal expr doesn't need value. + break; + } + case Field::Types::Which::UInt64: + { + expr->set_tp(tipb::Uint64); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); + ft->set_collate(collator_id); + encodeDAGUInt64(value.get<UInt64>(), ss); + break; + } + case Field::Types::Which::Int64: + { + expr->set_tp(tipb::Int64); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagNotNull); + ft->set_collate(collator_id); + encodeDAGInt64(value.get<Int64>(), ss); + break; + } + case Field::Types::Which::Float64: + { + expr->set_tp(tipb::Float64); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeFloat); + ft->set_flag(TiDB::ColumnFlagNotNull); + ft->set_collate(collator_id); + encodeDAGFloat64(value.get<Float64>(), ss); + break; + } + case Field::Types::Which::Decimal32: + case Field::Types::Which::Decimal64: + case Field::Types::Which::Decimal128: + case Field::Types::Which::Decimal256: + { + expr->set_tp(tipb::MysqlDecimal); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeNewDecimal); + ft->set_flag(TiDB::ColumnFlagNotNull); + ft->set_collate(collator_id); + encodeDAGDecimal(value, ss); + break; + } + case Field::Types::Which::String: + { + expr->set_tp(tipb::String); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + ft->set_flag(TiDB::ColumnFlagNotNull); + ft->set_collate(collator_id); + // TODO: Align with TiDB. 
+ encodeDAGBytes(value.get<String>(), ss); + break; + } + default: + throw Exception(String("Unsupported literal type: ") + value.getTypeName(), ErrorCodes::LOGICAL_ERROR); + } + expr->set_val(ss.releaseStr()); +} + +String getFunctionNameForConstantFolding(tipb::Expr * expr) +{ + // todo support more function for constant folding + switch (expr->sig()) + { + case tipb::ScalarFuncSig::CastStringAsTime: + return "toMyDateTimeOrNull"; + default: + return ""; + } +} + + +void foldConstant(tipb::Expr * expr, uint32_t collator_id, const Context & context) +{ + if (expr->tp() == tipb::ScalarFunc) + { + bool all_const = true; + for (const auto & c : expr->children()) + { + if (!isLiteralExpr(c)) + { + all_const = false; + break; + } + } + if (!all_const) + return; + DataTypes arguments_types; + ColumnsWithTypeAndName argument_columns; + for (const auto & c : expr->children()) + { + Field value = decodeLiteral(c); + DataTypePtr flash_type = applyVisitor(FieldToDataType(), value); + DataTypePtr target_type = inferDataType4Literal(c); + ColumnWithTypeAndName column; + column.column = target_type->createColumnConst(1, convertFieldToType(value, *target_type, flash_type.get())); + column.name = exprToString(c, {}) + "_" + target_type->getName(); + column.type = target_type; + arguments_types.emplace_back(target_type); + argument_columns.emplace_back(column); + } + auto func_name = getFunctionNameForConstantFolding(expr); + if (func_name.empty()) + return; + const auto & function_builder_ptr = FunctionFactory::instance().get(func_name, context); + auto function_ptr = function_builder_ptr->build(argument_columns); + if (function_ptr->isSuitableForConstantFolding()) + { + Block block_with_constants(argument_columns); + ColumnNumbers argument_numbers(arguments_types.size()); + for (size_t i = 0, size = arguments_types.size(); i < size; i++) + argument_numbers[i] = i; + size_t result_pos = argument_numbers.size(); + block_with_constants.insert({nullptr, function_ptr->getReturnType(), "result"}); + function_ptr->execute(block_with_constants, argument_numbers, result_pos); + const auto & result_column = block_with_constants.getByPosition(result_pos).column; + if (result_column->isColumnConst()) + { + auto updated_value = (*result_column)[0]; + tipb::FieldType orig_field_type = expr->field_type(); + expr->Clear(); + literalToPB(expr, updated_value, collator_id); + expr->clear_field_type(); + auto * field_type = expr->mutable_field_type(); + (*field_type) = orig_field_type; + } + } + } +} + +void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr, uint32_t collator_id, const Context & context); + +void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * expr, uint32_t collator_id); + + +void astToPB(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, uint32_t collator_id, const Context & context) +{ + if (ASTIdentifier * id = typeid_cast<ASTIdentifier *>(ast.get())) + { + identifierToPB(input, id, expr, collator_id); + } + else if (ASTFunction * func = typeid_cast<ASTFunction *>(ast.get())) + { + functionToPB(input, func, expr, collator_id, context); + } + else if (ASTLiteral * lit = typeid_cast<ASTLiteral *>(ast.get())) + { + literalToPB(expr, lit->value, collator_id); + } + else + { + throw Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); + } +} + +void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr, uint32_t collator_id, const Context & context) +{ + /// aggregation function is handled in Aggregation, so just treated as a column + auto ft = 
std::find_if(input.begin(), input.end(), [&](const auto & field) { + auto column_name = splitQualifiedName(func->getColumnName()); + auto field_name = splitQualifiedName(field.first); + if (column_name.first.empty()) + return field_name.second == column_name.second; + else + return field_name.first == column_name.first && field_name.second == column_name.second; + }); + if (ft != input.end()) + { + expr->set_tp(tipb::ColumnRef); + *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); + WriteBufferFromOwnString ss; + encodeDAGInt64(ft - input.begin(), ss); + expr->set_val(ss.releaseStr()); + return; + } + if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + { + throw Exception("No such column " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + } + String func_name_lowercase = Poco::toLower(func->name); + // TODO: Support more functions. + // TODO: Support type inference. + + const auto it_sig = func_name_to_sig.find(func_name_lowercase); + if (it_sig == func_name_to_sig.end()) + { + throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); + } + switch (it_sig->second) + { + case tipb::ScalarFuncSig::InInt: + { + tipb::Expr * in_expr = expr; + if (func_name_lowercase == "notin") + { + // notin is transformed into not(in()) by tidb + expr->set_sig(tipb::ScalarFuncSig::UnaryNotInt); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned); + expr->set_tp(tipb::ExprType::ScalarFunc); + in_expr = expr->add_children(); + } + in_expr->set_sig(tipb::ScalarFuncSig::InInt); + auto * ft = in_expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned); + ft->set_collate(collator_id); + in_expr->set_tp(tipb::ExprType::ScalarFunc); + for (const auto & child_ast : func->arguments->children) + { + auto * tuple_func = typeid_cast<ASTFunction *>(child_ast.get()); + if (tuple_func != nullptr && tuple_func->name == "tuple") + { + // flatten tuple elements + for (const auto & c : tuple_func->arguments->children) + { + tipb::Expr * child = in_expr->add_children(); + astToPB(input, c, child, collator_id, context); + } + } + else + { + tipb::Expr * child = in_expr->add_children(); + astToPB(input, child_ast, child, collator_id, context); + } + } + return; + } + case tipb::ScalarFuncSig::IfInt: + case tipb::ScalarFuncSig::BitAndSig: + case tipb::ScalarFuncSig::BitOrSig: + case tipb::ScalarFuncSig::BitXorSig: + case tipb::ScalarFuncSig::BitNegSig: + expr->set_sig(it_sig->second); + expr->set_tp(tipb::ExprType::ScalarFunc); + for (size_t i = 0; i < func->arguments->children.size(); i++) + { + const auto & child_ast = func->arguments->children[i]; + tipb::Expr * child = expr->add_children(); + astToPB(input, child_ast, child, collator_id, context); + // todo should infer the return type based on all input types + if ((it_sig->second == tipb::ScalarFuncSig::IfInt && i == 1) + || (it_sig->second != tipb::ScalarFuncSig::IfInt && i == 0)) + *(expr->mutable_field_type()) = child->field_type(); + } + return; + case tipb::ScalarFuncSig::LikeSig: + { + expr->set_sig(tipb::ScalarFuncSig::LikeSig); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned); + ft->set_collate(collator_id); + expr->set_tp(tipb::ExprType::ScalarFunc); + for (const auto & child_ast : func->arguments->children) + { + tipb::Expr * child = expr->add_children(); + astToPB(input, child_ast, child, collator_id, context); 
+ } + // for like, we need to add the third argument: the escape character '\' (ASCII 92) + *expr->add_children() = constructInt64LiteralTiExpr(92); + return; + } + case tipb::ScalarFuncSig::FromUnixTime2Arg: + if (func->arguments->children.size() == 1) + { + expr->set_sig(tipb::ScalarFuncSig::FromUnixTime1Arg); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeDatetime); + ft->set_decimal(6); + } + else + { + expr->set_sig(tipb::ScalarFuncSig::FromUnixTime2Arg); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + } + break; + case tipb::ScalarFuncSig::DateFormatSig: + expr->set_sig(tipb::ScalarFuncSig::DateFormatSig); + expr->mutable_field_type()->set_tp(TiDB::TypeString); + break; + case tipb::ScalarFuncSig::CastIntAsTime: + case tipb::ScalarFuncSig::CastRealAsTime: + case tipb::ScalarFuncSig::CastTimeAsTime: + case tipb::ScalarFuncSig::CastDecimalAsTime: + case tipb::ScalarFuncSig::CastStringAsTime: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + if (it_sig->first.find("datetime") != std::string::npos) + { + ft->set_tp(TiDB::TypeDatetime); + } + else + { + ft->set_tp(TiDB::TypeDate); + } + break; + } + case tipb::ScalarFuncSig::CastIntAsReal: + case tipb::ScalarFuncSig::CastRealAsReal: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeDouble); + ft->set_collate(collator_id); + break; + } + case tipb::ScalarFuncSig::RoundInt: + case tipb::ScalarFuncSig::RoundWithFracInt: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + if (it_sig->first.find("uint") != std::string::npos) + ft->set_flag(TiDB::ColumnFlagUnsigned); + ft->set_collate(collator_id); + break; + } + case tipb::ScalarFuncSig::RoundDec: + case tipb::ScalarFuncSig::RoundWithFracDec: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeNewDecimal); + ft->set_collate(collator_id); + break; + } + case tipb::ScalarFuncSig::RoundReal: + case tipb::ScalarFuncSig::RoundWithFracReal: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeDouble); + ft->set_collate(collator_id); + break; + } + default: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned); + ft->set_collate(collator_id); + break; + } + } + expr->set_tp(tipb::ExprType::ScalarFunc); + for (const auto & child_ast : func->arguments->children) + { + tipb::Expr * child = expr->add_children(); + astToPB(input, child_ast, child, collator_id, context); + } + foldConstant(expr, collator_id, context); +} + +void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * expr, uint32_t collator_id) +{ + auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { + auto column_name = splitQualifiedName(id->getColumnName()); + auto field_name = splitQualifiedName(field.first); + if (column_name.first.empty()) + return field_name.second == column_name.second; + else + return field_name.first == column_name.first && field_name.second == column_name.second; + }); + if (ft == input.end()) + throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + expr->set_tp(tipb::ColumnRef); + *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); + expr->mutable_field_type()->set_collate(collator_id); + WriteBufferFromOwnString ss; + encodeDAGInt64(ft - input.begin(), ss); + 
expr->set_val(ss.releaseStr()); +} + +void collectUsedColumnsFromExpr(const DAGSchema & input, ASTPtr ast, std::unordered_set & used_columns) +{ + if (ASTIdentifier * id = typeid_cast(ast.get())) + { + auto column_name = splitQualifiedName(id->getColumnName()); + if (!column_name.first.empty()) + used_columns.emplace(id->getColumnName()); + else + { + bool found = false; + for (const auto & field : input) + { + auto field_name = splitQualifiedName(field.first); + if (field_name.second == column_name.second) + { + if (found) + throw Exception("ambiguous column for " + column_name.second); + found = true; + used_columns.emplace(field.first); + } + } + } + } + else if (ASTFunction * func = typeid_cast(ast.get())) + { + if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + { + used_columns.emplace(func->getColumnName()); + } + else + { + /// check function + auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { + auto column_name = splitQualifiedName(func->getColumnName()); + auto field_name = splitQualifiedName(field.first); + if (column_name.first.empty()) + return field_name.second == column_name.second; + else + return field_name.first == column_name.first && field_name.second == column_name.second; + }); + if (ft != input.end()) + { + used_columns.emplace(func->getColumnName()); + return; + } + for (const auto & child_ast : func->arguments->children) + { + collectUsedColumnsFromExpr(input, child_ast, used_columns); + } + } + } +} + +TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) +{ + TiDB::ColumnInfo ci; + if (ASTIdentifier * id = typeid_cast(ast.get())) + { + /// check column + auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { + auto column_name = splitQualifiedName(id->getColumnName()); + auto field_name = splitQualifiedName(field.first); + if (column_name.first.empty()) + return field_name.second == column_name.second; + else + return field_name.first == column_name.first && field_name.second == column_name.second; + }); + if (ft == input.end()) + throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + ci = ft->second; + } + else if (ASTFunction * func = typeid_cast(ast.get())) + { + /// check function + String func_name_lowercase = Poco::toLower(func->name); + const auto it_sig = func_name_to_sig.find(func_name_lowercase); + if (it_sig == func_name_to_sig.end()) + { + throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); + } + switch (it_sig->second) + { + case tipb::ScalarFuncSig::InInt: + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned; + for (const auto & child_ast : func->arguments->children) + { + auto * tuple_func = typeid_cast(child_ast.get()); + if (tuple_func != nullptr && tuple_func->name == "tuple") + { + // flatten tuple elements + for (const auto & c : tuple_func->arguments->children) + { + compileExpr(input, c); + } + } + else + { + compileExpr(input, child_ast); + } + } + return ci; + case tipb::ScalarFuncSig::IfInt: + case tipb::ScalarFuncSig::BitAndSig: + case tipb::ScalarFuncSig::BitOrSig: + case tipb::ScalarFuncSig::BitXorSig: + case tipb::ScalarFuncSig::BitNegSig: + for (size_t i = 0; i < func->arguments->children.size(); i++) + { + const auto & child_ast = func->arguments->children[i]; + auto child_ci = compileExpr(input, child_ast); + // todo should infer the return type based on all input types + if ((it_sig->second == tipb::ScalarFuncSig::IfInt && i == 1) + || 
(it_sig->second != tipb::ScalarFuncSig::IfInt && i == 0))
+                ci = child_ci;
+        }
+        return ci;
+    case tipb::ScalarFuncSig::LikeSig:
+        ci.tp = TiDB::TypeLongLong;
+        ci.flag = TiDB::ColumnFlagUnsigned;
+        for (const auto & child_ast : func->arguments->children)
+        {
+            compileExpr(input, child_ast);
+        }
+        return ci;
+    case tipb::ScalarFuncSig::FromUnixTime2Arg:
+        if (func->arguments->children.size() == 1)
+        {
+            ci.tp = TiDB::TypeDatetime;
+            ci.decimal = 6;
+        }
+        else
+        {
+            ci.tp = TiDB::TypeString;
+        }
+        break;
+    case tipb::ScalarFuncSig::DateFormatSig:
+        ci.tp = TiDB::TypeString;
+        break;
+    case tipb::ScalarFuncSig::CastIntAsTime:
+    case tipb::ScalarFuncSig::CastRealAsTime:
+    case tipb::ScalarFuncSig::CastTimeAsTime:
+    case tipb::ScalarFuncSig::CastDecimalAsTime:
+    case tipb::ScalarFuncSig::CastStringAsTime:
+        if (it_sig->first.find("datetime") != std::string::npos)
+        {
+            ci.tp = TiDB::TypeDatetime;
+        }
+        else
+        {
+            ci.tp = TiDB::TypeDate;
+        }
+        break;
+    case tipb::ScalarFuncSig::CastIntAsReal:
+    case tipb::ScalarFuncSig::CastRealAsReal:
+    {
+        ci.tp = TiDB::TypeDouble;
+        break;
+    }
+    case tipb::ScalarFuncSig::RoundInt:
+    case tipb::ScalarFuncSig::RoundWithFracInt:
+    {
+        ci.tp = TiDB::TypeLongLong;
+        if (it_sig->first.find("uint") != std::string::npos)
+            ci.flag = TiDB::ColumnFlagUnsigned;
+        break;
+    }
+    case tipb::ScalarFuncSig::RoundDec:
+    case tipb::ScalarFuncSig::RoundWithFracDec:
+    {
+        ci.tp = TiDB::TypeNewDecimal;
+        break;
+    }
+    case tipb::ScalarFuncSig::RoundReal:
+    case tipb::ScalarFuncSig::RoundWithFracReal:
+    {
+        ci.tp = TiDB::TypeDouble;
+        break;
+    }
+    default:
+        ci.tp = TiDB::TypeLongLong;
+        ci.flag = TiDB::ColumnFlagUnsigned;
+        break;
+    }
+    for (const auto & child_ast : func->arguments->children)
+    {
+        compileExpr(input, child_ast);
+    }
+    }
+    else if (ASTLiteral * lit = typeid_cast<ASTLiteral *>(ast.get()))
+    {
+        switch (lit->value.getType())
+        {
+        case Field::Types::Which::Null:
+            ci.tp = TiDB::TypeNull;
+            // Null literal expr doesn't need value.
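+            // compileExpr only infers the ColumnInfo; literal values are encoded
+            // later, when the AST is lowered to a tipb::Expr.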
+ break; + case Field::Types::Which::UInt64: + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned; + break; + case Field::Types::Which::Int64: + ci.tp = TiDB::TypeLongLong; + break; + case Field::Types::Which::Float64: + ci.tp = TiDB::TypeDouble; + break; + case Field::Types::Which::Decimal32: + case Field::Types::Which::Decimal64: + case Field::Types::Which::Decimal128: + case Field::Types::Which::Decimal256: + ci.tp = TiDB::TypeNewDecimal; + break; + case Field::Types::Which::String: + ci.tp = TiDB::TypeString; + break; + default: + throw Exception(String("Unsupported literal type: ") + lit->value.getTypeName(), ErrorCodes::LOGICAL_ERROR); + } + } + else + { + /// not supported unless this is a literal + throw Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); + } + return ci; +} + +void compileFilter(const DAGSchema & input, ASTPtr ast, std::vector & conditions) +{ + if (auto * func = typeid_cast(ast.get())) + { + if (func->name == "and") + { + for (auto & child : func->arguments->children) + { + compileFilter(input, child, conditions); + } + return; + } + } + conditions.push_back(ast); + compileExpr(input, ast); +} +} // namespace + +namespace Debug +{ +String LOCAL_HOST = "127.0.0.1:3930"; +void setServiceAddr(const std::string & addr) +{ + LOCAL_HOST = addr; +} +} // namespace Debug + +std::pair splitQualifiedName(const String & s) +{ + std::pair ret; + Poco::StringTokenizer string_tokens(s, "."); + if (string_tokens.count() == 1) + { + ret.second = s; + } + else if (string_tokens.count() == 2) + { + ret.first = string_tokens[0]; + ret.second = string_tokens[1]; + } + else + { + throw Exception("Invalid identifier name"); + } + return ret; +} + +namespace mock +{ +bool ExchangeSender::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeExchangeSender); + tipb_executor->set_executor_id(name); + tipb::ExchangeSender * exchange_sender = tipb_executor->mutable_exchange_sender(); + exchange_sender->set_tp(type); + for (auto i : partition_keys) + { + auto * expr = exchange_sender->add_partition_keys(); + expr->set_tp(tipb::ColumnRef); + WriteBufferFromOwnString ss; + encodeDAGInt64(i, ss); + expr->set_val(ss.releaseStr()); + auto tipb_type = TiDB::columnInfoToFieldType(output_schema[i].second); + *expr->mutable_field_type() = tipb_type; + tipb_type.set_collate(collator_id); + *exchange_sender->add_types() = tipb_type; + } + for (auto task_id : mpp_info.sender_target_task_ids) + { + mpp::TaskMeta meta; + meta.set_start_ts(mpp_info.start_ts); + meta.set_task_id(task_id); + meta.set_partition_id(mpp_info.partition_id); + meta.set_address(Debug::LOCAL_HOST); + auto * meta_string = exchange_sender->add_encoded_task_meta(); + meta.AppendToString(meta_string); + } + auto * child_executor = exchange_sender->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} + +bool ExchangeReceiver::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context &) +{ + tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); + tipb_executor->set_executor_id(name); + tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); + for (auto & field : output_schema) + { + auto tipb_type = TiDB::columnInfoToFieldType(field.second); + tipb_type.set_collate(collator_id); + + auto * field_type = 
exchange_receiver->add_field_types(); + *field_type = tipb_type; + } + auto it = mpp_info.receiver_source_task_ids_map.find(name); + if (it == mpp_info.receiver_source_task_ids_map.end()) + throw Exception("Can not found mpp receiver info"); + for (size_t i = 0; i < it->second.size(); i++) + { + mpp::TaskMeta meta; + meta.set_start_ts(mpp_info.start_ts); + meta.set_task_id(it->second[i]); + meta.set_partition_id(i); + meta.set_address(Debug::LOCAL_HOST); + auto * meta_string = exchange_receiver->add_encoded_task_meta(); + meta.AppendToString(meta_string); + } + return true; +} + +void TableScan::columnPrune(std::unordered_set & used_columns) +{ + output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), + output_schema.end()); +} +bool TableScan::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t, const MPPInfo &, const Context &) +{ + if (table_info.is_partition_table) + { + tipb_executor->set_tp(tipb::ExecType::TypePartitionTableScan); + tipb_executor->set_executor_id(name); + auto * partition_ts = tipb_executor->mutable_partition_table_scan(); + partition_ts->set_table_id(table_info.id); + for (const auto & info : output_schema) + setTipbColumnInfo(partition_ts->add_columns(), info); + for (const auto & partition : table_info.partition.definitions) + partition_ts->add_partition_ids(partition.id); + } + else + { + tipb_executor->set_tp(tipb::ExecType::TypeTableScan); + tipb_executor->set_executor_id(name); + auto * ts = tipb_executor->mutable_tbl_scan(); + ts->set_table_id(table_info.id); + for (const auto & info : output_schema) + setTipbColumnInfo(ts->add_columns(), info); + } + return true; +} + +bool Selection::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeSelection); + tipb_executor->set_executor_id(name); + auto * sel = tipb_executor->mutable_selection(); + for (auto & expr : conditions) + { + tipb::Expr * cond = sel->add_conditions(); + astToPB(children[0]->output_schema, expr, cond, collator_id, context); + } + auto * child_executor = sel->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} +void Selection::columnPrune(std::unordered_set & used_columns) +{ + for (auto & expr : conditions) + collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); + children[0]->columnPrune(used_columns); + /// update output schema after column prune + output_schema = children[0]->output_schema; +} + +bool TopN::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeTopN); + tipb_executor->set_executor_id(name); + tipb::TopN * topn = tipb_executor->mutable_topn(); + for (const auto & child : order_columns) + { + ASTOrderByElement * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = topn->add_order_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + topn->set_limit(limit); + auto * child_executor = topn->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} +void TopN::columnPrune(std::unordered_set & used_columns) +{ + for (auto & expr 
: order_columns) + collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); + children[0]->columnPrune(used_columns); + /// update output schema after column prune + output_schema = children[0]->output_schema; +} + +bool Limit::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeLimit); + tipb_executor->set_executor_id(name); + tipb::Limit * lt = tipb_executor->mutable_limit(); + lt->set_limit(limit); + auto * child_executor = lt->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} +void Limit::columnPrune(std::unordered_set & used_columns) +{ + children[0]->columnPrune(used_columns); + /// update output schema after column prune + output_schema = children[0]->output_schema; +} + +bool Aggregation::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeAggregation); + tipb_executor->set_executor_id(name); + auto * agg = tipb_executor->mutable_aggregation(); + auto & input_schema = children[0]->output_schema; + for (const auto & expr : agg_exprs) + { + const ASTFunction * func = typeid_cast(expr.get()); + if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); + + tipb::Expr * agg_func = agg->add_agg_func(); + + for (const auto & arg : func->arguments->children) + { + tipb::Expr * arg_expr = agg_func->add_children(); + astToPB(input_schema, arg, arg_expr, collator_id, context); + } + auto agg_sig_it = agg_func_name_to_sig.find(func->name); + if (agg_sig_it == agg_func_name_to_sig.end()) + throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + auto agg_sig = agg_sig_it->second; + agg_func->set_tp(agg_sig); + + if (agg_sig == tipb::ExprType::Count || agg_sig == tipb::ExprType::Sum) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); + } + else if (agg_sig == tipb::ExprType::Min || agg_sig == tipb::ExprType::Max || agg_sig == tipb::ExprType::First) + { + if (agg_func->children_size() != 1) + throw Exception("udaf " + func->name + " only accept 1 argument"); + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(agg_func->children(0).field_type().tp()); + ft->set_decimal(agg_func->children(0).field_type().decimal()); + ft->set_flag(agg_func->children(0).field_type().flag() & (~TiDB::ColumnFlagNotNull)); + ft->set_collate(collator_id); + } + else if (agg_sig == tipb::ExprType::ApproxCountDistinct) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + ft->set_flag(1); + } + else if (agg_sig == tipb::ExprType::GroupConcat) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + } + if (is_final_mode) + agg_func->set_aggfuncmode(tipb::AggFunctionMode::FinalMode); + else + agg_func->set_aggfuncmode(tipb::AggFunctionMode::Partial1Mode); + } + + for (const auto & child : gby_exprs) + { + tipb::Expr * gby = agg->add_group_by(); + astToPB(input_schema, child, gby, collator_id, context); + } + + auto * child_executor = agg->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} +void 
Aggregation::columnPrune(std::unordered_set & used_columns) +{ + /// output schema for partial agg is the original agg's output schema + output_schema_for_partial_agg = output_schema; + output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), + output_schema.end()); + std::unordered_set used_input_columns; + for (auto & func : agg_exprs) + { + if (used_columns.find(func->getColumnName()) != used_columns.end()) + { + const ASTFunction * agg_func = typeid_cast(func.get()); + if (agg_func != nullptr) + { + /// agg_func should not be nullptr, just double check + for (auto & child : agg_func->arguments->children) + collectUsedColumnsFromExpr(children[0]->output_schema, child, used_input_columns); + } + } + } + for (auto & gby_expr : gby_exprs) + { + collectUsedColumnsFromExpr(children[0]->output_schema, gby_expr, used_input_columns); + } + children[0]->columnPrune(used_input_columns); +} +void Aggregation::toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) +{ + if (!is_final_mode) + { + children[0]->toMPPSubPlan(executor_index, properties, exchange_map); + return; + } + /// for aggregation, change aggregation to partial_aggregation => exchange_sender => exchange_receiver => final_aggregation + // todo support avg + if (has_uniq_raw_res) + throw Exception("uniq raw res not supported in mpp query"); + std::shared_ptr partial_agg = std::make_shared( + executor_index, + output_schema_for_partial_agg, + has_uniq_raw_res, + false, + std::move(agg_exprs), + std::move(gby_exprs), + false); + partial_agg->children.push_back(children[0]); + std::vector partition_keys; + size_t agg_func_num = partial_agg->agg_exprs.size(); + for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) + { + partition_keys.push_back(i + agg_func_num); + } + std::shared_ptr exchange_sender + = std::make_shared(executor_index, output_schema_for_partial_agg, partition_keys.empty() ? 
tipb::PassThrough : tipb::Hash, partition_keys); + exchange_sender->children.push_back(partial_agg); + + std::shared_ptr exchange_receiver + = std::make_shared(executor_index, output_schema_for_partial_agg); + exchange_map[exchange_receiver->name] = std::make_pair(exchange_receiver, exchange_sender); + /// re-construct agg_exprs and gby_exprs in final_agg + for (size_t i = 0; i < partial_agg->agg_exprs.size(); i++) + { + const ASTFunction * agg_func = typeid_cast(partial_agg->agg_exprs[i].get()); + ASTPtr update_agg_expr = agg_func->clone(); + auto * update_agg_func = typeid_cast(update_agg_expr.get()); + if (agg_func->name == "count") + update_agg_func->name = "sum"; + update_agg_func->arguments->children.clear(); + update_agg_func->arguments->children.push_back(std::make_shared(output_schema_for_partial_agg[i].first)); + agg_exprs.push_back(update_agg_expr); + } + for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) + { + gby_exprs.push_back(std::make_shared(output_schema_for_partial_agg[agg_func_num + i].first)); + } + children[0] = exchange_receiver; +} + +bool Project::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeProjection); + tipb_executor->set_executor_id(name); + auto * proj = tipb_executor->mutable_projection(); + auto & input_schema = children[0]->output_schema; + for (const auto & child : exprs) + { + if (typeid_cast(child.get())) + { + /// special case, select * + for (size_t i = 0; i < input_schema.size(); i++) + { + tipb::Expr * expr = proj->add_exprs(); + expr->set_tp(tipb::ColumnRef); + *(expr->mutable_field_type()) = columnInfoToFieldType(input_schema[i].second); + WriteBufferFromOwnString ss; + encodeDAGInt64(i, ss); + expr->set_val(ss.releaseStr()); + } + continue; + } + tipb::Expr * expr = proj->add_exprs(); + astToPB(input_schema, child, expr, collator_id, context); + } + auto * children_executor = proj->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); +} +void Project::columnPrune(std::unordered_set & used_columns) +{ + output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), + output_schema.end()); + std::unordered_set used_input_columns; + for (auto & expr : exprs) + { + if (typeid_cast(expr.get())) + { + /// for select *, just add all its input columns, maybe + /// can do some optimization, but it is not worth for mock + /// tests + for (auto & field : children[0]->output_schema) + { + used_input_columns.emplace(field.first); + } + break; + } + if (used_columns.find(expr->getColumnName()) != used_columns.end()) + { + collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_input_columns); + } + } + children[0]->columnPrune(used_input_columns); +} + +void Join::columnPrune(std::unordered_set & used_columns) +{ + std::unordered_set left_columns; + std::unordered_set right_columns; + for (auto & field : children[0]->output_schema) + left_columns.emplace(field.first); + for (auto & field : children[1]->output_schema) + right_columns.emplace(field.first); + + std::unordered_set left_used_columns; + std::unordered_set right_used_columns; + for (const auto & s : used_columns) + { + if (left_columns.find(s) != left_columns.end()) + left_used_columns.emplace(s); + else + right_used_columns.emplace(s); + } + for (const auto & child : join_params.using_expression_list->children) + { + if 
(auto * identifier = typeid_cast(child.get())) + { + auto col_name = identifier->getColumnName(); + for (auto & field : children[0]->output_schema) + { + if (col_name == splitQualifiedName(field.first).second) + { + left_used_columns.emplace(field.first); + break; + } + } + for (auto & field : children[1]->output_schema) + { + if (col_name == splitQualifiedName(field.first).second) + { + right_used_columns.emplace(field.first); + break; + } + } + } + else + { + throw Exception("Only support Join on columns"); + } + } + children[0]->columnPrune(left_used_columns); + children[1]->columnPrune(right_used_columns); + output_schema.clear(); + /// update output schema + for (auto & field : children[0]->output_schema) + { + if (join_params.kind == ASTTableJoin::Kind::Right && field.second.hasNotNullFlag()) + output_schema.push_back(toNullableDAGColumnInfo(field)); + else + output_schema.push_back(field); + } + for (auto & field : children[1]->output_schema) + { + if (join_params.kind == ASTTableJoin::Kind::Left && field.second.hasNotNullFlag()) + output_schema.push_back(toNullableDAGColumnInfo(field)); + else + output_schema.push_back(field); + } +} + +void Join::fillJoinKeyAndFieldType( + ASTPtr key, + const DAGSchema & schema, + tipb::Expr * tipb_key, + tipb::FieldType * tipb_field_type, + uint32_t collator_id) +{ + auto * identifier = typeid_cast(key.get()); + for (size_t index = 0; index < schema.size(); index++) + { + const auto & field = schema[index]; + if (splitQualifiedName(field.first).second == identifier->getColumnName()) + { + auto tipb_type = TiDB::columnInfoToFieldType(field.second); + tipb_type.set_collate(collator_id); + + tipb_key->set_tp(tipb::ColumnRef); + WriteBufferFromOwnString ss; + encodeDAGInt64(index, ss); + tipb_key->set_val(ss.releaseStr()); + *tipb_key->mutable_field_type() = tipb_type; + + *tipb_field_type = tipb_type; + break; + } + } +} +bool Join::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeJoin); + tipb_executor->set_executor_id(name); + tipb::Join * join = tipb_executor->mutable_join(); + switch (join_params.kind) // todo support more type... 
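+    // Only inner, left and right joins are mapped below; any other kind throws "Unsupported join type".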
+ { + case ASTTableJoin::Kind::Inner: + join->set_join_type(tipb::JoinType::TypeInnerJoin); + break; + case ASTTableJoin::Kind::Left: + join->set_join_type(tipb::JoinType::TypeLeftOuterJoin); + break; + case ASTTableJoin::Kind::Right: + join->set_join_type(tipb::JoinType::TypeRightOuterJoin); + break; + default: + throw Exception("Unsupported join type"); + } + join->set_join_exec_type(tipb::JoinExecType::TypeHashJoin); + join->set_inner_idx(1); + for (auto & key : join_params.using_expression_list->children) + { + fillJoinKeyAndFieldType(key, children[0]->output_schema, join->add_left_join_keys(), join->add_probe_types(), collator_id); + fillJoinKeyAndFieldType(key, children[1]->output_schema, join->add_right_join_keys(), join->add_build_types(), collator_id); + } + auto * left_child_executor = join->add_children(); + children[0]->toTiPBExecutor(left_child_executor, collator_id, mpp_info, context); + auto * right_child_executor = join->add_children(); + return children[1]->toTiPBExecutor(right_child_executor, collator_id, mpp_info, context); +} +void Join::toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) +{ + if (properties.use_broadcast_join) + { + /// for broadcast join, always use right side as the broadcast side + std::shared_ptr right_exchange_sender + = std::make_shared(executor_index, children[1]->output_schema, tipb::Broadcast); + right_exchange_sender->children.push_back(children[1]); + + std::shared_ptr right_exchange_receiver + = std::make_shared(executor_index, children[1]->output_schema); + children[1] = right_exchange_receiver; + exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); + return; + } + std::vector left_partition_keys; + std::vector right_partition_keys; + for (auto & key : join_params.using_expression_list->children) + { + size_t index = 0; + for (; index < children[0]->output_schema.size(); index++) + { + if (splitQualifiedName(children[0]->output_schema[index].first).second == key->getColumnName()) + { + left_partition_keys.push_back(index); + break; + } + } + index = 0; + for (; index < children[1]->output_schema.size(); index++) + { + if (splitQualifiedName(children[1]->output_schema[index].first).second == key->getColumnName()) + { + right_partition_keys.push_back(index); + break; + } + } + } + std::shared_ptr left_exchange_sender + = std::make_shared(executor_index, children[0]->output_schema, tipb::Hash, left_partition_keys); + left_exchange_sender->children.push_back(children[0]); + std::shared_ptr right_exchange_sender + = std::make_shared(executor_index, children[1]->output_schema, tipb::Hash, right_partition_keys); + right_exchange_sender->children.push_back(children[1]); + + std::shared_ptr left_exchange_receiver + = std::make_shared(executor_index, children[0]->output_schema); + std::shared_ptr right_exchange_receiver + = std::make_shared(executor_index, children[1]->output_schema); + children[0] = left_exchange_receiver; + children[1] = right_exchange_receiver; + + exchange_map[left_exchange_receiver->name] = std::make_pair(left_exchange_receiver, left_exchange_sender); + exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); +} +} // namespace mock + +ExecutorPtr compileTableScan(size_t & executor_index, TableInfo & table_info, String & table_alias, bool append_pk_column) +{ + DAGSchema ts_output; + for (const auto & column_info : table_info.columns) + { + 
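// Mirror each TiDB column's meta (type, flags, length/precision, defaults) into the mock DAG schema.
+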
ColumnInfo ci; + ci.tp = column_info.tp; + ci.flag = column_info.flag; + ci.flen = column_info.flen; + ci.decimal = column_info.decimal; + ci.elems = column_info.elems; + ci.default_value = column_info.default_value; + ci.origin_default_value = column_info.origin_default_value; + /// use qualified name as the column name to handle multiple table queries, not very + /// efficient but functionally enough for mock test + ts_output.emplace_back(std::make_pair(table_alias + "." + column_info.name, std::move(ci))); + } + if (append_pk_column) + { + ColumnInfo ci; + ci.tp = TiDB::TypeLongLong; + ci.setPriKeyFlag(); + ci.setNotNullFlag(); + ts_output.emplace_back(std::make_pair(MutableSupport::tidb_pk_column_name, std::move(ci))); + } + + return std::make_shared(executor_index, ts_output, table_info); +} + +ExecutorPtr compileSelection(ExecutorPtr input, size_t & executor_index, ASTPtr filter) +{ + std::vector conditions; + compileFilter(input->output_schema, filter, conditions); + auto selection = std::make_shared(executor_index, input->output_schema, std::move(conditions)); + selection->children.push_back(input); + return selection; +} + +ExecutorPtr compileTopN(ExecutorPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr) +{ + std::vector order_columns; + for (const auto & child : order_exprs->children) + { + ASTOrderByElement * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + auto limit = safeGet(typeid_cast(*limit_expr).value); + auto top_n = std::make_shared(executor_index, input->output_schema, std::move(order_columns), limit); + top_n->children.push_back(input); + return top_n; +} + +ExecutorPtr compileLimit(ExecutorPtr input, size_t & executor_index, ASTPtr limit_expr) +{ + auto limit_length = safeGet(typeid_cast(*limit_expr).value); + auto limit = std::make_shared(executor_index, input->output_schema, limit_length); + limit->children.push_back(input); + return limit; +} + +ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs) +{ + std::vector agg_exprs; + std::vector gby_exprs; + DAGSchema output_schema; + bool has_uniq_raw_res = false; + bool need_append_project = false; + if (agg_funcs != nullptr) + { + for (const auto & expr : agg_funcs->children) + { + const ASTFunction * func = typeid_cast(expr.get()); + if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + { + need_append_project = true; + continue; + } + + agg_exprs.push_back(expr); + std::vector children_ci; + + for (const auto & arg : func->arguments->children) + { + children_ci.push_back(compileExpr(input->output_schema, arg)); + } + + TiDB::ColumnInfo ci; + if (func->name == "count") + { + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull; + } + else if (func->name == "max" || func->name == "min" || func->name == "first_row") + { + ci = children_ci[0]; + ci.flag &= ~TiDB::ColumnFlagNotNull; + } + else if (func->name == uniq_raw_res_name) + { + has_uniq_raw_res = true; + ci.tp = TiDB::TypeString; + ci.flag = 1; + } + // TODO: Other agg func. 
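+            // Only count, max/min/first_row and uniqRawRes get a result type here; other aggregates are rejected below.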
+ else + { + throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + } + + output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); + } + } + + if (group_by_exprs != nullptr) + { + for (const auto & child : group_by_exprs->children) + { + gby_exprs.push_back(child); + auto ci = compileExpr(input->output_schema, child); + output_schema.emplace_back(std::make_pair(child->getColumnName(), ci)); + } + } + + auto aggregation = std::make_shared( + executor_index, + output_schema, + has_uniq_raw_res, + need_append_project, + std::move(agg_exprs), + std::move(gby_exprs), + true); + aggregation->children.push_back(input); + return aggregation; +} + +ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr select_list) +{ + std::vector exprs; + DAGSchema output_schema; + for (const auto & expr : select_list->children) + { + if (typeid_cast(expr.get())) + { + /// special case, select * + exprs.push_back(expr); + const auto & last_output = input->output_schema; + for (const auto & field : last_output) + { + // todo need to use the subquery alias to reconstruct the field + // name if subquery is supported + output_schema.emplace_back(field.first, field.second); + } + } + else + { + exprs.push_back(expr); + auto ft = std::find_if(input->output_schema.begin(), input->output_schema.end(), [&](const auto & field) { return field.first == expr->getColumnName(); }); + if (ft != input->output_schema.end()) + { + output_schema.emplace_back(ft->first, ft->second); + continue; + } + const ASTFunction * func = typeid_cast(expr.get()); + if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + { + throw Exception("No such agg " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + } + else + { + auto ci = compileExpr(input->output_schema, expr); + // todo need to use the subquery alias to reconstruct the field + // name if subquery is supported + output_schema.emplace_back(std::make_pair(expr->getColumnName(), ci)); + } + } + } + + auto project = std::make_shared(executor_index, output_schema, std::move(exprs)); + project->children.push_back(input); + return project; +} + +ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params) +{ + DAGSchema output_schema; + const auto & join_params = (static_cast(*params)); + for (auto & field : left->output_schema) + { + if (join_params.kind == ASTTableJoin::Kind::Right && field.second.hasNotNullFlag()) + output_schema.push_back(toNullableDAGColumnInfo(field)); + else + output_schema.push_back(field); + } + for (auto & field : right->output_schema) + { + if (join_params.kind == ASTTableJoin::Kind::Left && field.second.hasNotNullFlag()) + output_schema.push_back(toNullableDAGColumnInfo(field)); + else + output_schema.push_back(field); + } + auto join = std::make_shared(executor_index, output_schema, params); + join->children.push_back(left); + join->children.push_back(right); + return join; +} + +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h new file mode 100644 index 00000000000..4f1b262e76d --- /dev/null +++ b/dbms/src/Debug/astToExecutor.h @@ -0,0 +1,287 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +extern const int NO_SUCH_COLUMN_IN_TABLE; +} // namespace ErrorCodes + +using DAGColumnInfo = std::pair; +using DAGSchema = std::vector; + +namespace Debug +{ +extern String LOCAL_HOST; +void setServiceAddr(const std::string & addr); +} // namespace Debug + +std::pair splitQualifiedName(const String & s); + +struct MPPCtx +{ + Timestamp start_ts; + Int64 next_task_id; + std::vector sender_target_task_ids; + explicit MPPCtx(Timestamp start_ts_) + : start_ts(start_ts_) + , next_task_id(1) + {} +}; + +using MPPCtxPtr = std::shared_ptr; + +struct MPPInfo +{ + Timestamp start_ts; + Int64 partition_id; + Int64 task_id; + const std::vector & sender_target_task_ids; + const std::unordered_map> & receiver_source_task_ids_map; + MPPInfo(Timestamp start_ts_, Int64 partition_id_, Int64 task_id_, const std::vector & sender_target_task_ids_, const std::unordered_map> & receiver_source_task_ids_map_) + : start_ts(start_ts_) + , partition_id(partition_id_) + , task_id(task_id_) + , sender_target_task_ids(sender_target_task_ids_) + , receiver_source_task_ids_map(receiver_source_task_ids_map_) + {} +}; + +struct TaskMeta +{ + UInt64 start_ts = 0; + Int64 task_id = 0; + Int64 partition_id = 0; +}; + +using TaskMetas = std::vector; + +namespace mock +{ +struct ExchangeSender; +struct ExchangeReceiver; +struct Executor +{ + size_t index; + String name; + DAGSchema output_schema; + std::vector> children; + virtual void columnPrune(std::unordered_set & used_columns) = 0; + Executor(size_t & index_, String && name_, const DAGSchema & output_schema_) + : index(index_) + , name(std::move(name_)) + , output_schema(output_schema_) + { + index_++; + } + virtual bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) + = 0; + virtual void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) + { + children[0]->toMPPSubPlan(executor_index, properties, exchange_map); + } + virtual ~Executor() = default; +}; + +struct ExchangeSender : Executor +{ + tipb::ExchangeType type; + TaskMetas task_metas; + std::vector partition_keys; + ExchangeSender(size_t & index, const DAGSchema & output, tipb::ExchangeType type_, const std::vector & partition_keys_ = {}) + : Executor(index, "exchange_sender_" + std::to_string(index), output) + , type(type_) + , partition_keys(partition_keys_) + {} + void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; +}; + +struct ExchangeReceiver : Executor +{ + TaskMetas task_metas; + ExchangeReceiver(size_t & index, const DAGSchema & output) + : Executor(index, "exchange_receiver_" + std::to_string(index), output) + {} + 
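// Receivers are introduced while splitting MPP sub-plans, after column pruning has already run; hence the throw below.
+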
void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context &) override; +}; + +struct TableScan : public Executor +{ + TableInfo table_info; + /// used by column pruner + TableScan(size_t & index_, const DAGSchema & output_schema_, const TableInfo & table_info_) + : Executor(index_, "table_scan_" + std::to_string(index_), output_schema_) + , table_info(table_info_) + {} + void columnPrune(std::unordered_set & used_columns) override; + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t, const MPPInfo &, const Context &) override; + void toMPPSubPlan(size_t &, const DAGProperties &, std::unordered_map, std::shared_ptr>> &) override + {} + + void setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const + { + auto column_name = splitQualifiedName(dag_column_info.first).second; + if (column_name == MutableSupport::tidb_pk_column_name) + ci->set_column_id(-1); + else + ci->set_column_id(table_info.getColumnID(column_name)); + ci->set_tp(dag_column_info.second.tp); + ci->set_flag(dag_column_info.second.flag); + ci->set_columnlen(dag_column_info.second.flen); + ci->set_decimal(dag_column_info.second.decimal); + if (!dag_column_info.second.elems.empty()) + { + for (const auto & pair : dag_column_info.second.elems) + { + ci->add_elems(pair.first); + } + } + } +}; + +struct Selection : public Executor +{ + std::vector conditions; + Selection(size_t & index_, const DAGSchema & output_schema_, std::vector conditions_) + : Executor(index_, "selection_" + std::to_string(index_), output_schema_) + , conditions(std::move(conditions_)) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; +}; + +struct TopN : public Executor +{ + std::vector order_columns; + size_t limit; + TopN(size_t & index_, const DAGSchema & output_schema_, std::vector order_columns_, size_t limit_) + : Executor(index_, "topn_" + std::to_string(index_), output_schema_) + , order_columns(std::move(order_columns_)) + , limit(limit_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; +}; + +struct Limit : public Executor +{ + size_t limit; + Limit(size_t & index_, const DAGSchema & output_schema_, size_t limit_) + : Executor(index_, "limit_" + std::to_string(index_), output_schema_) + , limit(limit_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; +}; + +struct Aggregation : public Executor +{ + bool has_uniq_raw_res; + bool need_append_project; + std::vector agg_exprs; + std::vector gby_exprs; + bool is_final_mode; + DAGSchema output_schema_for_partial_agg; + Aggregation(size_t & index_, const DAGSchema & output_schema_, bool has_uniq_raw_res_, bool need_append_project_, std::vector agg_exprs_, std::vector gby_exprs_, bool is_final_mode_) + : Executor(index_, "aggregation_" + std::to_string(index_), output_schema_) + , has_uniq_raw_res(has_uniq_raw_res_) + , need_append_project(need_append_project_) + , agg_exprs(std::move(agg_exprs_)) + , gby_exprs(std::move(gby_exprs_)) + , 
is_final_mode(is_final_mode_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; + void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; +}; + +struct Project : public Executor +{ + std::vector exprs; + Project(size_t & index_, const DAGSchema & output_schema_, std::vector && exprs_) + : Executor(index_, "project_" + std::to_string(index_), output_schema_) + , exprs(std::move(exprs_)) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; +}; + +struct Join : Executor +{ + ASTPtr params; + const ASTTableJoin & join_params; + Join(size_t & index_, const DAGSchema & output_schema_, ASTPtr params_) + : Executor(index_, "Join_" + std::to_string(index_), output_schema_) + , params(params_) + , join_params(static_cast(*params)) + { + if (join_params.using_expression_list == nullptr) + throw Exception("No join condition found."); + if (join_params.strictness != ASTTableJoin::Strictness::All) + throw Exception("Only support join with strictness ALL"); + } + + void columnPrune(std::unordered_set & used_columns) override; + + static void fillJoinKeyAndFieldType( + ASTPtr key, + const DAGSchema & schema, + tipb::Expr * tipb_key, + tipb::FieldType * tipb_field_type, + uint32_t collator_id); + + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + + void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; +}; +} // namespace mock + +using ExecutorPtr = std::shared_ptr; + +ExecutorPtr compileTableScan(size_t & executor_index, TableInfo & table_info, String & table_alias, bool append_pk_column); + +ExecutorPtr compileSelection(ExecutorPtr input, size_t & executor_index, ASTPtr filter); + +ExecutorPtr compileTopN(ExecutorPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr); + +ExecutorPtr compileLimit(ExecutorPtr input, size_t & executor_index, ASTPtr limit_expr); + +ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs); + +ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr select_list); + +ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params); + +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 3e5fc0ea7fc..c17f26d45b7 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -73,113 +75,6 @@ static const String MPP_QUERY = "mpp_query"; static const String USE_BROADCAST_JOIN = "use_broadcast_join"; static const String MPP_PARTITION_NUM = "mpp_partition_num"; static const String MPP_TIMEOUT = "mpp_timeout"; -static String LOCAL_HOST = "127.0.0.1:3930"; - -namespace Debug -{ -void setServiceAddr(const std::string & addr) -{ - LOCAL_HOST = addr; -} -} // namespace Debug - - -std::unordered_map func_name_to_sig({ - {"equals", tipb::ScalarFuncSig::EQInt}, - {"and", 
tipb::ScalarFuncSig::LogicalAnd}, - {"or", tipb::ScalarFuncSig::LogicalOr}, - {"xor", tipb::ScalarFuncSig::LogicalXor}, - {"not", tipb::ScalarFuncSig::UnaryNotInt}, - {"greater", tipb::ScalarFuncSig::GTInt}, - {"greaterorequals", tipb::ScalarFuncSig::GEInt}, - {"less", tipb::ScalarFuncSig::LTInt}, - {"lessorequals", tipb::ScalarFuncSig::LEInt}, - {"in", tipb::ScalarFuncSig::InInt}, - {"notin", tipb::ScalarFuncSig::InInt}, - {"date_format", tipb::ScalarFuncSig::DateFormatSig}, - {"if", tipb::ScalarFuncSig::IfInt}, - {"from_unixtime", tipb::ScalarFuncSig::FromUnixTime2Arg}, - /// bit_and/bit_or/bit_xor is aggregated function in clickhouse/mysql - {"bitand", tipb::ScalarFuncSig::BitAndSig}, - {"bitor", tipb::ScalarFuncSig::BitOrSig}, - {"bitxor", tipb::ScalarFuncSig::BitXorSig}, - {"bitnot", tipb::ScalarFuncSig::BitNegSig}, - {"notequals", tipb::ScalarFuncSig::NEInt}, - {"like", tipb::ScalarFuncSig::LikeSig}, - {"cast_int_int", tipb::ScalarFuncSig::CastIntAsInt}, - {"cast_int_real", tipb::ScalarFuncSig::CastIntAsReal}, - {"cast_real_int", tipb::ScalarFuncSig::CastRealAsInt}, - {"cast_real_real", tipb::ScalarFuncSig::CastRealAsReal}, - {"cast_decimal_int", tipb::ScalarFuncSig::CastDecimalAsInt}, - {"cast_time_int", tipb::ScalarFuncSig::CastTimeAsInt}, - {"cast_string_int", tipb::ScalarFuncSig::CastStringAsInt}, - {"cast_int_decimal", tipb::ScalarFuncSig::CastIntAsDecimal}, - {"cast_real_decimal", tipb::ScalarFuncSig::CastRealAsDecimal}, - {"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal}, - {"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal}, - {"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal}, - {"cast_int_string", tipb::ScalarFuncSig::CastIntAsString}, - {"cast_real_string", tipb::ScalarFuncSig::CastRealAsString}, - {"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString}, - {"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString}, - {"cast_string_string", tipb::ScalarFuncSig::CastStringAsString}, - {"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime}, - {"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime}, - {"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime}, - {"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime}, - {"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime}, - {"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime}, - {"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime}, - {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, - {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, - {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, - {"round_int", tipb::ScalarFuncSig::RoundInt}, - {"round_uint", tipb::ScalarFuncSig::RoundInt}, - {"round_dec", tipb::ScalarFuncSig::RoundDec}, - {"round_real", tipb::ScalarFuncSig::RoundReal}, - {"round_with_frac_int", tipb::ScalarFuncSig::RoundWithFracInt}, - {"round_with_frac_uint", tipb::ScalarFuncSig::RoundWithFracInt}, - {"round_with_frac_dec", tipb::ScalarFuncSig::RoundWithFracDec}, - {"round_with_frac_real", tipb::ScalarFuncSig::RoundWithFracReal}, - -}); - -std::unordered_map agg_func_name_to_sig({ - {"min", tipb::ExprType::Min}, - {"max", tipb::ExprType::Max}, - {"count", tipb::ExprType::Count}, - {"sum", tipb::ExprType::Sum}, - {"first_row", tipb::ExprType::First}, - {"uniqRawRes", tipb::ExprType::ApproxCountDistinct}, - {"group_concat", tipb::ExprType::GroupConcat}, -}); - -std::pair splitQualifiedName(String s) -{ - std::pair ret; - Poco::StringTokenizer string_tokens(s, "."); - if (string_tokens.count() == 1) - { - 
ret.second = s; - } - else if (string_tokens.count() == 2) - { - ret.first = string_tokens[0]; - ret.second = string_tokens[1]; - } - else - { - throw Exception("Invalid identifier name"); - } - return ret; -} - -DAGColumnInfo toNullableDAGColumnInfo(DAGColumnInfo & input) -{ - DAGColumnInfo output = input; - output.second.clearNotNullFlag(); - return output; -} class UniqRawResReformatBlockOutputStream : public IProfilingBlockInputStream { @@ -304,7 +199,7 @@ BlockInputStreamPtr executeQuery(Context & context, RegionID region_id, const DA auto * tm = req->mutable_meta(); tm->set_start_ts(properties.start_ts); tm->set_partition_id(task.partition_id); - tm->set_address(LOCAL_HOST); + tm->set_address(Debug::LOCAL_HOST); tm->set_task_id(task.task_id); auto * encoded_plan = req->mutable_encoded_plan(); task.dag_request->AppendToString(encoded_plan); @@ -355,7 +250,7 @@ BlockInputStreamPtr executeQuery(Context & context, RegionID region_id, const DA } } pingcap::kv::RpcCall call(req); - context.getTMTContext().getCluster()->rpc_client->sendRequest(LOCAL_HOST, call, 1000); + context.getTMTContext().getCluster()->rpc_client->sendRequest(Debug::LOCAL_HOST, call, 1000); if (call.getResp()->has_error()) throw Exception("Meet error while dispatch mpp task: " + call.getResp()->error().msg()); } @@ -364,7 +259,7 @@ BlockInputStreamPtr executeQuery(Context & context, RegionID region_id, const DA { mpp::TaskMeta tm; tm.set_start_ts(properties.start_ts); - tm.set_address(LOCAL_HOST); + tm.set_address(Debug::LOCAL_HOST); tm.set_task_id(root_task_id); tm.set_partition_id(-1); auto * tm_string = tipb_exchange_receiver.add_encoded_task_meta(); @@ -379,7 +274,7 @@ BlockInputStreamPtr executeQuery(Context & context, RegionID region_id, const DA } mpp::TaskMeta root_tm; root_tm.set_start_ts(properties.start_ts); - root_tm.set_address(LOCAL_HOST); + root_tm.set_address(Debug::LOCAL_HOST); root_tm.set_task_id(-1); root_tm.set_partition_id(-1); std::shared_ptr exchange_receiver @@ -559,1564 +454,6 @@ BlockInputStreamPtr dbgFuncMockTiDBQuery(Context & context, const ASTs & args) return executeQuery(context, region_id, properties, query_tasks, func_wrap_output_stream); } -void literalToPB(tipb::Expr * expr, const Field & value, uint32_t collator_id) -{ - WriteBufferFromOwnString ss; - switch (value.getType()) - { - case Field::Types::Which::Null: - { - expr->set_tp(tipb::Null); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeNull); - ft->set_collate(collator_id); - // Null literal expr doesn't need value. 
- break; - } - case Field::Types::Which::UInt64: - { - expr->set_tp(tipb::Uint64); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGUInt64(value.get(), ss); - break; - } - case Field::Types::Which::Int64: - { - expr->set_tp(tipb::Int64); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGInt64(value.get(), ss); - break; - } - case Field::Types::Which::Float64: - { - expr->set_tp(tipb::Float64); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeFloat); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGFloat64(value.get(), ss); - break; - } - case Field::Types::Which::Decimal32: - case Field::Types::Which::Decimal64: - case Field::Types::Which::Decimal128: - case Field::Types::Which::Decimal256: - { - expr->set_tp(tipb::MysqlDecimal); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeNewDecimal); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGDecimal(value, ss); - break; - } - case Field::Types::Which::String: - { - expr->set_tp(tipb::String); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - // TODO: Align with TiDB. - encodeDAGBytes(value.get(), ss); - break; - } - default: - throw Exception(String("Unsupported literal type: ") + value.getTypeName(), ErrorCodes::LOGICAL_ERROR); - } - expr->set_val(ss.releaseStr()); -} - -String getFunctionNameForConstantFolding(tipb::Expr * expr) -{ - // todo support more function for constant folding - switch (expr->sig()) - { - case tipb::ScalarFuncSig::CastStringAsTime: - return "toMyDateTimeOrNull"; - default: - return ""; - } -} - -void foldConstant(tipb::Expr * expr, uint32_t collator_id, const Context & context) -{ - if (expr->tp() == tipb::ScalarFunc) - { - bool all_const = true; - for (const auto & c : expr->children()) - { - if (!isLiteralExpr(c)) - { - all_const = false; - break; - } - } - if (!all_const) - return; - DataTypes arguments_types; - ColumnsWithTypeAndName argument_columns; - for (const auto & c : expr->children()) - { - Field value = decodeLiteral(c); - DataTypePtr flash_type = applyVisitor(FieldToDataType(), value); - DataTypePtr target_type = inferDataType4Literal(c); - ColumnWithTypeAndName column; - column.column = target_type->createColumnConst(1, convertFieldToType(value, *target_type, flash_type.get())); - column.name = exprToString(c, {}) + "_" + target_type->getName(); - column.type = target_type; - arguments_types.emplace_back(target_type); - argument_columns.emplace_back(column); - } - auto func_name = getFunctionNameForConstantFolding(expr); - if (func_name.empty()) - return; - const auto & function_builder_ptr = FunctionFactory::instance().get(func_name, context); - auto function_ptr = function_builder_ptr->build(argument_columns); - if (function_ptr->isSuitableForConstantFolding()) - { - Block block_with_constants(argument_columns); - ColumnNumbers argument_numbers(arguments_types.size()); - for (size_t i = 0, size = arguments_types.size(); i < size; i++) - argument_numbers[i] = i; - size_t result_pos = argument_numbers.size(); - block_with_constants.insert({nullptr, function_ptr->getReturnType(), "result"}); - function_ptr->execute(block_with_constants, argument_numbers, 
result_pos); - const auto & result_column = block_with_constants.getByPosition(result_pos).column; - if (result_column->isColumnConst()) - { - auto updated_value = (*result_column)[0]; - tipb::FieldType orig_field_type = expr->field_type(); - expr->Clear(); - literalToPB(expr, updated_value, collator_id); - expr->clear_field_type(); - auto * field_type = expr->mutable_field_type(); - (*field_type) = orig_field_type; - } - } - } -} - -void astToPB(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, uint32_t collator_id, const Context & context) -{ - if (ASTIdentifier * id = typeid_cast(ast.get())) - { - auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { - auto column_name = splitQualifiedName(id->getColumnName()); - auto field_name = splitQualifiedName(field.first); - if (column_name.first.empty()) - return field_name.second == column_name.second; - else - return field_name.first == column_name.first && field_name.second == column_name.second; - }); - if (ft == input.end()) - throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - expr->set_tp(tipb::ColumnRef); - *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); - expr->mutable_field_type()->set_collate(collator_id); - WriteBufferFromOwnString ss; - encodeDAGInt64(ft - input.begin(), ss); - expr->set_val(ss.releaseStr()); - } - else if (ASTFunction * func = typeid_cast(ast.get())) - { - /// aggregation function is handled in Aggregation, so just treated as a column - auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { - auto column_name = splitQualifiedName(func->getColumnName()); - auto field_name = splitQualifiedName(field.first); - if (column_name.first.empty()) - return field_name.second == column_name.second; - else - return field_name.first == column_name.first && field_name.second == column_name.second; - }); - if (ft != input.end()) - { - expr->set_tp(tipb::ColumnRef); - *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); - WriteBufferFromOwnString ss; - encodeDAGInt64(ft - input.begin(), ss); - expr->set_val(ss.releaseStr()); - return; - } - if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - { - throw Exception("No such column " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - } - String func_name_lowercase = Poco::toLower(func->name); - // TODO: Support more functions. - // TODO: Support type inference. 
- - const auto it_sig = func_name_to_sig.find(func_name_lowercase); - if (it_sig == func_name_to_sig.end()) - { - throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); - } - switch (it_sig->second) - { - case tipb::ScalarFuncSig::InInt: - { - tipb::Expr * in_expr = expr; - if (func_name_lowercase == "notin") - { - // notin is transformed into not(in()) by tidb - expr->set_sig(tipb::ScalarFuncSig::UnaryNotInt); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned); - expr->set_tp(tipb::ExprType::ScalarFunc); - in_expr = expr->add_children(); - } - in_expr->set_sig(tipb::ScalarFuncSig::InInt); - auto * ft = in_expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned); - ft->set_collate(collator_id); - in_expr->set_tp(tipb::ExprType::ScalarFunc); - for (const auto & child_ast : func->arguments->children) - { - auto * tuple_func = typeid_cast(child_ast.get()); - if (tuple_func != nullptr && tuple_func->name == "tuple") - { - // flatten tuple elements - for (const auto & c : tuple_func->arguments->children) - { - tipb::Expr * child = in_expr->add_children(); - astToPB(input, c, child, collator_id, context); - } - } - else - { - tipb::Expr * child = in_expr->add_children(); - astToPB(input, child_ast, child, collator_id, context); - } - } - return; - } - case tipb::ScalarFuncSig::IfInt: - case tipb::ScalarFuncSig::BitAndSig: - case tipb::ScalarFuncSig::BitOrSig: - case tipb::ScalarFuncSig::BitXorSig: - case tipb::ScalarFuncSig::BitNegSig: - expr->set_sig(it_sig->second); - expr->set_tp(tipb::ExprType::ScalarFunc); - for (size_t i = 0; i < func->arguments->children.size(); i++) - { - const auto & child_ast = func->arguments->children[i]; - tipb::Expr * child = expr->add_children(); - astToPB(input, child_ast, child, collator_id, context); - // todo should infer the return type based on all input types - if ((it_sig->second == tipb::ScalarFuncSig::IfInt && i == 1) - || (it_sig->second != tipb::ScalarFuncSig::IfInt && i == 0)) - *(expr->mutable_field_type()) = child->field_type(); - } - return; - case tipb::ScalarFuncSig::LikeSig: - { - expr->set_sig(tipb::ScalarFuncSig::LikeSig); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned); - ft->set_collate(collator_id); - expr->set_tp(tipb::ExprType::ScalarFunc); - for (const auto & child_ast : func->arguments->children) - { - tipb::Expr * child = expr->add_children(); - astToPB(input, child_ast, child, collator_id, context); - } - // for like need to add the third argument - *expr->add_children() = constructInt64LiteralTiExpr(92); - return; - } - case tipb::ScalarFuncSig::FromUnixTime2Arg: - if (func->arguments->children.size() == 1) - { - expr->set_sig(tipb::ScalarFuncSig::FromUnixTime1Arg); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeDatetime); - ft->set_decimal(6); - } - else - { - expr->set_sig(tipb::ScalarFuncSig::FromUnixTime2Arg); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - } - break; - case tipb::ScalarFuncSig::DateFormatSig: - expr->set_sig(tipb::ScalarFuncSig::DateFormatSig); - expr->mutable_field_type()->set_tp(TiDB::TypeString); - break; - case tipb::ScalarFuncSig::CastIntAsTime: - case tipb::ScalarFuncSig::CastRealAsTime: - case tipb::ScalarFuncSig::CastTimeAsTime: - case tipb::ScalarFuncSig::CastDecimalAsTime: - case tipb::ScalarFuncSig::CastStringAsTime: - { - 
expr->set_sig(it_sig->second); - auto * ft = expr->mutable_field_type(); - if (it_sig->first.find("datetime")) - { - ft->set_tp(TiDB::TypeDatetime); - } - else - { - ft->set_tp(TiDB::TypeDate); - } - break; - } - case tipb::ScalarFuncSig::CastIntAsReal: - case tipb::ScalarFuncSig::CastRealAsReal: - { - expr->set_sig(it_sig->second); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeDouble); - ft->set_collate(collator_id); - break; - } - case tipb::ScalarFuncSig::RoundInt: - case tipb::ScalarFuncSig::RoundWithFracInt: - { - expr->set_sig(it_sig->second); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - if (it_sig->first.find("uint") != std::string::npos) - ft->set_flag(TiDB::ColumnFlagUnsigned); - ft->set_collate(collator_id); - break; - } - case tipb::ScalarFuncSig::RoundDec: - case tipb::ScalarFuncSig::RoundWithFracDec: - { - expr->set_sig(it_sig->second); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeNewDecimal); - ft->set_collate(collator_id); - break; - } - case tipb::ScalarFuncSig::RoundReal: - case tipb::ScalarFuncSig::RoundWithFracReal: - { - expr->set_sig(it_sig->second); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeDouble); - ft->set_collate(collator_id); - break; - } - default: - { - expr->set_sig(it_sig->second); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned); - ft->set_collate(collator_id); - break; - } - } - expr->set_tp(tipb::ExprType::ScalarFunc); - for (const auto & child_ast : func->arguments->children) - { - tipb::Expr * child = expr->add_children(); - astToPB(input, child_ast, child, collator_id, context); - } - foldConstant(expr, collator_id, context); - } - else if (ASTLiteral * lit = typeid_cast(ast.get())) - { - literalToPB(expr, lit->value, collator_id); - } - else - { - throw Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); - } -} - -void collectUsedColumnsFromExpr(const DAGSchema & input, ASTPtr ast, std::unordered_set & used_columns) -{ - if (ASTIdentifier * id = typeid_cast(ast.get())) - { - auto column_name = splitQualifiedName(id->getColumnName()); - if (!column_name.first.empty()) - used_columns.emplace(id->getColumnName()); - else - { - bool found = false; - for (const auto & field : input) - { - auto field_name = splitQualifiedName(field.first); - if (field_name.second == column_name.second) - { - if (found) - throw Exception("ambiguous column for " + column_name.second); - found = true; - used_columns.emplace(field.first); - } - } - } - } - else if (ASTFunction * func = typeid_cast(ast.get())) - { - if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - { - used_columns.emplace(func->getColumnName()); - } - else - { - /// check function - auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { - auto column_name = splitQualifiedName(func->getColumnName()); - auto field_name = splitQualifiedName(field.first); - if (column_name.first.empty()) - return field_name.second == column_name.second; - else - return field_name.first == column_name.first && field_name.second == column_name.second; - }); - if (ft != input.end()) - { - used_columns.emplace(func->getColumnName()); - return; - } - for (const auto & child_ast : func->arguments->children) - { - collectUsedColumnsFromExpr(input, child_ast, used_columns); - } - } - } -} - -struct MPPCtx -{ - Timestamp start_ts; - Int64 next_task_id; - std::vector sender_target_task_ids; - 
explicit MPPCtx(Timestamp start_ts_) - : start_ts(start_ts_) - , next_task_id(1) - {} -}; - -using MPPCtxPtr = std::shared_ptr; - -struct MPPInfo -{ - Timestamp start_ts; - Int64 partition_id; - Int64 task_id; - const std::vector sender_target_task_ids; - const std::unordered_map> receiver_source_task_ids_map; - - MPPInfo( - Timestamp start_ts_, - Int64 partition_id_, - Int64 task_id_, - const std::vector & sender_target_task_ids_, - const std::unordered_map> & receiver_source_task_ids_map_) - : start_ts(start_ts_) - , partition_id(partition_id_) - , task_id(task_id_) - , sender_target_task_ids(sender_target_task_ids_) - , receiver_source_task_ids_map(receiver_source_task_ids_map_) - {} -}; - -struct TaskMeta -{ - UInt64 start_ts = 0; - Int64 task_id = 0; - Int64 partition_id = 0; -}; - -using TaskMetas = std::vector; - -namespace mock -{ -struct ExchangeSender; -struct ExchangeReceiver; -struct Executor -{ - size_t index; - String name; - DAGSchema output_schema; - std::vector> children; - virtual void columnPrune(std::unordered_set & used_columns) = 0; - Executor(size_t & index_, String && name_, const DAGSchema & output_schema_) - : index(index_) - , name(std::move(name_)) - , output_schema(output_schema_) - { - index_++; - } - virtual bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) - = 0; - virtual void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) - { - children[0]->toMPPSubPlan(executor_index, properties, exchange_map); - } - virtual ~Executor() = default; -}; - -struct ExchangeSender : Executor -{ - tipb::ExchangeType type; - TaskMetas task_metas; - std::vector partition_keys; - ExchangeSender(size_t & index, const DAGSchema & output, tipb::ExchangeType type_, const std::vector & partition_keys_ = {}) - : Executor(index, "exchange_sender_" + std::to_string(index), output) - , type(type_) - , partition_keys(partition_keys_) - {} - void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override - { - tipb_executor->set_tp(tipb::ExecType::TypeExchangeSender); - tipb_executor->set_executor_id(name); - tipb::ExchangeSender * exchange_sender = tipb_executor->mutable_exchange_sender(); - exchange_sender->set_tp(type); - for (auto i : partition_keys) - { - auto * expr = exchange_sender->add_partition_keys(); - expr->set_tp(tipb::ColumnRef); - WriteBufferFromOwnString ss; - encodeDAGInt64(i, ss); - expr->set_val(ss.releaseStr()); - auto tipb_type = TiDB::columnInfoToFieldType(output_schema[i].second); - *expr->mutable_field_type() = tipb_type; - tipb_type.set_collate(collator_id); - *exchange_sender->add_types() = tipb_type; - } - for (auto task_id : mpp_info.sender_target_task_ids) - { - mpp::TaskMeta meta; - meta.set_start_ts(mpp_info.start_ts); - meta.set_task_id(task_id); - meta.set_partition_id(mpp_info.partition_id); - meta.set_address(LOCAL_HOST); - auto * meta_string = exchange_sender->add_encoded_task_meta(); - meta.AppendToString(meta_string); - } - auto * child_executor = exchange_sender->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); - } -}; - -struct ExchangeReceiver : Executor -{ - TaskMetas task_metas; - ExchangeReceiver(size_t & index, const DAGSchema & output) - : Executor(index, "exchange_receiver_" + 
std::to_string(index), output) - {} - void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context &) override - { - tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); - tipb_executor->set_executor_id(name); - tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); - for (auto & field : output_schema) - { - auto tipb_type = TiDB::columnInfoToFieldType(field.second); - tipb_type.set_collate(collator_id); - - auto * field_type = exchange_receiver->add_field_types(); - *field_type = tipb_type; - } - auto it = mpp_info.receiver_source_task_ids_map.find(name); - if (it == mpp_info.receiver_source_task_ids_map.end()) - throw Exception("Can not found mpp receiver info"); - for (size_t i = 0; i < it->second.size(); i++) - { - mpp::TaskMeta meta; - meta.set_start_ts(mpp_info.start_ts); - meta.set_task_id(it->second[i]); - meta.set_partition_id(i); - meta.set_address(LOCAL_HOST); - auto * meta_string = exchange_receiver->add_encoded_task_meta(); - meta.AppendToString(meta_string); - } - return true; - } -}; - -struct TableScan : public Executor -{ - TableInfo table_info; - /// used by column pruner - TableScan(size_t & index_, const DAGSchema & output_schema_, TableInfo & table_info_) - : Executor(index_, "table_scan_" + std::to_string(index_), output_schema_) - , table_info(table_info_) - {} - void columnPrune(std::unordered_set & used_columns) override - { - output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), - output_schema.end()); - } - - void setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const - { - auto column_name = splitQualifiedName(dag_column_info.first).second; - if (column_name == MutableSupport::tidb_pk_column_name) - ci->set_column_id(-1); - else - ci->set_column_id(table_info.getColumnID(column_name)); - ci->set_tp(dag_column_info.second.tp); - ci->set_flag(dag_column_info.second.flag); - ci->set_columnlen(dag_column_info.second.flen); - ci->set_decimal(dag_column_info.second.decimal); - if (!dag_column_info.second.elems.empty()) - { - for (const auto & pair : dag_column_info.second.elems) - { - ci->add_elems(pair.first); - } - } - } - - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t, const MPPInfo &, const Context &) override - { - if (table_info.is_partition_table) - { - tipb_executor->set_tp(tipb::ExecType::TypePartitionTableScan); - tipb_executor->set_executor_id(name); - auto * partition_ts = tipb_executor->mutable_partition_table_scan(); - partition_ts->set_table_id(table_info.id); - for (const auto & info : output_schema) - setTipbColumnInfo(partition_ts->add_columns(), info); - for (const auto & partition : table_info.partition.definitions) - partition_ts->add_partition_ids(partition.id); - } - else - { - tipb_executor->set_tp(tipb::ExecType::TypeTableScan); - tipb_executor->set_executor_id(name); - auto * ts = tipb_executor->mutable_tbl_scan(); - ts->set_table_id(table_info.id); - for (const auto & info : output_schema) - setTipbColumnInfo(ts->add_columns(), info); - } - return true; - } - void toMPPSubPlan(size_t &, const DAGProperties &, std::unordered_map, std::shared_ptr>> &) override - {} -}; - -struct Selection : public Executor -{ - std::vector conditions; - Selection(size_t & index_, const DAGSchema & output_schema_, std::vector && 
conditions_) - : Executor(index_, "selection_" + std::to_string(index_), output_schema_) - , conditions(std::move(conditions_)) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override - { - tipb_executor->set_tp(tipb::ExecType::TypeSelection); - tipb_executor->set_executor_id(name); - auto * sel = tipb_executor->mutable_selection(); - for (auto & expr : conditions) - { - tipb::Expr * cond = sel->add_conditions(); - astToPB(children[0]->output_schema, expr, cond, collator_id, context); - } - auto * child_executor = sel->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); - } - void columnPrune(std::unordered_set & used_columns) override - { - for (auto & expr : conditions) - collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); - children[0]->columnPrune(used_columns); - /// update output schema after column prune - output_schema = children[0]->output_schema; - } -}; - -struct TopN : public Executor -{ - std::vector order_columns; - size_t limit; - TopN(size_t & index_, const DAGSchema & output_schema_, std::vector && order_columns_, size_t limit_) - : Executor(index_, "topn_" + std::to_string(index_), output_schema_) - , order_columns(std::move(order_columns_)) - , limit(limit_) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override - { - tipb_executor->set_tp(tipb::ExecType::TypeTopN); - tipb_executor->set_executor_id(name); - tipb::TopN * topn = tipb_executor->mutable_topn(); - for (const auto & child : order_columns) - { - ASTOrderByElement * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - tipb::ByItem * by = topn->add_order_by(); - by->set_desc(elem->direction < 0); - tipb::Expr * expr = by->mutable_expr(); - astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); - } - topn->set_limit(limit); - auto * child_executor = topn->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); - } - void columnPrune(std::unordered_set & used_columns) override - { - for (auto & expr : order_columns) - collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); - children[0]->columnPrune(used_columns); - /// update output schema after column prune - output_schema = children[0]->output_schema; - } -}; - -struct Limit : public Executor -{ - size_t limit; - Limit(size_t & index_, const DAGSchema & output_schema_, size_t limit_) - : Executor(index_, "limit_" + std::to_string(index_), output_schema_) - , limit(limit_) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override - { - tipb_executor->set_tp(tipb::ExecType::TypeLimit); - tipb_executor->set_executor_id(name); - tipb::Limit * lt = tipb_executor->mutable_limit(); - lt->set_limit(limit); - auto * child_executor = lt->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); - } - void columnPrune(std::unordered_set & used_columns) override - { - children[0]->columnPrune(used_columns); - /// update output schema after column prune - output_schema = children[0]->output_schema; - } -}; - -struct Aggregation : public Executor -{ - bool has_uniq_raw_res; - bool need_append_project; - std::vector agg_exprs; - std::vector 
gby_exprs; - bool is_final_mode; - DAGSchema output_schema_for_partial_agg; - Aggregation(size_t & index_, const DAGSchema & output_schema_, bool has_uniq_raw_res_, bool need_append_project_, std::vector && agg_exprs_, std::vector && gby_exprs_, bool is_final_mode_) - : Executor(index_, "aggregation_" + std::to_string(index_), output_schema_) - , has_uniq_raw_res(has_uniq_raw_res_) - , need_append_project(need_append_project_) - , agg_exprs(std::move(agg_exprs_)) - , gby_exprs(std::move(gby_exprs_)) - , is_final_mode(is_final_mode_) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override - { - tipb_executor->set_tp(tipb::ExecType::TypeAggregation); - tipb_executor->set_executor_id(name); - auto * agg = tipb_executor->mutable_aggregation(); - auto & input_schema = children[0]->output_schema; - for (const auto & expr : agg_exprs) - { - const ASTFunction * func = typeid_cast(expr.get()); - if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); - - tipb::Expr * agg_func = agg->add_agg_func(); - - for (const auto & arg : func->arguments->children) - { - tipb::Expr * arg_expr = agg_func->add_children(); - astToPB(input_schema, arg, arg_expr, collator_id, context); - } - auto agg_sig_it = agg_func_name_to_sig.find(func->name); - if (agg_sig_it == agg_func_name_to_sig.end()) - throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); - auto agg_sig = agg_sig_it->second; - agg_func->set_tp(agg_sig); - - if (agg_sig == tipb::ExprType::Count || agg_sig == tipb::ExprType::Sum) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); - } - else if (agg_sig == tipb::ExprType::Min || agg_sig == tipb::ExprType::Max || agg_sig == tipb::ExprType::First) - { - if (agg_func->children_size() != 1) - throw Exception("udaf " + func->name + " only accept 1 argument"); - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(agg_func->children(0).field_type().tp()); - ft->set_decimal(agg_func->children(0).field_type().decimal()); - ft->set_flag(agg_func->children(0).field_type().flag() & (~TiDB::ColumnFlagNotNull)); - ft->set_collate(collator_id); - } - else if (agg_sig == tipb::ExprType::ApproxCountDistinct) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - ft->set_flag(1); - } - else if (agg_sig == tipb::ExprType::GroupConcat) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - } - if (is_final_mode) - agg_func->set_aggfuncmode(tipb::AggFunctionMode::FinalMode); - else - agg_func->set_aggfuncmode(tipb::AggFunctionMode::Partial1Mode); - } - - for (const auto & child : gby_exprs) - { - tipb::Expr * gby = agg->add_group_by(); - astToPB(input_schema, child, gby, collator_id, context); - } - - auto * child_executor = agg->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); - } - void columnPrune(std::unordered_set & used_columns) override - { - /// output schema for partial agg is the original agg's output schema - output_schema_for_partial_agg = output_schema; - output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), - output_schema.end()); - 
std::unordered_set used_input_columns; - for (auto & func : agg_exprs) - { - if (used_columns.find(func->getColumnName()) != used_columns.end()) - { - const ASTFunction * agg_func = typeid_cast(func.get()); - if (agg_func != nullptr) - { - /// agg_func should not be nullptr, just double check - for (auto & child : agg_func->arguments->children) - collectUsedColumnsFromExpr(children[0]->output_schema, child, used_input_columns); - } - } - } - for (auto & gby_expr : gby_exprs) - { - collectUsedColumnsFromExpr(children[0]->output_schema, gby_expr, used_input_columns); - } - children[0]->columnPrune(used_input_columns); - } - void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override - { - if (!is_final_mode) - { - children[0]->toMPPSubPlan(executor_index, properties, exchange_map); - return; - } - /// for aggregation, change aggregation to partial_aggregation => exchange_sender => exchange_receiver => final_aggregation - // todo support avg - if (has_uniq_raw_res) - throw Exception("uniq raw res not supported in mpp query"); - std::shared_ptr partial_agg = std::make_shared( - executor_index, - output_schema_for_partial_agg, - has_uniq_raw_res, - false, - std::move(agg_exprs), - std::move(gby_exprs), - false); - partial_agg->children.push_back(children[0]); - std::vector partition_keys; - size_t agg_func_num = partial_agg->agg_exprs.size(); - for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) - { - partition_keys.push_back(i + agg_func_num); - } - std::shared_ptr exchange_sender - = std::make_shared(executor_index, output_schema_for_partial_agg, partition_keys.empty() ? tipb::PassThrough : tipb::Hash, partition_keys); - exchange_sender->children.push_back(partial_agg); - - std::shared_ptr exchange_receiver - = std::make_shared(executor_index, output_schema_for_partial_agg); - exchange_map[exchange_receiver->name] = std::make_pair(exchange_receiver, exchange_sender); - /// re-construct agg_exprs and gby_exprs in final_agg - for (size_t i = 0; i < partial_agg->agg_exprs.size(); i++) - { - const ASTFunction * agg_func = typeid_cast(partial_agg->agg_exprs[i].get()); - ASTPtr update_agg_expr = agg_func->clone(); - auto * update_agg_func = typeid_cast(update_agg_expr.get()); - if (agg_func->name == "count") - update_agg_func->name = "sum"; - update_agg_func->arguments->children.clear(); - update_agg_func->arguments->children.push_back(std::make_shared(output_schema_for_partial_agg[i].first)); - agg_exprs.push_back(update_agg_expr); - } - for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) - { - gby_exprs.push_back(std::make_shared(output_schema_for_partial_agg[agg_func_num + i].first)); - } - children[0] = exchange_receiver; - } -}; - -struct Project : public Executor -{ - std::vector exprs; - Project(size_t & index_, const DAGSchema & output_schema_, std::vector && exprs_) - : Executor(index_, "project_" + std::to_string(index_), output_schema_) - , exprs(std::move(exprs_)) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override - { - tipb_executor->set_tp(tipb::ExecType::TypeProjection); - tipb_executor->set_executor_id(name); - auto * proj = tipb_executor->mutable_projection(); - auto & input_schema = children[0]->output_schema; - for (const auto & child : exprs) - { - if (typeid_cast(child.get())) - { - /// special case, select * - for (size_t i = 0; i < input_schema.size(); i++) - { - tipb::Expr * expr = 
proj->add_exprs(); - expr->set_tp(tipb::ColumnRef); - *(expr->mutable_field_type()) = columnInfoToFieldType(input_schema[i].second); - WriteBufferFromOwnString ss; - encodeDAGInt64(i, ss); - expr->set_val(ss.releaseStr()); - } - continue; - } - tipb::Expr * expr = proj->add_exprs(); - astToPB(input_schema, child, expr, collator_id, context); - } - auto * children_executor = proj->mutable_child(); - return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); - } - void columnPrune(std::unordered_set & used_columns) override - { - output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), - output_schema.end()); - std::unordered_set used_input_columns; - for (auto & expr : exprs) - { - if (typeid_cast(expr.get())) - { - /// for select *, just add all its input columns, maybe - /// can do some optimization, but it is not worth for mock - /// tests - for (auto & field : children[0]->output_schema) - { - used_input_columns.emplace(field.first); - } - break; - } - if (used_columns.find(expr->getColumnName()) != used_columns.end()) - { - collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_input_columns); - } - } - children[0]->columnPrune(used_input_columns); - } -}; - -struct Join : Executor -{ - ASTPtr params; - const ASTTableJoin & join_params; - Join(size_t & index_, const DAGSchema & output_schema_, ASTPtr params_) - : Executor(index_, "Join_" + std::to_string(index_), output_schema_) - , params(params_) - , join_params(static_cast(*params)) - { - if (join_params.using_expression_list == nullptr) - throw Exception("No join condition found."); - if (join_params.strictness != ASTTableJoin::Strictness::All) - throw Exception("Only support join with strictness ALL"); - } - - void columnPrune(std::unordered_set & used_columns) override - { - std::unordered_set left_columns; - std::unordered_set right_columns; - for (auto & field : children[0]->output_schema) - left_columns.emplace(field.first); - for (auto & field : children[1]->output_schema) - right_columns.emplace(field.first); - - std::unordered_set left_used_columns; - std::unordered_set right_used_columns; - for (const auto & s : used_columns) - { - if (left_columns.find(s) != left_columns.end()) - left_used_columns.emplace(s); - else - right_used_columns.emplace(s); - } - for (const auto & child : join_params.using_expression_list->children) - { - if (auto * identifier = typeid_cast(child.get())) - { - auto col_name = identifier->getColumnName(); - for (auto & field : children[0]->output_schema) - { - if (col_name == splitQualifiedName(field.first).second) - { - left_used_columns.emplace(field.first); - break; - } - } - for (auto & field : children[1]->output_schema) - { - if (col_name == splitQualifiedName(field.first).second) - { - right_used_columns.emplace(field.first); - break; - } - } - } - else - { - throw Exception("Only support Join on columns"); - } - } - children[0]->columnPrune(left_used_columns); - children[1]->columnPrune(right_used_columns); - output_schema.clear(); - /// update output schema - for (auto & field : children[0]->output_schema) - { - if (join_params.kind == ASTTableJoin::Kind::Right && field.second.hasNotNullFlag()) - output_schema.push_back(toNullableDAGColumnInfo(field)); - else - output_schema.push_back(field); - } - for (auto & field : children[1]->output_schema) - { - if (join_params.kind == ASTTableJoin::Kind::Left && field.second.hasNotNullFlag()) - 
output_schema.push_back(toNullableDAGColumnInfo(field)); - else - output_schema.push_back(field); - } - } - - static void fillJoinKeyAndFieldType( - ASTPtr key, - const DAGSchema & schema, - tipb::Expr * tipb_key, - tipb::FieldType * tipb_field_type, - uint32_t collator_id) - { - auto * identifier = typeid_cast(key.get()); - for (size_t index = 0; index < schema.size(); index++) - { - const auto & field = schema[index]; - if (splitQualifiedName(field.first).second == identifier->getColumnName()) - { - auto tipb_type = TiDB::columnInfoToFieldType(field.second); - tipb_type.set_collate(collator_id); - - tipb_key->set_tp(tipb::ColumnRef); - WriteBufferFromOwnString ss; - encodeDAGInt64(index, ss); - tipb_key->set_val(ss.releaseStr()); - *tipb_key->mutable_field_type() = tipb_type; - - *tipb_field_type = tipb_type; - break; - } - } - } - bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override - { - tipb_executor->set_tp(tipb::ExecType::TypeJoin); - tipb_executor->set_executor_id(name); - tipb::Join * join = tipb_executor->mutable_join(); - switch (join_params.kind) - { - case ASTTableJoin::Kind::Inner: - join->set_join_type(tipb::JoinType::TypeInnerJoin); - break; - case ASTTableJoin::Kind::Left: - join->set_join_type(tipb::JoinType::TypeLeftOuterJoin); - break; - case ASTTableJoin::Kind::Right: - join->set_join_type(tipb::JoinType::TypeRightOuterJoin); - break; - default: - throw Exception("Unsupported join type"); - } - join->set_join_exec_type(tipb::JoinExecType::TypeHashJoin); - join->set_inner_idx(1); - for (auto & key : join_params.using_expression_list->children) - { - fillJoinKeyAndFieldType(key, children[0]->output_schema, join->add_left_join_keys(), join->add_probe_types(), collator_id); - fillJoinKeyAndFieldType(key, children[1]->output_schema, join->add_right_join_keys(), join->add_build_types(), collator_id); - } - auto * left_child_executor = join->add_children(); - children[0]->toTiPBExecutor(left_child_executor, collator_id, mpp_info, context); - auto * right_child_executor = join->add_children(); - return children[1]->toTiPBExecutor(right_child_executor, collator_id, mpp_info, context); - } - void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override - { - if (properties.use_broadcast_join) - { - /// for broadcast join, always use right side as the broadcast side - std::shared_ptr right_exchange_sender - = std::make_shared(executor_index, children[1]->output_schema, tipb::Broadcast); - right_exchange_sender->children.push_back(children[1]); - - std::shared_ptr right_exchange_receiver - = std::make_shared(executor_index, children[1]->output_schema); - children[1] = right_exchange_receiver; - exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); - return; - } - std::vector left_partition_keys; - std::vector right_partition_keys; - for (auto & key : join_params.using_expression_list->children) - { - size_t index = 0; - for (; index < children[0]->output_schema.size(); index++) - { - if (splitQualifiedName(children[0]->output_schema[index].first).second == key->getColumnName()) - { - left_partition_keys.push_back(index); - break; - } - } - index = 0; - for (; index < children[1]->output_schema.size(); index++) - { - if (splitQualifiedName(children[1]->output_schema[index].first).second == key->getColumnName()) - { - right_partition_keys.push_back(index); - break; - } - 
} - } - std::shared_ptr left_exchange_sender - = std::make_shared(executor_index, children[0]->output_schema, tipb::Hash, left_partition_keys); - left_exchange_sender->children.push_back(children[0]); - std::shared_ptr right_exchange_sender - = std::make_shared(executor_index, children[1]->output_schema, tipb::Hash, right_partition_keys); - right_exchange_sender->children.push_back(children[1]); - - std::shared_ptr left_exchange_receiver - = std::make_shared(executor_index, children[0]->output_schema); - std::shared_ptr right_exchange_receiver - = std::make_shared(executor_index, children[1]->output_schema); - children[0] = left_exchange_receiver; - children[1] = right_exchange_receiver; - - exchange_map[left_exchange_receiver->name] = std::make_pair(left_exchange_receiver, left_exchange_sender); - exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); - } -}; -} // namespace mock - -using ExecutorPtr = std::shared_ptr; - -TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) -{ - TiDB::ColumnInfo ci; - if (ASTIdentifier * id = typeid_cast(ast.get())) - { - /// check column - auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { - auto column_name = splitQualifiedName(id->getColumnName()); - auto field_name = splitQualifiedName(field.first); - if (column_name.first.empty()) - return field_name.second == column_name.second; - else - return field_name.first == column_name.first && field_name.second == column_name.second; - }); - if (ft == input.end()) - throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - ci = ft->second; - } - else if (ASTFunction * func = typeid_cast(ast.get())) - { - /// check function - String func_name_lowercase = Poco::toLower(func->name); - const auto it_sig = func_name_to_sig.find(func_name_lowercase); - if (it_sig == func_name_to_sig.end()) - { - throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); - } - switch (it_sig->second) - { - case tipb::ScalarFuncSig::InInt: - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned; - for (const auto & child_ast : func->arguments->children) - { - auto * tuple_func = typeid_cast(child_ast.get()); - if (tuple_func != nullptr && tuple_func->name == "tuple") - { - // flatten tuple elements - for (const auto & c : tuple_func->arguments->children) - { - compileExpr(input, c); - } - } - else - { - compileExpr(input, child_ast); - } - } - return ci; - case tipb::ScalarFuncSig::IfInt: - case tipb::ScalarFuncSig::BitAndSig: - case tipb::ScalarFuncSig::BitOrSig: - case tipb::ScalarFuncSig::BitXorSig: - case tipb::ScalarFuncSig::BitNegSig: - for (size_t i = 0; i < func->arguments->children.size(); i++) - { - const auto & child_ast = func->arguments->children[i]; - auto child_ci = compileExpr(input, child_ast); - // todo should infer the return type based on all input types - if ((it_sig->second == tipb::ScalarFuncSig::IfInt && i == 1) - || (it_sig->second != tipb::ScalarFuncSig::IfInt && i == 0)) - ci = child_ci; - } - return ci; - case tipb::ScalarFuncSig::LikeSig: - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned; - for (const auto & child_ast : func->arguments->children) - { - compileExpr(input, child_ast); - } - return ci; - case tipb::ScalarFuncSig::FromUnixTime2Arg: - if (func->arguments->children.size() == 1) - { - ci.tp = TiDB::TypeDatetime; - ci.decimal = 6; - } - else - { - ci.tp = TiDB::TypeString; - } - break; - case 
tipb::ScalarFuncSig::DateFormatSig: - ci.tp = TiDB::TypeString; - break; - case tipb::ScalarFuncSig::CastIntAsTime: - case tipb::ScalarFuncSig::CastRealAsTime: - case tipb::ScalarFuncSig::CastTimeAsTime: - case tipb::ScalarFuncSig::CastDecimalAsTime: - case tipb::ScalarFuncSig::CastStringAsTime: - if (it_sig->first.find("datetime")) - { - ci.tp = TiDB::TypeDatetime; - } - else - { - ci.tp = TiDB::TypeDate; - } - break; - case tipb::ScalarFuncSig::CastIntAsReal: - case tipb::ScalarFuncSig::CastRealAsReal: - { - ci.tp = TiDB::TypeDouble; - break; - } - case tipb::ScalarFuncSig::RoundInt: - case tipb::ScalarFuncSig::RoundWithFracInt: - { - ci.tp = TiDB::TypeLongLong; - if (it_sig->first.find("uint") != std::string::npos) - ci.flag = TiDB::ColumnFlagUnsigned; - break; - } - case tipb::ScalarFuncSig::RoundDec: - case tipb::ScalarFuncSig::RoundWithFracDec: - { - ci.tp = TiDB::TypeNewDecimal; - break; - } - case tipb::ScalarFuncSig::RoundReal: - case tipb::ScalarFuncSig::RoundWithFracReal: - { - ci.tp = TiDB::TypeDouble; - break; - } - default: - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned; - break; - } - for (const auto & child_ast : func->arguments->children) - { - compileExpr(input, child_ast); - } - } - else if (ASTLiteral * lit = typeid_cast(ast.get())) - { - switch (lit->value.getType()) - { - case Field::Types::Which::Null: - ci.tp = TiDB::TypeNull; - // Null literal expr doesn't need value. - break; - case Field::Types::Which::UInt64: - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned; - break; - case Field::Types::Which::Int64: - ci.tp = TiDB::TypeLongLong; - break; - case Field::Types::Which::Float64: - ci.tp = TiDB::TypeDouble; - break; - case Field::Types::Which::Decimal32: - case Field::Types::Which::Decimal64: - case Field::Types::Which::Decimal128: - case Field::Types::Which::Decimal256: - ci.tp = TiDB::TypeNewDecimal; - break; - case Field::Types::Which::String: - ci.tp = TiDB::TypeString; - break; - default: - throw Exception(String("Unsupported literal type: ") + lit->value.getTypeName(), ErrorCodes::LOGICAL_ERROR); - } - } - else - { - /// not supported unless this is a literal - throw Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); - } - return ci; -} - -void compileFilter(const DAGSchema & input, ASTPtr ast, std::vector & conditions) -{ - if (auto * func = typeid_cast(ast.get())) - { - if (func->name == "and") - { - for (auto & child : func->arguments->children) - { - compileFilter(input, child, conditions); - } - return; - } - } - conditions.push_back(ast); - compileExpr(input, ast); -} - -ExecutorPtr compileTableScan(size_t & executor_index, TableInfo & table_info, String & table_alias, bool append_pk_column) -{ - DAGSchema ts_output; - for (const auto & column_info : table_info.columns) - { - ColumnInfo ci; - ci.tp = column_info.tp; - ci.flag = column_info.flag; - ci.flen = column_info.flen; - ci.decimal = column_info.decimal; - ci.elems = column_info.elems; - ci.default_value = column_info.default_value; - ci.origin_default_value = column_info.origin_default_value; - /// use qualified name as the column name to handle multiple table queries, not very - /// efficient but functionally enough for mock test - ts_output.emplace_back(std::make_pair(table_alias + "." 
+ column_info.name, std::move(ci))); - } - if (append_pk_column) - { - ColumnInfo ci; - ci.tp = TiDB::TypeLongLong; - ci.setPriKeyFlag(); - ci.setNotNullFlag(); - ts_output.emplace_back(std::make_pair(MutableSupport::tidb_pk_column_name, std::move(ci))); - } - return std::make_shared(executor_index, ts_output, table_info); -} - -ExecutorPtr compileSelection(ExecutorPtr input, size_t & executor_index, ASTPtr filter) -{ - std::vector conditions; - compileFilter(input->output_schema, filter, conditions); - auto selection = std::make_shared(executor_index, input->output_schema, std::move(conditions)); - selection->children.push_back(input); - return selection; -} - -ExecutorPtr compileTopN(ExecutorPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr) -{ - std::vector order_columns; - for (const auto & child : order_exprs->children) - { - ASTOrderByElement * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - order_columns.push_back(child); - compileExpr(input->output_schema, elem->children[0]); - } - auto limit = safeGet(typeid_cast(*limit_expr).value); - auto top_n = std::make_shared(executor_index, input->output_schema, std::move(order_columns), limit); - top_n->children.push_back(input); - return top_n; -} - -ExecutorPtr compileLimit(ExecutorPtr input, size_t & executor_index, ASTPtr limit_expr) -{ - auto limit_length = safeGet(typeid_cast(*limit_expr).value); - auto limit = std::make_shared(executor_index, input->output_schema, limit_length); - limit->children.push_back(input); - return limit; -} - -ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs) -{ - std::vector agg_exprs; - std::vector gby_exprs; - DAGSchema output_schema; - bool has_uniq_raw_res = false; - bool need_append_project = false; - if (agg_funcs != nullptr) - { - for (const auto & expr : agg_funcs->children) - { - const ASTFunction * func = typeid_cast(expr.get()); - if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - { - need_append_project = true; - continue; - } - - agg_exprs.push_back(expr); - std::vector children_ci; - - for (const auto & arg : func->arguments->children) - { - children_ci.push_back(compileExpr(input->output_schema, arg)); - } - - TiDB::ColumnInfo ci; - if (func->name == "count") - { - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull; - } - else if (func->name == "max" || func->name == "min" || func->name == "first_row") - { - ci = children_ci[0]; - ci.flag &= ~TiDB::ColumnFlagNotNull; - } - else if (func->name == uniq_raw_res_name) - { - has_uniq_raw_res = true; - ci.tp = TiDB::TypeString; - ci.flag = 1; - } - // TODO: Other agg func. 
- else - { - throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); - } - - output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); - } - } - - if (group_by_exprs != nullptr) - { - for (const auto & child : group_by_exprs->children) - { - gby_exprs.push_back(child); - auto ci = compileExpr(input->output_schema, child); - output_schema.emplace_back(std::make_pair(child->getColumnName(), ci)); - } - } - - auto aggregation = std::make_shared( - executor_index, - output_schema, - has_uniq_raw_res, - need_append_project, - std::move(agg_exprs), - std::move(gby_exprs), - true); - aggregation->children.push_back(input); - return aggregation; -} - -ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr select_list) -{ - std::vector exprs; - DAGSchema output_schema; - for (const auto & expr : select_list->children) - { - if (typeid_cast(expr.get())) - { - /// special case, select * - exprs.push_back(expr); - const auto & last_output = input->output_schema; - for (const auto & field : last_output) - { - // todo need to use the subquery alias to reconstruct the field - // name if subquery is supported - output_schema.emplace_back(field.first, field.second); - } - } - else - { - exprs.push_back(expr); - auto ft = std::find_if(input->output_schema.begin(), input->output_schema.end(), [&](const auto & field) { return field.first == expr->getColumnName(); }); - if (ft != input->output_schema.end()) - { - output_schema.emplace_back(ft->first, ft->second); - continue; - } - const ASTFunction * func = typeid_cast(expr.get()); - if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - { - throw Exception("No such agg " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - } - else - { - auto ci = compileExpr(input->output_schema, expr); - // todo need to use the subquery alias to reconstruct the field - // name if subquery is supported - output_schema.emplace_back(std::make_pair(expr->getColumnName(), ci)); - } - } - } - - auto project = std::make_shared(executor_index, output_schema, std::move(exprs)); - project->children.push_back(input); - return project; -} - -ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params) -{ - DAGSchema output_schema; - const auto & join_params = (static_cast(*params)); - for (auto & field : left->output_schema) - { - if (join_params.kind == ASTTableJoin::Kind::Right && field.second.hasNotNullFlag()) - output_schema.push_back(toNullableDAGColumnInfo(field)); - else - output_schema.push_back(field); - } - for (auto & field : right->output_schema) - { - if (join_params.kind == ASTTableJoin::Kind::Left && field.second.hasNotNullFlag()) - output_schema.push_back(toNullableDAGColumnInfo(field)); - else - output_schema.push_back(field); - } - auto join = std::make_shared(executor_index, output_schema, params); - join->children.push_back(left); - join->children.push_back(right); - return join; -} - struct QueryFragment { ExecutorPtr root_executor; diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index 3c3791aee48..8bab10df118 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -37,19 +38,6 @@ void dbgFuncTiDBQueryFromNaturalDag(Context & context, const ASTs & args, DBGInv // ./storages-client.sh "DBGInvoke mock_dag(query, region_id[, start_ts])" BlockInputStreamPtr 
dbgFuncMockTiDBQuery(Context & context, const ASTs & args);

-struct DAGProperties
-{
-    String encode_type;
-    Int64 tz_offset = 0;
-    String tz_name;
-    Int32 collator = 0;
-    bool is_mpp_query = false;
-    bool use_broadcast_join = false;
-    Int32 mpp_partition_num = 1;
-    Timestamp start_ts = DEFAULT_MAX_READ_TSO;
-    Int32 mpp_timeout = 10;
-};
-
 DAGProperties getDAGProperties(const String & prop_string);

 enum QueryTaskType

From 3c99a6a5d7223531512cd3f23103547c3877cc2b Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com>
Date: Tue, 12 Apr 2022 19:30:36 +0800
Subject: [PATCH 11/79] Update client c (#4625)

close pingcap/tiflash#4624
---
 contrib/client-c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/client-c b/contrib/client-c
index bd2ea655141..4e50596db3c 160000
--- a/contrib/client-c
+++ b/contrib/client-c
@@ -1 +1 @@
-Subproject commit bd2ea65514109b8f17bd522c5d836656c2a1c6cb
+Subproject commit 4e50596db3c878f5bf8de86fe32638f09bf2c117

From 26d4f4a38ab296fb38bdb162a07912226c59402c Mon Sep 17 00:00:00 2001
From: jiaqizho
Date: Tue, 12 Apr 2022 20:04:36 +0800
Subject: [PATCH 12/79] Fix can not find right path when blobstore restore (#4608)

ref pingcap/tiflash#3594
---
 dbms/src/Storages/Page/V3/BlobStore.cpp       | 200 +++++++-----------
 dbms/src/Storages/Page/V3/BlobStore.h         |  20 +-
 dbms/src/Storages/Page/V3/PageStorageImpl.cpp |   2 +
 .../Page/V3/tests/gtest_blob_store.cpp        |  21 +-
 .../Page/V3/tests/gtest_page_directory.cpp    |  13 +-
 5 files changed, 115 insertions(+), 141 deletions(-)

diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp
index 18f5b017e1b..24c2d0c0df9 100644
--- a/dbms/src/Storages/Page/V3/BlobStore.cpp
+++ b/dbms/src/Storages/Page/V3/BlobStore.cpp
@@ -50,12 +50,6 @@ namespace PS::V3
 {
 static constexpr bool BLOBSTORE_CHECKSUM_ON_READ = true;

-#ifndef NDEBUG
-static constexpr bool CHECK_STATS_ALL_IN_DISK = true;
-#else
-static constexpr bool CHECK_STATS_ALL_IN_DISK = false;
-#endif
-
 using BlobStat = BlobStore::BlobStats::BlobStat;
 using BlobStatPtr = BlobStore::BlobStats::BlobStatPtr;
 using ChecksumClass = Digest::CRC64;
@@ -74,6 +68,37 @@ BlobStore::BlobStore(String storage_name, const FileProviderPtr & file_provider_
 {
 }

+void BlobStore::registerPaths()
+{
+    for (const auto & path : delegator->listPaths())
+    {
+        Poco::File store_path(path);
+        if (!store_path.exists())
+        {
+            continue;
+        }
+
+        std::vector file_list;
+        store_path.list(file_list);
+
+        for (const auto & blob_name : file_list)
+        {
+            const auto & [blob_id, err_msg] = BlobStats::getBlobIdFromName(blob_name);
+            auto lock_stats = blob_stats.lock();
+            if (blob_id != INVALID_BLOBFILE_ID)
+            {
+                Poco::File blob(fmt::format("{}/{}", path, blob_name));
+                delegator->addPageFileUsedSize({blob_id, 0}, blob.getSize(), path, true);
+                blob_stats.createStatNotChecking(blob_id, lock_stats);
+            }
+            else
+            {
+                LOG_FMT_INFO(log, "Ignore not blob file [dir={}] [file={}] [err_msg={}]", path, blob_name, err_msg);
+            }
+        }
+    }
+}
+
 PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & write_limiter)
 {
     ProfileEvents::increment(ProfileEvents::PSMWritePages, wb.putWriteCount());
@@ -244,7 +269,6 @@ void BlobStore::remove(const PageEntriesV3 & del_entries)
     for (const auto & blob_id : blob_updated)
     {
         const auto & stat = blob_stats.blobIdToStat(blob_id,
-                                                    /*restore_if_not_exist*/ false,
                                                     /*ignore_not_exist*/ true);
         // Some of blob may been removed.
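registerPaths above makes the files on disk the starting truth: every name matching the blobfile_<id> convention gets a BlobStat registered, and its size accounted to the path delegator, before any page entry is restored. A standalone sketch of that scan using std::filesystem in place of Poco; the blobfile_ prefix and the stoull error handling follow the code above, while the names and the 0-as-invalid id are assumptions for illustration only:

#include <cstdint>
#include <filesystem>
#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

using BlobFileId = std::uint64_t;
static constexpr BlobFileId kInvalidBlobFileId = 0;     // stand-in for INVALID_BLOBFILE_ID
static const std::string kBlobPrefix = "blobfile_";     // same convention as BlobFile::BLOB_PREFIX_NAME

// Parse "blobfile_10" -> 10; anything else -> kInvalidBlobFileId.
BlobFileId blobIdFromName(const std::string & name)
{
    if (name.rfind(kBlobPrefix, 0) != 0)
        return kInvalidBlobFileId; // wrong prefix: not a blob file
    try
    {
        std::size_t pos = 0;
        BlobFileId id = std::stoull(name.substr(kBlobPrefix.size()), &pos);
        // Reject trailing garbage such as "blobfile_10.tmp".
        if (pos != name.size() - kBlobPrefix.size())
            return kInvalidBlobFileId;
        return id;
    }
    catch (const std::invalid_argument &) {}
    catch (const std::out_of_range &) {}
    return kInvalidBlobFileId;
}

// Collect ids of all blob files directly under `dir`, skipping everything else.
std::set<BlobFileId> scanBlobIds(const std::filesystem::path & dir)
{
    std::set<BlobFileId> ids;
    if (!std::filesystem::exists(dir))
        return ids;
    for (const auto & entry : std::filesystem::directory_iterator(dir))
    {
        BlobFileId id = blobIdFromName(entry.path().filename().string());
        if (id != kInvalidBlobFileId)
            ids.insert(id);
    }
    return ids;
}

int main()
{
    for (BlobFileId id : scanBlobIds("."))
        std::cout << "found blob file id " << id << "\n";
}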
@@ -315,11 +339,16 @@ std::pair BlobStore::getPosFromStats(size_t size) void BlobStore::removePosFromStats(BlobFileId blob_id, BlobFileOffset offset, size_t size) { + bool need_remove_stat = false; const auto & stat = blob_stats.blobIdToStat(blob_id); - auto lock = stat->lock(); - stat->removePosFromStat(offset, size, lock); + { + auto lock = stat->lock(); + need_remove_stat = stat->removePosFromStat(offset, size, lock); + } - if (stat->isReadOnly() && stat->sm_valid_size == 0) + // We don't need hold the BlobStat lock(Also can't do that). + // Because once BlobStat become Read-Only type, Then valid size won't increase. + if (need_remove_stat) { LOG_FMT_INFO(log, "Removing BlobFile [blob_id={}]", blob_id); auto lock_stats = blob_stats.lock(); @@ -878,10 +907,42 @@ BlobStore::BlobStats::BlobStats(LoggerPtr log_, PSDiskDelegatorPtr delegator_, B void BlobStore::BlobStats::restoreByEntry(const PageEntryV3 & entry) { - auto stat = blobIdToStat(entry.file_id, /*restore_if_not_exist=*/true); + auto stat = blobIdToStat(entry.file_id); stat->restoreSpaceMap(entry.offset, entry.size); } +std::pair BlobStore::BlobStats::getBlobIdFromName(String blob_name) +{ + String err_msg; + if (!startsWith(blob_name, BlobFile::BLOB_PREFIX_NAME)) + { + return {INVALID_BLOBFILE_ID, err_msg}; + } + + Strings ss; + boost::split(ss, blob_name, boost::is_any_of("_")); + + if (ss.size() != 2) + { + return {INVALID_BLOBFILE_ID, err_msg}; + } + + try + { + const auto & blob_id = std::stoull(ss[1]); + return {blob_id, err_msg}; + } + catch (std::invalid_argument & e) + { + err_msg = e.what(); + } + catch (std::out_of_range & e) + { + err_msg = e.what(); + } + return {INVALID_BLOBFILE_ID, err_msg}; +} + std::set BlobStore::BlobStats::getBlobIdsFromDisk(String path) const { std::set blob_ids_on_disk; @@ -892,43 +953,20 @@ std::set BlobStore::BlobStats::getBlobIdsFromDisk(String path) const return blob_ids_on_disk; } - std::vector file_list; store_path.list(file_list); for (const auto & blob_name : file_list) { - if (!startsWith(blob_name, BlobFile::BLOB_PREFIX_NAME)) - { - LOG_FMT_INFO(log, "Ignore not blob file [dir={}] [file={}]", path, blob_name); - continue; - } - - Strings ss; - boost::split(ss, blob_name, boost::is_any_of("_")); - - if (ss.size() != 2) - { - LOG_FMT_INFO(log, "Ignore unrecognized blob file [dir={}] [file={}]", path, blob_name); - continue; - } - - String err_msg; - try + const auto & [blob_id, err_msg] = getBlobIdFromName(blob_name); + if (blob_id != INVALID_BLOBFILE_ID) { - const auto & blob_id = std::stoull(ss[1]); blob_ids_on_disk.insert(blob_id); - continue; // continue to handle next file - } - catch (std::invalid_argument & e) - { - err_msg = e.what(); } - catch (std::out_of_range & e) + else { - err_msg = e.what(); + LOG_FMT_INFO(log, "Ignore not blob file [dir={}] [file={}] [err_msg={}]", path, blob_name, err_msg); } - LOG_FMT_INFO(log, "Ignore unrecognized blob file [dir={}] [file={}] [err={}]", path, blob_name, err_msg); } return blob_ids_on_disk; @@ -940,85 +978,10 @@ void BlobStore::BlobStats::restore() for (auto & [path, stats] : stats_map) { - std::set blob_ids_in_stats; for (const auto & stat : stats) { stat->recalculateSpaceMap(); max_restored_file_id = std::max(stat->id, max_restored_file_id); - blob_ids_in_stats.insert(stat->id); - } - - // If a BlobFile on disk with a valid rate of 0 (but has not been deleted because of some reason), - // then it won't be restored to stats. But we should check and clean up if such files exist. 
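The cleanup obligation stated in that comment is two set computations: ids on disk minus ids in stats yields orphan files that are safe to delete, and the reverse difference yields files the stats expect but the disk lost. The removed code just below builds both with std::set_difference; a minimal self-contained illustration with toy data (std::uint64_t standing in for BlobFileId, not part of the patch):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <set>
#include <vector>

int main()
{
    // Ids parsed from file names on disk vs. ids tracked by the in-memory stats.
    std::set<std::uint64_t> on_disk = {1, 2, 3, 9};
    std::set<std::uint64_t> in_stats = {1, 2, 3};

    // Orphans: present on disk but unknown to the stats -> candidates for deletion.
    std::vector<std::uint64_t> orphans;
    std::set_difference(on_disk.begin(), on_disk.end(),
                        in_stats.begin(), in_stats.end(),
                        std::back_inserter(orphans)); // std::set iterates sorted, as required

    // Missing: tracked by the stats but absent on disk -> data loss, fail loudly.
    std::vector<std::uint64_t> missing;
    std::set_difference(in_stats.begin(), in_stats.end(),
                        on_disk.begin(), on_disk.end(),
                        std::back_inserter(missing));

    std::cout << "orphans=" << orphans.size()  // 1 (id 9)
              << " missing=" << missing.size() // 0
              << "\n";
}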
- - std::set blob_ids_on_disk = getBlobIdsFromDisk(path); - - if (blob_ids_on_disk.size() < blob_ids_in_stats.size()) - { - FmtBuffer fmt_buf; - fmt_buf.fmtAppend( - "Some of Blob are missing in disk.[path={}] [stats ids: ", - path); - - fmt_buf.joinStr( - blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - [](const auto arg, FmtBuffer & fb) { - fb.fmtAppend("{}", arg); - }, - ", "); - - fmt_buf.append("]"); - - throw Exception(fmt_buf.toString(), - ErrorCodes::LOGICAL_ERROR); - } - - if constexpr (CHECK_STATS_ALL_IN_DISK) - { - std::vector blob_ids_on_disk_not_in_stats(blob_ids_in_stats.size()); - auto last_check_it = std::set_difference(blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - blob_ids_on_disk.begin(), - blob_ids_on_disk.end(), - blob_ids_on_disk_not_in_stats.begin()); - - if (last_check_it != blob_ids_on_disk_not_in_stats.begin()) - { - FmtBuffer fmt_buf; - fmt_buf.fmtAppend( - "Some of Blob are missing in disk.[path={}] [stats ids: ", - path); - - fmt_buf.joinStr( - blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - [](const auto arg, FmtBuffer & fb) { - fb.fmtAppend("{}", arg); - }, - ", "); - - fmt_buf.append("]"); - - throw Exception(fmt_buf.toString(), - ErrorCodes::LOGICAL_ERROR); - } - } - - std::vector invalid_blob_ids; - - std::set_difference(blob_ids_on_disk.begin(), - blob_ids_on_disk.end(), - blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - std::back_inserter(invalid_blob_ids)); - - for (const auto & invalid_blob_id : invalid_blob_ids) - { - const auto & invalid_blob_path = fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, invalid_blob_id); - LOG_FMT_INFO(log, "Remove invalid blob file [file={}]", invalid_blob_path); - Poco::File invalid_blob(invalid_blob_path); - invalid_blob.remove(); } } @@ -1175,7 +1138,7 @@ std::pair BlobStore::BlobStats::chooseStat(size_t buf_s return std::make_pair(stat_ptr, INVALID_BLOBFILE_ID); } -BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool restore_if_not_exist, bool ignore_not_exist) +BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool ignore_not_exist) { auto guard = lock(); for (const auto & [path, stats] : stats_map) @@ -1190,12 +1153,6 @@ BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool restore_ } } - if (restore_if_not_exist) - { - // Restore a stat without checking file_id exist or not and won't push forward the roll_id - return createStatNotChecking(file_id, guard); - } - if (!ignore_not_exist) { throw Exception(fmt::format("Can't find BlobStat with [blob_id={}]", @@ -1246,7 +1203,7 @@ BlobFileOffset BlobStore::BlobStats::BlobStat::getPosFromStat(size_t buf_size, c return offset; } -void BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard &) +bool BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard &) { if (!smap->markFree(offset, buf_size)) { @@ -1260,6 +1217,7 @@ void BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, si sm_valid_size -= buf_size; sm_valid_rate = sm_valid_size * 1.0 / sm_total_size; + return (isReadOnly() && sm_valid_size == 0); } void BlobStore::BlobStats::BlobStat::restoreSpaceMap(BlobFileOffset offset, size_t buf_size) diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index c91ba90177e..bd25542b23b 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -114,7 +114,7 @@ class BlobStore : 
private Allocator

     BlobFileOffset getPosFromStat(size_t buf_size, const std::lock_guard &);
-    void removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard &);
+    bool removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard &);

     /**
      * This method is only used when blobstore restore
@@ -140,6 +140,16 @@ class BlobStore : private Allocator
 public:
     BlobStats(LoggerPtr log_, PSDiskDelegatorPtr delegator_, BlobStore::Config config);

+    // Don't acquire a BlobStats lock while you already hold a BlobStat lock.
+    //
+    // Safe options:
+    // 1. Hold a BlobStats lock, then hold one or many BlobStat locks.
+    // 2. Hold one or many BlobStat locks without holding a BlobStats lock.
+    // 3. Hold a BlobStats lock without holding any BlobStat lock.
+    //
+    // Not safe options:
+    // 1. Hold one or many BlobStat locks, then acquire a BlobStats lock.
+    //
     [[nodiscard]] std::lock_guard lock() const;

     BlobStatPtr createStatNotChecking(BlobFileId blob_file_id, const std::lock_guard &);
@@ -166,7 +176,7 @@ class BlobStore : private Allocator
      */
     std::pair chooseStat(size_t buf_size, const std::lock_guard &);

-    BlobStatPtr blobIdToStat(BlobFileId file_id, bool restore_if_not_exist = false, bool ignore_not_exist = false);
+    BlobStatPtr blobIdToStat(BlobFileId file_id, bool ignore_not_exist = false);

     std::map> getStats() const
     {
@@ -174,13 +184,15 @@ class BlobStore : private Allocator
         return stats_map;
     }

+    std::set getBlobIdsFromDisk(String path) const;
+
+    static std::pair getBlobIdFromName(String blob_name);

#ifndef DBMS_PUBLIC_GTEST
private:
#endif
     void restoreByEntry(const PageEntryV3 & entry);
     void restore();
-    std::set getBlobIdsFromDisk(String path) const;
     friend class PageDirectoryFactory;

#ifndef DBMS_PUBLIC_GTEST
@@ -199,6 +211,8 @@ class BlobStore : private Allocator

     BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, BlobStore::Config config);

+    void registerPaths();
+
     std::vector getGCStats();

     PageEntriesEdit gc(std::map & entries_need_gc,

diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp
index cefb20a4736..9ee0616c987 100644
--- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp
+++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp
@@ -47,6 +47,8 @@ void PageStorageImpl::restore()
 {
     // TODO: clean up blobstore.
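The lock discipline spelled out in the BlobStats comment above is a two-level lock hierarchy: the coarse BlobStats lock must never be requested while a fine BlobStat lock is held. That is why removePosFromStat now only reports, from under the fine lock, whether the stat became removable, and the caller re-acquires locks in the legal order afterwards; a read-only stat can never gain valid bytes again, so the decision stays true after the fine lock is dropped. A distilled sketch of the pattern with illustrative types (not the real BlobStats/BlobStat classes):

#include <algorithm>
#include <iostream>
#include <mutex>
#include <vector>

struct Element
{
    std::mutex mtx;          // fine lock: guards one element
    size_t valid_size = 0;
    bool read_only = true;
};

struct Container
{
    std::mutex mtx;          // coarse lock: guards the element list
    std::vector<Element *> elems;

    // Assumes the caller only frees space it previously allocated.
    void removeSpace(Element & e, size_t bytes)
    {
        bool needs_removal = false;
        {
            std::lock_guard<std::mutex> fine(e.mtx); // element lock only
            e.valid_size -= bytes;
            // Decide under the fine lock, act after releasing it.
            needs_removal = e.read_only && e.valid_size == 0;
        } // fine lock released before the coarse lock is taken
        if (needs_removal)
        {
            std::lock_guard<std::mutex> coarse(mtx); // coarse lock, no fine lock held
            elems.erase(std::remove(elems.begin(), elems.end(), &e), elems.end());
        }
    }
};

int main()
{
    Element e;
    e.valid_size = 16;
    Container c;
    c.elems.push_back(&e);
    c.removeSpace(e, 16);
    std::cout << c.elems.size() << "\n"; // prints: 0
}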
// TODO: Speedup restoring + blob_store.registerPaths(); + PageDirectoryFactory factory; page_directory = factory .setBlobStore(blob_store) diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index 3db3b53dd2b..22c81cc76f3 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -70,10 +70,11 @@ try BlobFileId file_id1 = 10; BlobFileId file_id2 = 12; - const auto & path = getTemporaryPath(); - createIfNotExist(path); - Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); - Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); + { + const auto & lock = stats.lock(); + stats.createStatNotChecking(file_id1, lock); + stats.createStatNotChecking(file_id2, lock); + } { stats.restoreByEntry(PageEntryV3{ @@ -294,6 +295,7 @@ try createIfNotExist(path); Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); + blob_store.registerPaths(); { blob_store.blob_stats.restoreByEntry(PageEntryV3{ @@ -388,6 +390,7 @@ try }; auto restore_blobs = [](BlobStore & blob_store, std::vector blob_ids) { + blob_store.registerPaths(); for (const auto & id : blob_ids) { blob_store.blob_stats.restoreByEntry(PageEntryV3{ @@ -481,15 +484,7 @@ try ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); auto blob_store_check = BlobStore(getCurrentTestName(), file_provider, delegator, config); - restore_blobs(blob_store_check, {4}); - ASSERT_THROW(blob_store_check.blob_stats.restore(), DB::Exception); - // Won't remove blob if exception happened. 
- ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); - - auto blob_store_check2 = BlobStore(getCurrentTestName(), file_provider, delegator, config); - restore_blobs(blob_store_check2, {1, 2, 3, 4}); - ASSERT_THROW(blob_store_check2.blob_stats.restore(), DB::Exception); - ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); + ASSERT_THROW(restore_blobs(blob_store_check, {4}), DB::Exception); } } CATCH diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 16c2140964b..ae149fbf69b 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -1972,8 +1972,8 @@ try PageEntryV3 entry_5_v2{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x400, .checksum = 0x4567}; { PageEntriesEdit edit; - edit.put(file_id1, entry_1_v1); - edit.put(file_id2, entry_5_v1); + edit.put(1, entry_1_v1); + edit.put(5, entry_5_v1); dir->apply(std::move(edit)); } { @@ -1999,6 +1999,11 @@ try auto path = getTemporaryPath(); PSDiskDelegatorPtr delegator = std::make_shared(path); BlobStore::BlobStats stats(log, delegator, BlobStore::Config{}); + { + const auto & lock = stats.lock(); + stats.createStatNotChecking(file_id1, lock); + stats.createStatNotChecking(file_id2, lock); + } auto restored_dir = restore_from_edit(edit, stats); auto temp_snap = restored_dir->createSnapshot(); EXPECT_SAME_ENTRY(entry_1_v1, restored_dir->get(2, temp_snap).second); @@ -2006,9 +2011,9 @@ try EXPECT_SAME_ENTRY(entry_5_v2, restored_dir->get(5, temp_snap).second); // The entry_1_v1 should be restored to stats - auto stat_for_file_1 = stats.blobIdToStat(file_id1, false, false); + auto stat_for_file_1 = stats.blobIdToStat(file_id1, /*ignore_not_exist*/ false); EXPECT_TRUE(stat_for_file_1->smap->isMarkUsed(entry_1_v1.offset, entry_1_v1.size)); - auto stat_for_file_5 = stats.blobIdToStat(file_id2, false, false); + auto stat_for_file_5 = stats.blobIdToStat(file_id2, /*ignore_not_exist*/ false); // entry_5_v1 should not be restored to stats EXPECT_FALSE(stat_for_file_5->smap->isMarkUsed(entry_5_v1.offset, entry_5_v1.size)); EXPECT_TRUE(stat_for_file_5->smap->isMarkUsed(entry_5_v2.offset, entry_5_v2.size)); From 782506551bb82dd043ef8f0443bd8f3cc69b3aa0 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Wed, 13 Apr 2022 12:42:35 +0800 Subject: [PATCH 13/79] *:Fix MppInfo saving reference to temporary object. 
(#4638) close pingcap/tiflash#4476 --- dbms/src/Debug/astToExecutor.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index 4f1b262e76d..0de229bccfa 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -66,9 +66,15 @@ struct MPPInfo Timestamp start_ts; Int64 partition_id; Int64 task_id; - const std::vector & sender_target_task_ids; - const std::unordered_map> & receiver_source_task_ids_map; - MPPInfo(Timestamp start_ts_, Int64 partition_id_, Int64 task_id_, const std::vector & sender_target_task_ids_, const std::unordered_map> & receiver_source_task_ids_map_) + const std::vector sender_target_task_ids; + const std::unordered_map> receiver_source_task_ids_map; + + MPPInfo( + Timestamp start_ts_, + Int64 partition_id_, + Int64 task_id_, + const std::vector & sender_target_task_ids_, + const std::unordered_map> & receiver_source_task_ids_map_) : start_ts(start_ts_) , partition_id(partition_id_) , task_id(task_id_) From 65247ec061130f2026fe23959167c836563d4511 Mon Sep 17 00:00:00 2001 From: jinhelin Date: Wed, 13 Apr 2022 20:36:35 +0800 Subject: [PATCH 14/79] Fix logger initialization of DTWorkload. (#4637) close pingcap/tiflash#4636 --- .../DeltaMerge/tools/workload/MainEntry.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp index 14635c3feac..9730a44c5c9 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,18 @@ using namespace DB::DM::tests; std::ofstream log_ofs; +void initWorkDirs(const std::vector & dirs) +{ + for (const auto & dir : dirs) + { + int ret = ::mkdir(dir.c_str(), 0777); + if (ret != 0 && errno != EEXIST) + { + throw std::runtime_error(fmt::format("mkdir {} failed: {}", dir, strerror(errno))); + } + } +} + void init(WorkloadOptions & opts) { log_ofs.open(opts.log_file, std::ios_base::out | std::ios_base::app); @@ -248,6 +261,9 @@ int DTWorkload::mainEntry(int argc, char ** argv) return -1; } + // Log file is created in the first directory of `opts.work_dirs` by default. + // So create these work_dirs before logger initialization. 
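    // Editorial sketch (not part of the patch): the ordering bug being fixed.
    // std::ofstream::open does not throw by default, so when the first work dir
    // did not exist yet, `init(opts)` (shown above) opened opts.log_file against
    // a missing directory, the stream silently entered a failed state, and every
    // log line was dropped. The intended sequence is therefore:
    //
    //   initWorkDirs(opts.work_dirs); // mkdir each dir, tolerating EEXIST
    //   init(opts);                   // only now can log_ofs.open() succeed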
+ initWorkDirs(opts.work_dirs); // need to init logger before creating global context, // or the logging in global context won't be output to // the log file From e192ce5db1e55b6f14d8a3346327b5a8b245b9e3 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Thu, 14 Apr 2022 00:16:35 +0800 Subject: [PATCH 15/79] move `output_field_types` and `output_offsets` to `DAGContext` (#4626) ref pingcap/tiflash#4118 --- dbms/src/Flash/Coprocessor/DAGContext.cpp | 20 +++++++++++++++++ dbms/src/Flash/Coprocessor/DAGContext.h | 22 ++++++++++++++++--- dbms/src/Flash/Coprocessor/DAGQueryBlock.h | 4 ---- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 11 ++++------ .../Coprocessor/DAGQueryBlockInterpreter.h | 2 -- dbms/src/Flash/Coprocessor/DAGQuerySource.cpp | 21 ------------------ dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 1 - 7 files changed, 43 insertions(+), 38 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index a38eeef3145..1f6618d3170 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include #include @@ -33,6 +35,24 @@ bool strictSqlMode(UInt64 sql_mode) return sql_mode & TiDBSQLMode::STRICT_ALL_TABLES || sql_mode & TiDBSQLMode::STRICT_TRANS_TABLES; } +void DAGContext::initOutputInfo() +{ + output_field_types = collectOutputFieldTypes(*dag_request); + output_offsets.clear(); + result_field_types.clear(); + for (UInt32 i : dag_request->output_offsets()) + { + output_offsets.push_back(i); + if (unlikely(i >= output_field_types.size())) + throw TiFlashException( + fmt::format("{}: Invalid output offset(schema has {} columns, access index {}", __PRETTY_FUNCTION__, output_field_types.size(), i), + Errors::Coprocessor::BadRequest); + result_field_types.push_back(output_field_types[i]); + } + encode_type = analyzeDAGEncodeType(*this); + keep_session_timezone_info = encode_type == tipb::EncodeType::TypeChunk || encode_type == tipb::EncodeType::TypeCHBlock; +} + bool DAGContext::allowZeroInDate() const { return flags & TiDBSQLFlags::IGNORE_ZERO_IN_DATE; diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index b1c92a9035e..30397dc496a 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -25,8 +25,8 @@ #include #include +#include #include -#include #include #include #include @@ -112,6 +112,7 @@ constexpr UInt64 ALLOW_INVALID_DATES = 1ul << 32ul; class DAGContext { public: + // for non-mpp(cop/batchCop) explicit DAGContext(const tipb::DAGRequest & dag_request_) : dag_request(&dag_request_) , collect_execution_summaries(dag_request->has_collect_execution_summaries() && dag_request->collect_execution_summaries()) @@ -126,8 +127,11 @@ class DAGContext { assert(dag_request->has_root_executor() || dag_request->executors_size() > 0); return_executor_id = dag_request->root_executor().has_executor_id() || dag_request->executors(0).has_executor_id(); + + initOutputInfo(); } + // for mpp DAGContext(const tipb::DAGRequest & dag_request_, const mpp::TaskMeta & meta_, bool is_root_mpp_task_) : dag_request(&dag_request_) , collect_execution_summaries(dag_request->has_collect_execution_summaries() && dag_request->collect_execution_summaries()) @@ -144,8 +148,13 @@ class DAGContext , warning_count(0) { assert(dag_request->has_root_executor() && dag_request->root_executor().has_executor_id()); + + // only mpp task has join executor. 
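        // Editorial illustration (not part of the patch): what initOutputInfo()
        // below computes. Suppose collectOutputFieldTypes() yields field types
        // [t0, t1, t2] and dag_request->output_offsets() is {2, 0}. Then:
        //   output_field_types = [t0, t1, t2]  // all columns the plan can emit
        //   output_offsets     = [2, 0]        // the client's projection
        //   result_field_types = [t2, t0]      // schema of the rows sent back
        // Any offset >= output_field_types.size() is rejected as a BadRequest,
        // as in the implementation shown earlier.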
+ initExecutorIdToJoinIdMap(); + initOutputInfo(); } + // for test explicit DAGContext(UInt64 max_error_count_) : dag_request(nullptr) , collect_execution_summaries(false) @@ -162,7 +171,6 @@ class DAGContext void attachBlockIO(const BlockIO & io_); std::unordered_map & getProfileStreamsMap(); - void initExecutorIdToJoinIdMap(); std::unordered_map> & getExecutorIdToJoinIdMap(); std::unordered_map & getJoinExecuteInfoMap(); @@ -291,9 +299,17 @@ class DAGContext LoggerPtr log; - bool keep_session_timezone_info = false; + // initialized in `initOutputInfo`. std::vector result_field_types; tipb::EncodeType encode_type = tipb::EncodeType::TypeDefault; + // only meaningful in final projection. + bool keep_session_timezone_info = false; + std::vector output_field_types; + std::vector output_offsets; + +private: + void initExecutorIdToJoinIdMap(); + void initOutputInfo(); private: /// Hold io for correcting the destruction order. diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h index 6ad35bc63be..486345efa03 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h @@ -68,10 +68,6 @@ class DAGQueryBlock String qb_column_prefix; std::vector> children; - // only meaningful for root query block. - std::vector output_field_types; - std::vector output_offsets; - bool isRootQueryBlock() const { return id == 1; }; bool isTableScanSource() const { return source->tp() == tipb::ExecType::TypeTableScan || source->tp() == tipb::ExecType::TypePartitionTableScan; } }; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 6c3c6700577..51cd1bf671f 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -57,12 +57,10 @@ DAGQueryBlockInterpreter::DAGQueryBlockInterpreter( const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, size_t max_streams_, - bool keep_session_timezone_info_, std::vector & subqueries_for_sets_) : context(context_) , input_streams_vec(input_streams_vec_) , query_block(query_block_) - , keep_session_timezone_info(keep_session_timezone_info_) , max_streams(max_streams_) , subqueries_for_sets(subqueries_for_sets_) , log(Logger::get("DAGQueryBlockInterpreter", dagContext().log ? dagContext().log->identifier() : "")) @@ -118,7 +116,6 @@ AnalysisResult analyzeExpressions( Context & context, DAGExpressionAnalyzer & analyzer, const DAGQueryBlock & query_block, - bool keep_session_timezone_info, NamesWithAliases & final_project) { AnalysisResult res; @@ -174,14 +171,15 @@ AnalysisResult analyzeExpressions( res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn()); } + const auto & dag_context = *context.getDAGContext(); // Append final project results if needed. final_project = query_block.isRootQueryBlock() ? 
analyzer.appendFinalProjectForRootQueryBlock( chain, - query_block.output_field_types, - query_block.output_offsets, + dag_context.output_field_types, + dag_context.output_offsets, query_block.qb_column_prefix, - keep_session_timezone_info) + dag_context.keep_session_timezone_info) : analyzer.appendFinalProjectForNonRootQueryBlock( chain, query_block.qb_column_prefix); @@ -1057,7 +1055,6 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) context, *analyzer, query_block, - keep_session_timezone_info, final_project); if (res.before_where) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 5325b76eec6..35627cd19ee 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -48,7 +48,6 @@ class DAGQueryBlockInterpreter const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, size_t max_streams_, - bool keep_session_timezone_info_, std::vector & subqueries_for_sets_); ~DAGQueryBlockInterpreter() = default; @@ -110,7 +109,6 @@ class DAGQueryBlockInterpreter Context & context; std::vector input_streams_vec; const DAGQueryBlock & query_block; - const bool keep_session_timezone_info; NamesWithAliases final_project; diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index 72d93f86e85..882699e1599 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -20,11 +20,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int COP_BAD_DAG_REQUEST; -} // namespace ErrorCodes - DAGQuerySource::DAGQuerySource(Context & context_) : context(context_) { @@ -38,22 +33,6 @@ DAGQuerySource::DAGQuerySource(Context & context_) { root_query_block = std::make_shared(1, dag_request.executors()); } - - root_query_block->output_field_types = collectOutputFieldTypes(dag_request); - getDAGContext().initExecutorIdToJoinIdMap(); - - for (UInt32 i : dag_request.output_offsets()) - { - root_query_block->output_offsets.push_back(i); - if (unlikely(i >= root_query_block->output_field_types.size())) - throw TiFlashException( - fmt::format("{}: Invalid output offset(schema has {} columns, access index {}", __PRETTY_FUNCTION__, root_query_block->output_field_types.size(), i), - Errors::Coprocessor::BadRequest); - getDAGContext().result_field_types.push_back(root_query_block->output_field_types[i]); - } - auto encode_type = analyzeDAGEncodeType(getDAGContext()); - getDAGContext().encode_type = encode_type; - getDAGContext().keep_session_timezone_info = encode_type == tipb::EncodeType::TypeChunk || encode_type == tipb::EncodeType::TypeCHBlock; } std::tuple DAGQuerySource::parse(size_t) diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index d3c23fe2e16..1bfe87e5695 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -51,7 +51,6 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block, input_streams_vec, query_block, max_streams, - dagContext().keep_session_timezone_info || !query_block.isRootQueryBlock(), subqueries_for_sets); return query_block_interpreter.execute(); } From e41c545a6daf17c80010ae3baa2bab2e8b3b730a Mon Sep 17 00:00:00 2001 From: Fu Zhe Date: Thu, 14 Apr 2022 11:58:36 +0800 Subject: [PATCH 16/79] Fix potential data race in DynamicThreadPool (#4648) close pingcap/tiflash#4595 --- 
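Editorial note (not part of the commit): the fix below is purely structural. The
RAII guard created by UPDATE_CUR_AND_MAX_METRIC used to live until the end of
dynamicWork(), i.e. past the alive_dynamic_threads decrement that lets the pool
destructor proceed, so the guard's destructor could run concurrently with pool
teardown. A minimal sketch of the corrected shape, with a hypothetical
MetricGuard standing in for the macro and runTaskLoop() for the unchanged loop:

    void dynamicWork(TaskPtr initial_task)
    {
        {
            MetricGuard guard;                    // stands in for UPDATE_CUR_AND_MAX_METRIC
            runTaskLoop(std::move(initial_task)); // the original loop body, unchanged
        }                                         // guard destroyed while the pool is alive
        alive_dynamic_threads.fetch_sub(1);       // only now may ~DynamicThreadPool return
    }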
dbms/src/Common/DynamicThreadPool.cpp | 34 +++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/DynamicThreadPool.cpp b/dbms/src/Common/DynamicThreadPool.cpp index 482761a8bb8..5b154f8e4fd 100644 --- a/dbms/src/Common/DynamicThreadPool.cpp +++ b/dbms/src/Common/DynamicThreadPool.cpp @@ -120,26 +120,30 @@ void DynamicThreadPool::fixedWork(size_t index) void DynamicThreadPool::dynamicWork(TaskPtr initial_task) { - UPDATE_CUR_AND_MAX_METRIC(tiflash_thread_count, type_total_threads_of_thdpool, type_max_threads_of_thdpool); - executeTask(initial_task); - - DynamicNode node; - while (true) { + UPDATE_CUR_AND_MAX_METRIC(tiflash_thread_count, type_total_threads_of_thdpool, type_max_threads_of_thdpool); + executeTask(initial_task); + + DynamicNode node; + while (true) { - std::unique_lock lock(dynamic_mutex); - if (in_destructing) + { + std::unique_lock lock(dynamic_mutex); + if (in_destructing) + break; + // attach to just after head to reuse hot threads so that cold threads have chance to exit + node.appendTo(&dynamic_idle_head); + node.cv.wait_for(lock, dynamic_auto_shrink_cooldown); + node.detach(); + } + + if (!node.task) // may be timeout or cancelled break; - // attach to just after head to reuse hot threads so that cold threads have chance to exit - node.appendTo(&dynamic_idle_head); - node.cv.wait_for(lock, dynamic_auto_shrink_cooldown); - node.detach(); + executeTask(node.task); } - - if (!node.task) // may be timeout or cancelled - break; - executeTask(node.task); } + // must decrease counter after scope of `UPDATE_CUR_AND_MAX_METRIC` + // to avoid potential data race (#4595) alive_dynamic_threads.fetch_sub(1); } From 51dd32f4d98b38e8bb4b4192cce3803ff18eb57f Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Thu, 14 Apr 2022 13:10:35 +0800 Subject: [PATCH 17/79] Fix create table error (#4630) close pingcap/tiflash#4596 --- dbms/src/Databases/test/gtest_database.cpp | 120 ++++++++++++++++++++- dbms/src/Storages/Transaction/TiDB.cpp | 7 +- dbms/src/Storages/Transaction/TiDB.h | 24 ++--- 3 files changed, 133 insertions(+), 18 deletions(-) diff --git a/dbms/src/Databases/test/gtest_database.cpp b/dbms/src/Databases/test/gtest_database.cpp index 149420f09e5..72915b8644f 100644 --- a/dbms/src/Databases/test/gtest_database.cpp +++ b/dbms/src/Databases/test/gtest_database.cpp @@ -94,7 +94,7 @@ class DatabaseTiFlashTest : public ::testing::Test } } - void recreateMetadataPath() const + static void recreateMetadataPath() { String path = TiFlashTestEnv::getContext().getPath(); @@ -652,6 +652,118 @@ try } CATCH +TEST_F(DatabaseTiFlashTest, ISSUE4596) +try +{ + const String db_name = "db_1"; + auto ctx = TiFlashTestEnv::getContext(); + + { + // Create database + const String statement = "CREATE DATABASE " + db_name + " ENGINE=TiFlash"; + ASTPtr ast = parseCreateStatement(statement); + InterpreterCreateQuery interpreter(ast, ctx); + interpreter.setInternal(true); + interpreter.setForceRestoreData(false); + interpreter.execute(); + } + + auto db = ctx.getDatabase(db_name); + + const String tbl_name = "t_111"; + { + /// Create table + ParserCreateQuery parser; + const String stmt = fmt::format("CREATE TABLE `{}`.`{}` ", db_name, tbl_name) + + R"stmt( + (`id` Int32,`b` String) Engine = DeltaMerge((`id`), + '{ + "cols":[{ + "comment":"", + "default":null, + "default_bit":null, + "id":1, + "name":{ + "L":"id", + "O":"id" + }, + "offset":0, + "origin_default":null, + "state":5, + "type":{ + 
"Charset":"binary", + "Collate":"binary", + "Decimal":0, + "Elems":null, + "Flag":515, + "Flen":16, + "Tp":3 + } + }, + { + "comment":"", + "default":"", + "default_bit":null, + "id":15, + "name":{ + "L":"b", + "O":"b" + }, + "offset":12, + "origin_default":"", + "state":5, + "type":{ + "Charset":"binary", + "Collate":"binary", + "Decimal":0, + "Elems":null, + "Flag":4225, + "Flen":-1, + "Tp":251 + } + }], + "comment":"", + "id":330, + "index_info":[], + "is_common_handle":false, + "name":{ + "L":"test", + "O":"test" + }, + "partition":null, + "pk_is_handle":true, + "schema_version":465, + "state":5, + "update_timestamp":99999 + }' + ) + )stmt"; + ASTPtr ast = parseQuery(parser, stmt, 0); + + InterpreterCreateQuery interpreter(ast, ctx); + interpreter.setInternal(true); + interpreter.setForceRestoreData(false); + interpreter.execute(); + } + + EXPECT_FALSE(db->empty(ctx)); + EXPECT_TRUE(db->isTableExist(ctx, tbl_name)); + + { + // Get storage from database + auto storage = db->tryGetTable(ctx, tbl_name); + ASSERT_NE(storage, nullptr); + + EXPECT_EQ(storage->getName(), MutableSupport::delta_tree_storage_name); + EXPECT_EQ(storage->getTableName(), tbl_name); + + auto managed_storage = std::dynamic_pointer_cast(storage); + EXPECT_EQ(managed_storage->getDatabaseName(), db_name); + EXPECT_EQ(managed_storage->getTableInfo().name, "test"); + } +} +CATCH + TEST_F(DatabaseTiFlashTest, ISSUE1055) try { @@ -688,7 +800,7 @@ try DatabaseLoading::loadTable(ctx, *db, meta_path, db_name, db_data_path, "TiFlash", "t_45.sql", false); // Get storage from database - const auto tbl_name = "t_45"; + const auto * tbl_name = "t_45"; auto storage = db->tryGetTable(ctx, tbl_name); ASSERT_NE(storage, nullptr); EXPECT_EQ(storage->getName(), MutableSupport::delta_tree_storage_name); @@ -776,7 +888,7 @@ try auto db = ctx.getDatabase(name_mapper.mapDatabaseName(*db_info)); ASSERT_NE(db, nullptr); EXPECT_EQ(db->getEngineName(), "TiFlash"); - auto flash_db = typeid_cast(db.get()); + auto * flash_db = typeid_cast(db.get()); auto & db_info_get = flash_db->getDatabaseInfo(); ASSERT_EQ(db_info_get.name, expect_name); } @@ -841,7 +953,7 @@ try )", }; - for (auto & statement : statements) + for (const auto & statement : statements) { { // Cleanup: Drop database if exists diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index 580850de08a..763dcac39fc 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -155,10 +155,13 @@ Field ColumnInfo::defaultValueToField() const auto v = value.convert(); if (hasBinaryFlag()) { - // For binary column, we have to pad trailing zeros according to the specified type length. + // For some binary column(like varchar(20)), we have to pad trailing zeros according to the specified type length. // User may define default value `0x1234` for a `BINARY(4)` column, TiDB stores it in a string "\u12\u34" (sized 2). // But it actually means `0x12340000`. - v.append(flen - v.length(), '\0'); + // And for some binary column(like longblob), we do not need to pad trailing zeros. + // And the `Flen` is set to -1, therefore we need to check `Flen >= 0` here. 
+ if (Int32 vlen = v.length(); flen >= 0 && vlen < flen) + v.append(flen - vlen, '\0'); } return v; } diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 3ea573c0972..f67bfb332c7 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -91,7 +91,7 @@ enum TP #ifdef M #error "Please undefine macro M first." #endif -#define M(tt, v, cf, ct, w) Type##tt = v, +#define M(tt, v, cf, ct, w) Type##tt = (v), COLUMN_TYPES(M) #undef M }; @@ -123,7 +123,7 @@ enum ColumnFlag #ifdef M #error "Please undefine macro M first." #endif -#define M(cf, v) ColumnFlag##cf = v, +#define M(cf, v) ColumnFlag##cf = (v), COLUMN_FLAGS(M) #undef M }; @@ -152,7 +152,7 @@ enum CodecFlag #ifdef M #error "Please undefine macro M first." #endif -#define M(cf, v) CodecFlag##cf = v, +#define M(cf, v) CodecFlag##cf = (v), CODEC_FLAGS(M) #undef M }; @@ -197,10 +197,10 @@ struct ColumnInfo #ifdef M #error "Please undefine macro M first." #endif -#define M(f, v) \ - inline bool has##f##Flag() const { return (flag & v) != 0; } \ - inline void set##f##Flag() { flag |= v; } \ - inline void clear##f##Flag() { flag &= (~v); } +#define M(f, v) \ + inline bool has##f##Flag() const { return (flag & (v)) != 0; } \ + inline void set##f##Flag() { flag |= (v); } \ + inline void clear##f##Flag() { flag &= (~(v)); } COLUMN_FLAGS(M) #undef M @@ -225,7 +225,7 @@ struct PartitionDefinition { PartitionDefinition() = default; - PartitionDefinition(Poco::JSON::Object::Ptr json); + explicit PartitionDefinition(Poco::JSON::Object::Ptr json); Poco::JSON::Object::Ptr getJSONObject() const; @@ -241,7 +241,7 @@ struct PartitionInfo { PartitionInfo() = default; - PartitionInfo(Poco::JSON::Object::Ptr json); + explicit PartitionInfo(Poco::JSON::Object::Ptr json); Poco::JSON::Object::Ptr getJSONObject() const; @@ -264,7 +264,7 @@ struct DBInfo SchemaState state; DBInfo() = default; - DBInfo(const String & json) { deserialize(json); } + explicit DBInfo(const String & json) { deserialize(json); } String serialize() const; @@ -375,9 +375,9 @@ struct TableInfo ::TiDB::StorageEngine engine_type = ::TiDB::StorageEngine::UNSPECIFIED; ColumnID getColumnID(const String & name) const; - String getColumnName(const ColumnID id) const; + String getColumnName(ColumnID id) const; - const ColumnInfo & getColumnInfo(const ColumnID id) const; + const ColumnInfo & getColumnInfo(ColumnID id) const; std::optional> getPKHandleColumn() const; From 402e4779df81abb316d7196b2ac297f1d671e8dc Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Thu, 14 Apr 2022 13:46:36 +0800 Subject: [PATCH 18/79] Add a pagestorage v3 controller. 
(#4320) ref pingcap/tiflash#3594 --- dbms/src/Storages/Page/V3/BlobStore.cpp | 6 +- dbms/src/Storages/Page/V3/BlobStore.h | 2 + dbms/src/Storages/Page/V3/PageDirectory.h | 2 + dbms/src/Storages/Page/V3/PageStorageImpl.h | 1 + .../Storages/Page/V3/spacemap/SpaceMap.cpp | 5 +- dbms/src/Storages/Page/V3/spacemap/SpaceMap.h | 10 +- .../Page/V3/spacemap/SpaceMapRBTree.cpp | 24 +- .../Page/V3/spacemap/SpaceMapRBTree.h | 2 +- .../Page/V3/spacemap/SpaceMapSTDMap.h | 11 +- .../src/Storages/Page/V3/tests/CMakeLists.txt | 8 +- .../Storages/Page/V3/tests/gtest_free_map.cpp | 6 +- .../Page/V3/tests/page_storage_ctl.cpp | 477 ++++++++++++++++++ dbms/src/Storages/Page/stress/PSStressEnv.cpp | 2 + dbms/src/Storages/Page/stress/PSStressEnv.h | 6 +- dbms/src/Storages/Page/stress/PSWorkload.cpp | 7 +- .../Storages/Page/stress/workload/Normal.cpp | 6 +- 16 files changed, 537 insertions(+), 38 deletions(-) create mode 100644 dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 24c2d0c0df9..6919e8081bd 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -325,7 +325,7 @@ std::pair BlobStore::getPosFromStats(size_t size) // Can't insert into this spacemap if (offset == INVALID_BLOBFILE_OFFSET) { - stat->smap->logStats(); + stat->smap->logDebugString(); throw Exception(fmt::format("Get postion from BlobStat failed, it may caused by `sm_max_caps` is no correct. [size={}] [old_max_caps={}] [max_caps={}] [blob_id={}]", size, old_max_cap, @@ -1207,7 +1207,7 @@ bool BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, si { if (!smap->markFree(offset, buf_size)) { - smap->logStats(); + smap->logDebugString(); throw Exception(fmt::format("Remove postion from BlobStat failed, [offset={} , buf_size={}, blob_id={}] is invalid.", offset, buf_size, @@ -1224,7 +1224,7 @@ void BlobStore::BlobStats::BlobStat::restoreSpaceMap(BlobFileOffset offset, size { if (!smap->markUsed(offset, buf_size)) { - smap->logStats(); + smap->logDebugString(); throw Exception(fmt::format("Restore postion from BlobStat failed, [offset={}] [buf_size={}] [blob_id={}] is used or subspan is used", offset, buf_size, diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index bd25542b23b..a289081acab 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -269,6 +269,8 @@ class BlobStore : private Allocator BlobFilePtr getBlobFile(BlobFileId blob_id); friend class PageDirectoryFactory; + friend class PageStorageControl; + #ifndef DBMS_PUBLIC_GTEST private: #endif diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index d0cc6ffd313..4cdf51bbf91 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -252,6 +252,7 @@ class VersionedPageEntries being_ref_count, entries.size()); } + friend class PageStorageControl; private: mutable std::mutex m; @@ -365,6 +366,7 @@ class PageDirectory PageDirectory & operator=(PageDirectory && rhs) = delete; friend class PageDirectoryFactory; + friend class PageStorageControl; private: // Only `std::map` is allow for `MVCCMap`. 
Cause `std::map::insert` ensure that diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index 2db4ea9d8e9..eb1fc91b2e7 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -82,6 +82,7 @@ class PageStorageImpl : public DB::PageStorage #endif friend class PageDirectoryFactory; + friend class PageStorageControl; #ifndef DBMS_PUBLIC_GTEST private: #endif diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp index e203418454c..7ee9e02ce48 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp @@ -59,11 +59,12 @@ bool SpaceMap::checkSpace(UInt64 offset, size_t size) const return (offset < start) || (offset > end) || (offset + size - 1 > end); } -void SpaceMap::logStats() +void SpaceMap::logDebugString() { - smapStats(); + LOG_DEBUG(log, toDebugString()); } + bool SpaceMap::markFree(UInt64 offset, size_t length) { if (checkSpace(offset, length)) diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h index f50eeee580a..e4af33c5a81 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h @@ -118,7 +118,12 @@ class SpaceMap /** * Log the status of space map */ - void logStats(); + void logDebugString(); + + /** + * return the status of space map + */ + virtual String toDebugString() = 0; SpaceMapType getType() const { @@ -143,9 +148,6 @@ class SpaceMap virtual ~SpaceMap() = default; - /* Print space maps status */ - virtual void smapStats() = 0; - // Return true if space [offset, offset+size) are all free virtual bool isMarkUnused(UInt64 offset, size_t size) = 0; diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp index 33bb1304a19..3b4c6a28099 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp @@ -464,36 +464,28 @@ void RBTreeSpaceMap::freeSmap() } } -void RBTreeSpaceMap::smapStats() +String RBTreeSpaceMap::toDebugString() { struct rb_node * node = nullptr; struct SmapRbEntry * entry; UInt64 count = 0; - UInt64 max_size = 0; - UInt64 min_size = ULONG_MAX; + FmtBuffer fmt_buffer; if (rb_tree->root.rb_node == nullptr) { - LOG_ERROR(log, "Tree have not been inited."); - return; + fmt_buffer.append("Tree have not been inited."); + return fmt_buffer.toString(); } - LOG_DEBUG(log, "RB-Tree entries status: "); + fmt_buffer.append(" RB-Tree entries status: \n"); for (node = rb_tree_first(&rb_tree->root); node != nullptr; node = rb_tree_next(node)) { entry = node_to_entry(node); - LOG_FMT_DEBUG(log, " Space: {} start: {} size: {}", count, entry->start, entry->count); + fmt_buffer.fmtAppend(" Space: {} start: {} size: {} \n", count, entry->start, entry->count); count++; - if (entry->count > max_size) - { - max_size = entry->count; - } - - if (entry->count < min_size) - { - min_size = entry->count; - } } + + return fmt_buffer.toString(); } bool RBTreeSpaceMap::isMarkUnused(UInt64 offset, size_t length) diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h index baeb6ef20b9..8c53724be7d 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h @@ -56,7 +56,7 @@ class RBTreeSpaceMap void freeSmap(); - void 
smapStats() override; + String toDebugString() override; bool isMarkUnused(UInt64 offset, size_t length) override; diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h index ac74fdcaaed..6e57abd43f4 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h @@ -57,16 +57,21 @@ class STDMapSpaceMap free_map.insert({start, end}); } - void smapStats() override + String toDebugString() override { UInt64 count = 0; - LOG_FMT_DEBUG(log, "STD-Map entries status: "); + FmtBuffer fmt_buffer; + fmt_buffer.append(" STD-Map entries status: \n"); + + // Need use `count`,so can't use `joinStr` here. for (auto it = free_map.begin(); it != free_map.end(); it++) { - LOG_FMT_DEBUG(log, " Space: {} start: {} size : {}", count, it->first, it->second); + fmt_buffer.fmtAppend(" Space: {} start: {} size : {}\n", count, it->first, it->second); count++; } + + return fmt_buffer.toString(); } std::pair getSizes() const override diff --git a/dbms/src/Storages/Page/V3/tests/CMakeLists.txt b/dbms/src/Storages/Page/V3/tests/CMakeLists.txt index 355247c9eba..8bab6afcded 100644 --- a/dbms/src/Storages/Page/V3/tests/CMakeLists.txt +++ b/dbms/src/Storages/Page/V3/tests/CMakeLists.txt @@ -26,4 +26,10 @@ add_executable(gtests_page_storage_v3 ${ps_v3_gtest_sources} ${TiFlash_SOURCE_DI target_link_libraries(gtests_page_storage_v3 page_storage_v3 gtest_main) target_compile_options(gtests_page_storage_v3 PRIVATE -Wno-unknown-pragmas) target_compile_definitions(gtests_page_storage_v3 PRIVATE DBMS_PUBLIC_GTEST) -add_check(gtests_page_storage_v3) \ No newline at end of file +add_check(gtests_page_storage_v3) + + +add_executable(page_storage_ctl EXCLUDE_FROM_ALL page_storage_ctl.cpp) +target_compile_definitions(page_storage_ctl PUBLIC DBMS_PUBLIC_GTEST) +target_link_libraries(page_storage_ctl dbms page_storage_v3) +target_compile_options(page_storage_ctl PRIVATE -Wno-format -lc++) # turn off printf format check diff --git a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp index a4f3fdbe948..85a94ec0ac3 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp @@ -54,7 +54,7 @@ TEST_P(SpaceMapTest, InitAndDestory) { SpaceMapPtr smap = SpaceMap::createSpaceMap(test_type, 0, 100); - smap->logStats(); + smap->logDebugString(); } @@ -256,11 +256,11 @@ TEST_P(SpaceMapTest, TestMargins2) // Right margin in marked used space // Left margin contain freed space ASSERT_FALSE(smap->markFree(49, 10)); - smap->logStats(); + smap->logDebugString(); // Left margin align with marked used space left margin // But right margin contain freed space ASSERT_FALSE(smap->markFree(51, 20)); - smap->logStats(); + smap->logDebugString(); // Right margin align with marked used space right margin // But left margin contain freed space ASSERT_FALSE(smap->markUsed(40, 19)); diff --git a/dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp b/dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp new file mode 100644 index 00000000000..4f3cefa0ad7 --- /dev/null +++ b/dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp @@ -0,0 +1,477 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB::PS::V3
+{
+struct ControlOptions
+{
+    enum DisplayType
+    {
+        DISPLAY_SUMMARY_INFO = 1,
+        DISPLAY_DIRECTORY_INFO = 2,
+        DISPLAY_BLOBS_INFO = 3,
+        CHECK_ALL_DATA_CRC = 4,
+    };
+
+    std::vector<String> paths;
+    int display_mode = DisplayType::DISPLAY_SUMMARY_INFO;
+    UInt64 query_page_id = UINT64_MAX;
+    UInt32 query_blob_id = UINT32_MAX;
+    UInt64 query_ns_id = DB::TEST_NAMESPACE_ID;
+    UInt64 check_page_id = UINT64_MAX;
+    bool enable_fo_check = true;
+
+    static ControlOptions parse(int argc, char ** argv);
+};
+
+
+ControlOptions ControlOptions::parse(int argc, char ** argv)
+{
+    namespace po = boost::program_options;
+    using po::value;
+
+    po::options_description desc("Allowed options");
+    desc.add_options()("help,h", "produce help message") //
+        ("paths,P", value<std::vector<String>>(), "store path(s)") //
+        ("display_mode,D", value<int>()->default_value(1), "Display mode: 1 prints summary information,\n 2 prints all stored pages and their version chains (can be very long),\n 3 prints the data distribution of all blobs on disk,\n 4 checks that all data is valid.") //
+        ("enable_fo_check,E", value<bool>()->default_value(true), "Also check every field offset. This option only works when `display_mode` is 4.") //
+        ("query_ns_id,N", value<UInt64>()->default_value(DB::TEST_NAMESPACE_ID), "The namespace id to use when querying with `check_page_id`/`query_page_id`/`query_blob_id`.") //
+        ("check_page_id,C", value<UInt64>()->default_value(UINT64_MAX), "Check a single Page id and display the exception if one is met. Field offsets are checked as well.") //
+        ("query_page_id,W", value<UInt64>()->default_value(UINT64_MAX), "Query a single Page id and print its version chain.") //
+        ("query_blob_id,B", value<UInt32>()->default_value(UINT32_MAX), "Query a single Blob id and print its data distribution.");
+
+
+    static_assert(sizeof(DB::PageId) == sizeof(UInt64));
+    static_assert(sizeof(DB::BlobFileId) == sizeof(UInt32));
+
+    po::variables_map options;
+    po::store(po::parse_command_line(argc, argv, desc), options);
+    po::notify(options);
+
+    if (options.count("help") > 0)
+    {
+        std::cerr << desc << std::endl;
+        exit(0);
+    }
+
+    ControlOptions opt;
+
+    if (options.count("paths") == 0)
+    {
+        std::cerr << "Missing required option: paths."
<< std::endl; + std::cerr << desc << std::endl; + exit(0); + } + opt.paths = options["paths"].as>(); + opt.display_mode = options["display_mode"].as(); + opt.query_page_id = options["query_page_id"].as(); + opt.query_blob_id = options["query_blob_id"].as(); + opt.enable_fo_check = options["enable_fo_check"].as(); + opt.check_page_id = options["check_page_id"].as(); + opt.query_ns_id = options["query_ns_id"].as(); + + if (opt.display_mode < DisplayType::DISPLAY_SUMMARY_INFO || opt.display_mode > DisplayType::CHECK_ALL_DATA_CRC) + { + std::cerr << "Invalid display mode: " << opt.display_mode << std::endl; + std::cerr << desc << std::endl; + exit(0); + } + + return opt; +} + +class PageStorageControl +{ +public: + explicit PageStorageControl(const ControlOptions & options_) + : options(options_) + { + } + + void run() + { + DB::PSDiskDelegatorPtr delegator; + if (options.paths.size() == 1) + { + delegator = std::make_shared(options.paths[0]); + } + else + { + delegator = std::make_shared(options.paths); + } + + auto key_manager = std::make_shared(false); + auto file_provider = std::make_shared(key_manager, false); + + BlobStore::Config blob_config; + + PageStorage::Config config; + PageStorageImpl ps_v3("PageStorageControl", delegator, config, file_provider); + ps_v3.restore(); + PageDirectory::MVCCMapType & mvcc_table_directory = ps_v3.page_directory->mvcc_table_directory; + + switch (options.display_mode) + { + case ControlOptions::DisplayType::DISPLAY_SUMMARY_INFO: + { + std::cout << getSummaryInfo(mvcc_table_directory, ps_v3.blob_store) << std::endl; + break; + } + case ControlOptions::DisplayType::DISPLAY_DIRECTORY_INFO: + { + std::cout << getDirectoryInfo(mvcc_table_directory, options.query_ns_id, options.query_page_id) << std::endl; + break; + } + case ControlOptions::DisplayType::DISPLAY_BLOBS_INFO: + { + std::cout << getBlobsInfo(ps_v3.blob_store, options.query_blob_id) << std::endl; + break; + } + case ControlOptions::DisplayType::CHECK_ALL_DATA_CRC: + { + if (options.check_page_id != UINT64_MAX) + { + std::cout << checkSinglePage(mvcc_table_directory, ps_v3.blob_store, options.query_ns_id, options.check_page_id) << std::endl; + } + else + { + std::cout << checkAllDatasCrc(mvcc_table_directory, ps_v3.blob_store, options.enable_fo_check) << std::endl; + } + break; + } + default: + std::cout << "Invalid display mode." 
<< std::endl;
+            break;
+        }
+    }
+
+private:
+    static String getBlobsInfo(BlobStore & blob_store, UInt32 blob_id)
+    {
+        auto stat_info = [](const BlobStore::BlobStats::BlobStatPtr & stat, const String & path) {
+            FmtBuffer stat_str;
+            stat_str.fmtAppend("    stat id: {}\n"
+                               "     path: {}\n"
+                               "     total size: {}\n"
+                               "     valid size: {}\n"
+                               "     valid rate: {}\n"
+                               "     max cap: {}\n", //
+                               stat->id, //
+                               path,
+                               stat->sm_total_size, //
+                               stat->sm_valid_size, //
+                               stat->sm_valid_rate, //
+                               stat->sm_max_caps);
+
+            stat_str.append(stat->smap->toDebugString());
+            stat_str.append("\n");
+            return stat_str.toString();
+        };
+
+        FmtBuffer stats_info;
+        stats_info.append("  Blobs specific info: \n\n");
+
+        for (const auto & [path, stats] : blob_store.blob_stats.getStats())
+        {
+            for (const auto & stat : stats)
+            {
+                if (blob_id != UINT32_MAX)
+                {
+                    if (stat->id == blob_id)
+                    {
+                        stats_info.append(stat_info(stat, path));
+                        return stats_info.toString();
+                    }
+                    continue;
+                }
+
+                stats_info.append(stat_info(stat, path));
+            }
+        }
+
+        if (blob_id != UINT32_MAX)
+        {
+            stats_info.fmtAppend("    blob {} not found", blob_id);
+        }
+        return stats_info.toString();
+    }
+
+    static String getDirectoryInfo(PageDirectory::MVCCMapType & mvcc_table_directory, UInt64 ns_id, UInt64 page_id)
+    {
+        auto page_info = [](UInt128 page_internal_id_, const VersionedPageEntriesPtr & versioned_entries) {
+            FmtBuffer page_str;
+            page_str.fmtAppend("  page id {}\n", page_internal_id_);
+            page_str.fmtAppend("    {}\n", versioned_entries->toDebugString());
+
+            size_t count = 0;
+            for (const auto & [version, entry_or_del] : versioned_entries->entries)
+            {
+                const auto & entry = entry_or_del.entry;
+                page_str.fmtAppend("      entry {}\n"
+                                   "       sequence: {}\n"
+                                   "       epoch: {}\n"
+                                   "       is del: {}\n"
+                                   "       blob id: {}\n"
+                                   "       offset: {}\n"
+                                   "       size: {}\n"
+                                   "       crc: {}\n", //
+                                   count++, //
+                                   version.sequence, //
+                                   version.epoch, //
+                                   entry_or_del.isDelete(), //
+                                   entry.file_id, //
+                                   entry.offset, //
+                                   entry.size, //
+                                   entry.checksum, //
+                                   entry.field_offsets.size() //
+                );
+                if (!entry.field_offsets.empty())
+                {
+                    page_str.append("         field offset:\n");
+                    for (const auto & [offset, crc] : entry.field_offsets)
+                    {
+                        page_str.fmtAppend("          offset: {} crc: 0x{:X}\n", offset, crc);
+                    }
+                    page_str.append("\n");
+                }
+            }
+            return page_str.toString();
+        };
+
+        FmtBuffer directory_info;
+        directory_info.append("  Directory specific info: \n\n");
+        for (const auto & [internal_id, versioned_entries] : mvcc_table_directory)
+        {
+            if (page_id != UINT64_MAX)
+            {
+                if (internal_id.low == page_id && internal_id.high == ns_id)
+                {
+                    directory_info.append(page_info(internal_id, versioned_entries));
+                    return directory_info.toString();
+                }
+                continue;
+            }
+            directory_info.append(page_info(internal_id, versioned_entries));
+        }
+
+        if (page_id != UINT64_MAX)
+        {
+            directory_info.fmtAppend("  page {} not found", page_id);
+        }
+        return directory_info.toString();
+    }
+
+    static String getSummaryInfo(PageDirectory::MVCCMapType & mvcc_table_directory, BlobStore & blob_store)
+    {
+        UInt64 longest_version_chain = 0;
+        UInt64 shortest_version_chain = UINT64_MAX;
+        FmtBuffer dir_summary_info;
+
+        dir_summary_info.append("  Directory summary info: \n");
+
+        for (const auto & [internal_id, versioned_entries] : mvcc_table_directory)
+        {
+            (void)internal_id;
+            longest_version_chain = std::max(longest_version_chain, versioned_entries->size());
+            shortest_version_chain = std::min(shortest_version_chain, versioned_entries->size());
+        }
+
+        dir_summary_info.fmtAppend("    total pages: {}, longest version chain: {}, shortest version chain: {} \n\n",
+                                   mvcc_table_directory.size(),
+                                   longest_version_chain,
+                                   shortest_version_chain);
+
+        dir_summary_info.append("  Blobs summary info: \n");
+        const auto & blob_stats = blob_store.blob_stats.getStats();
+        dir_summary_info.joinStr(
+            blob_stats.begin(),
+            blob_stats.end(),
+            [](const auto arg, FmtBuffer & fb) {
+                for (const auto & stat : arg.second)
+                {
+                    fb.fmtAppend("    stat id: {}\n"
+                                 "     path: {}\n"
+                                 "     total size: {}\n"
+                                 "     valid size: {}\n"
+                                 "     valid rate: {}\n"
+                                 "     max cap: {}\n",
+                                 stat->id,
+                                 arg.first,
+                                 stat->sm_total_size,
+                                 stat->sm_valid_size,
+                                 stat->sm_valid_rate,
+                                 stat->sm_max_caps);
+                }
+            },
+            "");
+
+        return dir_summary_info.toString();
+    }
+
+    static String checkSinglePage(PageDirectory::MVCCMapType & mvcc_table_directory, BlobStore & blob_store, UInt64 ns_id, UInt64 page_id)
+    {
+        const auto & page_internal_id = buildV3Id(ns_id, page_id);
+        const auto & it = mvcc_table_directory.find(page_internal_id);
+        if (it == mvcc_table_directory.end())
+        {
+            return fmt::format("Can't find {}", page_internal_id);
+        }
+
+        FmtBuffer error_msg;
+        size_t error_count = 0;
+        for (const auto & [version, entry_or_del] : it->second->entries)
+        {
+            if (entry_or_del.isEntry() && it->second->type == EditRecordType::VAR_ENTRY)
+            {
+                (void)blob_store;
+                try
+                {
+                    PageIDAndEntryV3 to_read_entry;
+                    const PageEntryV3 & entry = entry_or_del.entry;
+                    PageIDAndEntriesV3 to_read;
+                    to_read_entry.first = page_internal_id;
+                    to_read_entry.second = entry;
+
+                    to_read.emplace_back(to_read_entry);
+                    blob_store.read(to_read);
+
+                    if (!entry.field_offsets.empty())
+                    {
+                        DB::PageStorage::FieldIndices indices(entry.field_offsets.size());
+                        std::iota(std::begin(indices), std::end(indices), 0);
+
+                        BlobStore::FieldReadInfos infos;
+                        BlobStore::FieldReadInfo info(page_internal_id, entry, indices);
+                        infos.emplace_back(info);
+                        blob_store.read(infos);
+                    }
+                }
+                catch (DB::Exception & e)
+                {
+                    error_count++;
+                    error_msg.append(e.displayText());
+                    error_msg.append("\n");
+                }
+            }
+        }
+
+        if (error_count == 0)
+        {
+            return fmt::format("Checked {} without any error.", page_internal_id);
+        }
+
+        error_msg.fmtAppend("Checking {} met {} errors!", page_internal_id, error_count);
+        return error_msg.toString();
+    }
+
+    static String checkAllDatasCrc(PageDirectory::MVCCMapType & mvcc_table_directory, BlobStore & blob_store, bool enable_fo_check)
+    {
+        size_t total_pages = mvcc_table_directory.size();
+        size_t cut_index = 0;
+        size_t index = 0;
+        std::cout << fmt::format("Begin to check the CRC of all data. enable_fo_check={}", static_cast<int>(enable_fo_check)) << std::endl;
+
+        std::list> error_versioned_pages;
+        for (const auto & [internal_id, versioned_entries] : mvcc_table_directory)
+        {
+            if (index == total_pages / 10 * cut_index)
+            {
+                std::cout << fmt::format("processing: {}%", cut_index * 10) << std::endl;
+                cut_index++;
+            }
+
+            // TODO: need to replace this by getLastEntry();
+            for (const auto & [version, entry_or_del] : versioned_entries->entries)
+            {
+                if (entry_or_del.isEntry() && versioned_entries->type == EditRecordType::VAR_ENTRY)
+                {
+                    (void)blob_store;
+                    try
+                    {
+                        PageIDAndEntryV3 to_read_entry;
+                        const PageEntryV3 & entry = entry_or_del.entry;
+                        PageIDAndEntriesV3 to_read;
+                        to_read_entry.first = internal_id;
+                        to_read_entry.second = entry;
+
+                        to_read.emplace_back(to_read_entry);
+                        blob_store.read(to_read);
+
+                        if (enable_fo_check && !entry.field_offsets.empty())
+                        {
+                            DB::PageStorage::FieldIndices indices(entry.field_offsets.size());
+                            std::iota(std::begin(indices), std::end(indices), 0);
+
+                            BlobStore::FieldReadInfos infos;
+                            BlobStore::FieldReadInfo info(internal_id, entry, indices);
+                            infos.emplace_back(info);
+                            blob_store.read(infos);
+                        }
+                    }
+                    catch (DB::Exception & e)
+                    {
+                        error_versioned_pages.emplace_back(std::make_pair(internal_id, version));
+                    }
+                }
+            }
+            index++;
+        }
+
+        if (error_versioned_pages.empty())
+        {
+            return "All data checked. All passed.";
+        }
+
+        FmtBuffer error_msg;
+        error_msg.append("Found errors in these pages: ");
+        for (const auto & [internal_id, versioned] : error_versioned_pages)
+        {
+            error_msg.fmtAppend("id: {}, sequence: {}, epoch: {} \n", internal_id, versioned.sequence, versioned.epoch);
+        }
+        error_msg.append("Please use `--query_ns_id` + `--check_page_id` to get more error info.");
+
+        return error_msg.toString();
+    }
+
+private:
+    ControlOptions options;
+};
+
+
+} // namespace DB::PS::V3
+
+using namespace DB::PS::V3;
+int main(int argc, char ** argv)
+{
+    const auto & options = ControlOptions::parse(argc, argv);
+    PageStorageControl(options).run();
+    return 0;
+}
\ No newline at end of file
diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.cpp b/dbms/src/Storages/Page/stress/PSStressEnv.cpp
index afe2ef700ea..7d680cd43c0 100644
--- a/dbms/src/Storages/Page/stress/PSStressEnv.cpp
+++ b/dbms/src/Storages/Page/stress/PSStressEnv.cpp
@@ -49,6 +49,7 @@ StressEnv StressEnv::parse(int argc, char ** argv)
         ("read_concurrency,R", value()->default_value(16), "number of read threads") //
         ("clean_before_run,C", value()->default_value(false), "drop data before running") //
         ("init_pages,I", value()->default_value(false), "init pages if not exist before running") //
+        ("just_init_pages,J", value()->default_value(false), "Only init pages 0 - 1000. Then quit") //
         ("timeout,T", value()->default_value(600), "maximum run time (seconds). 
0 means run infinitely") // ("writer_slots", value()->default_value(4), "number of PageStorage writer slots") // ("read_delay_ms", value()->default_value(0), "millionseconds of read delay") // @@ -75,6 +76,7 @@ StressEnv StressEnv::parse(int argc, char ** argv) opt.num_writers = options["write_concurrency"].as(); opt.num_readers = options["read_concurrency"].as(); opt.init_pages = options["init_pages"].as(); + opt.just_init_pages = options["just_init_pages"].as(); opt.clean_before_run = options["clean_before_run"].as(); opt.timeout_s = options["timeout"].as(); opt.read_delay_ms = options["read_delay_ms"].as(); diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.h b/dbms/src/Storages/Page/stress/PSStressEnv.h index fce903d37c7..1c7d8ee761f 100644 --- a/dbms/src/Storages/Page/stress/PSStressEnv.h +++ b/dbms/src/Storages/Page/stress/PSStressEnv.h @@ -75,6 +75,7 @@ struct StressEnv size_t num_writers = 1; size_t num_readers = 4; bool init_pages = false; + bool just_init_pages = false; bool clean_before_run = false; size_t timeout_s = 0; size_t read_delay_ms = 0; @@ -92,8 +93,8 @@ struct StressEnv { return fmt::format( "{{ " - "num_writers: {}, num_readers: {}, init_pages: {}, clean_before_run: {}" - ", timeout_s: {}, read_delay_ms: {}, num_writer_slots: {}" + "num_writers: {}, num_readers: {}, init_pages: {}, just_init_pages: {}" + ", clean_before_run: {}, timeout_s: {}, read_delay_ms: {}, num_writer_slots: {}" ", avg_page_size_mb: {}, paths: [{}], failpoints: [{}]" ", status_interval: {}, situation_mask: {}, verify: {}" ", running_pagestorage_version : {}." @@ -101,6 +102,7 @@ struct StressEnv num_writers, num_readers, init_pages, + just_init_pages, clean_before_run, timeout_s, read_delay_ms, diff --git a/dbms/src/Storages/Page/stress/PSWorkload.cpp b/dbms/src/Storages/Page/stress/PSWorkload.cpp index 6159e15acac..ce1f8d92ce0 100644 --- a/dbms/src/Storages/Page/stress/PSWorkload.cpp +++ b/dbms/src/Storages/Page/stress/PSWorkload.cpp @@ -136,7 +136,7 @@ void StressWorkload::startBackgroundTimer() void StressWorkloadManger::runWorkload() { - if (options.situation_mask == NORMAL_WORKLOAD) + if (options.just_init_pages || options.situation_mask == NORMAL_WORKLOAD) { String name; WorkloadCreator func; @@ -144,7 +144,10 @@ void StressWorkloadManger::runWorkload() auto workload = std::shared_ptr(func(options)); LOG_INFO(StressEnv::logger, fmt::format("Start Running {} , {}", name, workload->desc())); workload->run(); - workload->onDumpResult(); + if (!options.just_init_pages) + { + workload->onDumpResult(); + } return; } diff --git a/dbms/src/Storages/Page/stress/workload/Normal.cpp b/dbms/src/Storages/Page/stress/workload/Normal.cpp index ec385b4b70c..0323b857613 100644 --- a/dbms/src/Storages/Page/stress/workload/Normal.cpp +++ b/dbms/src/Storages/Page/stress/workload/Normal.cpp @@ -52,10 +52,14 @@ class NormalWorkload } // init all pages in PageStorage - if (options.init_pages) + if (options.init_pages || options.just_init_pages) { PSWriter::fillAllPages(ps); LOG_INFO(StressEnv::logger, "All pages have been init."); + if (options.just_init_pages) + { + return; + } } stop_watch.start(); From 989bf8db36e73493ab9e9c780fca9cc77af0fa88 Mon Sep 17 00:00:00 2001 From: xufei Date: Thu, 14 Apr 2022 16:26:36 +0800 Subject: [PATCH 19/79] update client-c to disable grpc client keepalive (#4655) ref pingcap/tiflash#4192 --- contrib/client-c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/client-c b/contrib/client-c index 4e50596db3c..184cde7ae4f 160000 --- a/contrib/client-c +++ 
b/contrib/client-c
@@ -1 +1 @@
-Subproject commit 4e50596db3c878f5bf8de86fe32638f09bf2c117
+Subproject commit 184cde7ae4f83c0e9aaaaf825f3e0e7d600e62fa

From 6780525e0a2845e7da823b6fc2e7a147372d1d4b Mon Sep 17 00:00:00 2001
From: nauta
Date: Fri, 15 Apr 2022 10:42:36 +0800
Subject: [PATCH 20/79] fix typo (#4671)

close pingcap/tiflash#4672
---
 dbms/src/Storages/Page/PageStorage.h    | 2 +-
 dbms/src/Storages/Page/V1/PageStorage.h | 4 ++--
 dbms/src/Storages/Page/V2/PageStorage.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h
index f863795ca1d..3b5e1a18c9d 100644
--- a/dbms/src/Storages/Page/PageStorage.h
+++ b/dbms/src/Storages/Page/PageStorage.h
@@ -62,7 +62,7 @@ struct ExternalPageCallbacks

 /**
  * A storage system stored pages. Pages are serialized objects referenced by PageID. Store Page with the same PageID
- * will covered the old ones.
+ * will cover the old ones.
  * Users should call #gc() constantly to release disk space.
  *
  * This class is multi-threads safe. Support multi threads write, and multi threads read.
diff --git a/dbms/src/Storages/Page/V1/PageStorage.h b/dbms/src/Storages/Page/V1/PageStorage.h
index d6bc0ac85af..84691ce6d53 100644
--- a/dbms/src/Storages/Page/V1/PageStorage.h
+++ b/dbms/src/Storages/Page/V1/PageStorage.h
@@ -35,8 +35,8 @@ namespace DB::PS::V1

 /**
  * A storage system stored pages. Pages are serialized objects referenced by PageId. Store Page with the same PageId
- * will covered the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the
- * latest PageFile the Page is stored , the offset in file, and checksum, are cached in memory. Users should call
+ * will cover the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the
+ * latest PageFile the Page is stored, the offset in file, and checksum, are cached in memory. Users should call
  * #gc() constantly to clean up the sparse PageFiles and release disk space.
  *
  * This class is multi-threads safe. Support single thread write, and multi threads read.
diff --git a/dbms/src/Storages/Page/V2/PageStorage.h b/dbms/src/Storages/Page/V2/PageStorage.h
index f1752545815..01633f9a052 100644
--- a/dbms/src/Storages/Page/V2/PageStorage.h
+++ b/dbms/src/Storages/Page/V2/PageStorage.h
@@ -37,8 +37,8 @@ namespace PS::V2
 {
 /**
  * A storage system stored pages. Pages are serialized objects referenced by PageID. Store Page with the same PageID
- * will covered the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the
- * latest PageFile the Page is stored , the offset in file, and checksum, are cached in memory. Users should call
+ * will cover the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the
+ * latest PageFile the Page is stored, the offset in file, and checksum, are cached in memory. Users should call
  * #gc() constantly to clean up the sparse PageFiles and release disk space.
  *
  * This class is multi-threads safe. Support multi threads write, and multi threads read.

From 1def5be0d300d30f8944ce31f7812b70a4ce52ec Mon Sep 17 00:00:00 2001
From: jiaqizho
Date: Fri, 15 Apr 2022 14:40:36 +0800
Subject: [PATCH 21/79] Add a fail point that can hold PS snapshot for five minutes.
 (#4620)

ref pingcap/tiflash#3594
---
 dbms/src/Common/FailPoint.cpp                    |  3 ++-
 dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp | 10 ++++++++++
 dbms/src/Storages/Page/V3/BlobStore.cpp          |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp
index f5d9f247f06..6da54e74e69 100644
--- a/dbms/src/Common/FailPoint.cpp
+++ b/dbms/src/Common/FailPoint.cpp
@@ -79,7 +79,8 @@ std::unordered_map> FailPointHelper::f
     M(force_set_dtfile_exist_when_acquire_id) \
     M(force_no_local_region_for_mpp_task)     \
     M(force_remote_read_for_batch_cop)        \
-    M(force_context_path)
+    M(force_context_path)                     \
+    M(force_slow_page_storage_snapshot_release)

 #define APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) \
     M(pause_after_learner_read)                   \
diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
index 11ec13f25dd..80b1d81f817 100644
--- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
+++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
@@ -94,6 +94,7 @@ extern const char force_triggle_foreground_flush[];
 extern const char force_set_segment_ingest_packs_fail[];
 extern const char segment_merge_after_ingest_packs[];
 extern const char random_exception_after_dt_write_done[];
+extern const char force_slow_page_storage_snapshot_release[];
 } // namespace FailPoints

 namespace DM
@@ -1036,11 +1037,20 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context,
                 auto segment_snap = segment->createSnapshot(*dm_context, false, CurrentMetrics::DT_SnapshotOfReadRaw);
                 if (unlikely(!segment_snap))
                     throw Exception("Failed to get segment snap", ErrorCodes::LOGICAL_ERROR);
+
                 tasks.push_back(std::make_shared(segment, segment_snap, RowKeyRanges{segment->getRowKeyRange()}));
             }
         }
     }

+    fiu_do_on(FailPoints::force_slow_page_storage_snapshot_release, {
+        std::thread thread_hold_snapshots([tasks]() {
+            std::this_thread::sleep_for(std::chrono::seconds(5 * 60));
+            (void)tasks;
+        });
+        thread_hold_snapshots.detach();
+    });
+
     auto after_segment_read = [&](const DMContextPtr & dm_context_, const SegmentPtr & segment_) {
         this->checkSegmentUpdate(dm_context_, segment_, ThreadType::Read);
     };
diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp
index 6919e8081bd..0b7fb1669ff 100644
--- a/dbms/src/Storages/Page/V3/BlobStore.cpp
+++ b/dbms/src/Storages/Page/V3/BlobStore.cpp
@@ -978,6 +978,7 @@ void BlobStore::BlobStats::restore()

     for (auto & [path, stats] : stats_map)
     {
+        (void)path;
         for (const auto & stat : stats)
         {
             stat->recalculateSpaceMap();
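
The fail point above makes every readRaw() call detach a thread that holds the captured read tasks, pinning their PageStorage snapshots for five minutes and thus simulating a slow reader. A minimal sketch of how a test might drive it, assuming the codebase's usual FailPointHelper enable/disable API (the call site below is illustrative, not part of this patch):

    // Hypothetical test snippet: force snapshots to outlive the read.
    FailPointHelper::enableFailPoint(FailPoints::force_slow_page_storage_snapshot_release);
    auto streams = store->readRaw(/* db_context, columns, ... */); // snapshots now stay pinned for ~5 minutes
    FailPointHelper::disableFailPoint(FailPoints::force_slow_page_storage_snapshot_release);
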
From bd50b0efc629a30959c9fe908849084a891077b3 Mon Sep 17 00:00:00 2001
From: yanweiqi <592838129@qq.com>
Date: Fri, 15 Apr 2022 17:48:35 +0800
Subject: [PATCH 22/79] Interpreter: Hand write tipb::Executor.
 (#4632)

ref pingcap/tiflash#4609
---
 dbms/src/Debug/astToExecutor.cpp             |   3 -
 dbms/src/TestUtils/InterpreterTestUtils.cpp  |  82 ++++++
 dbms/src/TestUtils/InterpreterTestUtils.h    |  73 +++++
 dbms/src/TestUtils/mockExecutor.cpp          | 259 ++++++++++++++++++
 dbms/src/TestUtils/mockExecutor.h            | 137 +++++++++
 .../TestUtils/tests/gtest_mock_executors.cpp | 164 +++++++++++
 6 files changed, 715 insertions(+), 3 deletions(-)
 create mode 100644 dbms/src/TestUtils/InterpreterTestUtils.cpp
 create mode 100644 dbms/src/TestUtils/InterpreterTestUtils.h
 create mode 100644 dbms/src/TestUtils/mockExecutor.cpp
 create mode 100644 dbms/src/TestUtils/mockExecutor.h
 create mode 100644 dbms/src/TestUtils/tests/gtest_mock_executors.cpp

diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp
index 5f7567f0eff..11b90e60fb9 100644
--- a/dbms/src/Debug/astToExecutor.cpp
+++ b/dbms/src/Debug/astToExecutor.cpp
@@ -29,9 +29,6 @@
 #include
 #include

-#include
-#include
-
 namespace DB
 {
 namespace
diff --git a/dbms/src/TestUtils/InterpreterTestUtils.cpp b/dbms/src/TestUtils/InterpreterTestUtils.cpp
new file mode 100644
index 00000000000..52ff5e1cb08
--- /dev/null
+++ b/dbms/src/TestUtils/InterpreterTestUtils.cpp
@@ -0,0 +1,82 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+namespace DB::tests
+{
+namespace
+{
+String toTreeString(const tipb::Executor & root_executor, size_t level = 0);
+
+// Serialize a tipb::DAGRequest, printing each executor name in a tree format.
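+// For example, a selection over a table scan renders as (cf. the expected
+// strings in gtest_mock_executors.cpp below):
+//   selection_1
+//    table_scan_0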
+String toTreeString(std::shared_ptr dag_request) +{ + assert((dag_request->executors_size() > 0) != dag_request->has_root_executor()); + if (dag_request->has_root_executor()) + { + return toTreeString(dag_request->root_executor()); + } + else + { + FmtBuffer buffer; + String prefix; + traverseExecutors(dag_request.get(), [&buffer, &prefix](const tipb::Executor & executor) { + assert(executor.has_executor_id()); + buffer.fmtAppend("{}{}\n", prefix, executor.executor_id()); + prefix.append(" "); + return true; + }); + return buffer.toString(); + } +} + +String toTreeString(const tipb::Executor & root_executor, size_t level) +{ + FmtBuffer buffer; + + auto append_str = [&buffer, &level](const tipb::Executor & executor) { + assert(executor.has_executor_id()); + buffer.append(String(level, ' ')); + buffer.append(executor.executor_id()).append("\n"); + }; + + traverseExecutorTree(root_executor, [&](const tipb::Executor & executor) { + if (executor.has_join()) + { + append_str(executor); + ++level; + for (const auto & child : executor.join().children()) + buffer.append(toTreeString(child, level)); + return false; + } + else + { + append_str(executor); + ++level; + return true; + } + }); + + return buffer.toString(); +} +} // namespace + +void dagRequestEqual(String & expected_string, const std::shared_ptr & actual) +{ + String actual_string = toTreeString(actual); + ASSERT_EQ(Poco::trimInPlace(expected_string), Poco::trimInPlace(actual_string)); +} + +} // namespace DB::tests diff --git a/dbms/src/TestUtils/InterpreterTestUtils.h b/dbms/src/TestUtils/InterpreterTestUtils.h new file mode 100644 index 00000000000..e68bbe8ab47 --- /dev/null +++ b/dbms/src/TestUtils/InterpreterTestUtils.h @@ -0,0 +1,73 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB::tests +{ +void dagRequestEqual(String & expected_string, const std::shared_ptr & actual); +class MockExecutorTest : public ::testing::Test +{ +protected: + void SetUp() override + { + initializeContext(); + } + +public: + MockExecutorTest() + : context(TiFlashTestEnv::getContext()) + {} + + static void SetUpTestCase() + { + try + { + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. 
+        }
+    }
+
+    virtual void initializeContext()
+    {
+        dag_context_ptr = std::make_unique(1024);
+        context.setDAGContext(dag_context_ptr.get());
+        mock_dag_request_context = MockDAGRequestContext();
+    }
+
+    DAGContext & getDAGContext()
+    {
+        assert(dag_context_ptr != nullptr);
+        return *dag_context_ptr;
+    }
+
+protected:
+    Context context;
+    MockDAGRequestContext mock_dag_request_context;
+    std::unique_ptr dag_context_ptr;
+};
+
+#define ASSERT_DAGREQUEST_EQAUL(str, request) dagRequestEqual(str, request);
+} // namespace DB::tests
\ No newline at end of file
diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp
new file mode 100644
index 00000000000..8295d161753
--- /dev/null
+++ b/dbms/src/TestUtils/mockExecutor.cpp
@@ -0,0 +1,259 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+namespace DB::tests
+{
+ASTPtr buildColumn(const String & column_name)
+{
+    return std::make_shared(column_name);
+}
+
+ASTPtr buildLiteral(const Field & field)
+{
+    return std::make_shared(field);
+}
+
+ASTPtr buildOrderByItemList(MockOrderByItems order_by_items)
+{
+    std::vector vec;
+    for (auto item : order_by_items)
+    {
+        int direction = item.second ? 1 : -1;
+        ASTPtr locale_node;
+        auto order_by_item = std::make_shared(direction, direction, false, locale_node);
+        order_by_item->children.push_back(std::make_shared(item.first));
+        vec.push_back(order_by_item);
+    }
+    auto exp_list = std::make_shared();
+    exp_list->children.insert(exp_list->children.end(), vec.begin(), vec.end());
+    return exp_list;
+}
+
+// A mock DAGRequest should prepare its time_zone, flags, encode_type and output_schema.
+void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request)
+{
+    dag_request.set_time_zone_name(properties.tz_name);
+    dag_request.set_time_zone_offset(properties.tz_offset);
+    dag_request.set_flags(dag_request.flags() | (1u << 1u /* TRUNCATE_AS_WARNING */) | (1u << 6u /* OVERFLOW_AS_WARNING */));
+
+    if (properties.encode_type == "chunk")
+        dag_request.set_encode_type(tipb::EncodeType::TypeChunk);
+    else if (properties.encode_type == "chblock")
+        dag_request.set_encode_type(tipb::EncodeType::TypeCHBlock);
+    else
+        dag_request.set_encode_type(tipb::EncodeType::TypeDefault);
+
+    for (size_t i = 0; i < root->output_schema.size(); ++i)
+        dag_request.add_output_offsets(i);
+}
+
+// Traverse the AST tree to build tipb::Executor recursively.
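+// Note: executor ids are assigned from executor_index in construction order,
+// which is why the tests below expect names like "table_scan_0" and "selection_1";
+// build() resets the counter so one builder context can produce several requests.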
+std::shared_ptr DAGRequestBuilder::build(Context & context) +{ + MPPInfo mpp_info(properties.start_ts, -1, -1, {}, {}); + std::shared_ptr dag_request_ptr = std::make_shared(); + tipb::DAGRequest & dag_request = *dag_request_ptr; + initDAGRequest(dag_request); + root->toTiPBExecutor(dag_request.mutable_root_executor(), properties.collator, mpp_info, context); + root.reset(); + executor_index = 0; + return dag_request_ptr; +} + +DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String & table, const MockColumnInfos & columns) +{ + assert(!columns.empty()); + TableInfo table_info; + table_info.name = db + "." + table; + for (const auto & column : columns) + { + TiDB::ColumnInfo ret; + ret.tp = column.second; + ret.name = column.first; + table_info.columns.push_back(std::move(ret)); + } + String empty_alias; + root = compileTableScan(getExecutorIndex(), table_info, empty_alias, false); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const std::vector> & columns) +{ + return mockTable(name.first, name.second, columns); +} + +DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfoList & columns) +{ + return mockTable(name.first, name.second, columns); +} + +DAGRequestBuilder & DAGRequestBuilder::filter(ASTPtr filter_expr) +{ + assert(root); + root = compileSelection(root, getExecutorIndex(), filter_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::limit(int limit) +{ + assert(root); + root = compileLimit(root, getExecutorIndex(), buildLiteral(Field(static_cast(limit)))); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::limit(ASTPtr limit_expr) +{ + assert(root); + root = compileLimit(root, getExecutorIndex(), limit_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::topN(ASTPtr order_exprs, ASTPtr limit_expr) +{ + assert(root); + root = compileTopN(root, getExecutorIndex(), order_exprs, limit_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::topN(const String & col_name, bool desc, int limit) +{ + assert(root); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemList({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, int limit) +{ + return topN(order_by_items, buildLiteral(Field(static_cast(limit)))); +} + +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, ASTPtr limit_expr) +{ + assert(root); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemList(order_by_items), limit_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::project(const String & col_name) +{ + assert(root); + root = compileProject(root, getExecutorIndex(), buildColumn(col_name)); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::project(MockAsts exprs) +{ + assert(root); + auto exp_list = std::make_shared(); + for (const auto & expr : exprs) + { + exp_list->children.push_back(expr); + } + root = compileProject(root, getExecutorIndex(), exp_list); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNames col_names) +{ + assert(root); + auto exp_list = std::make_shared(); + for (const auto & name : col_names) + { + exp_list->children.push_back(col(name)); + } + + root = compileProject(root, getExecutorIndex(), exp_list); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, ASTPtr using_expr_list) 
+{ + return join(right, using_expr_list, ASTTableJoin::Kind::Inner); +} + +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, ASTPtr using_expr_list, ASTTableJoin::Kind kind) +{ + assert(root); + assert(right.root); + auto join_ast = std::make_shared(); + join_ast->using_expression_list = using_expr_list; + join_ast->strictness = ASTTableJoin::Strictness::All; + join_ast->kind = kind; + root = compileJoin(getExecutorIndex(), root, right.root, join_ast); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::aggregation(ASTPtr agg_func, ASTPtr group_by_expr) +{ + auto agg_funcs = std::make_shared(); + auto group_by_exprs = std::make_shared(); + agg_funcs->children.push_back(agg_func); + group_by_exprs->children.push_back(group_by_expr); + return buildAggregation(agg_funcs, group_by_exprs); +} + +DAGRequestBuilder & DAGRequestBuilder::aggregation(MockAsts agg_funcs, MockAsts group_by_exprs) +{ + auto agg_func_list = std::make_shared(); + auto group_by_expr_list = std::make_shared(); + for (const auto & func : agg_funcs) + agg_func_list->children.push_back(func); + for (const auto & group_by : group_by_exprs) + group_by_expr_list->children.push_back(group_by); + + return buildAggregation(agg_func_list, group_by_expr_list); +} + +DAGRequestBuilder & DAGRequestBuilder::buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs) +{ + assert(root); + root = compileAggregation(root, getExecutorIndex(), agg_funcs, group_by_exprs); + return *this; +} + + +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columns) +{ + std::vector v_column_info; + for (const auto & info : columns) + { + v_column_info.push_back(std::move(info)); + } + mock_tables[name.first + "." + name.second] = v_column_info; +} + +void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfos & columns) +{ + mock_tables[db + "." + table] = columns; +} + +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfos & columns) +{ + mock_tables[name.first + "." + name.second] = columns; +} + +DAGRequestBuilder MockDAGRequestContext::scan(String db_name, String table_name) +{ + return DAGRequestBuilder(index).mockTable({db_name, table_name}, mock_tables[db_name + "." + table_name]); +} + +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h new file mode 100644 index 00000000000..24d2df21f4a --- /dev/null +++ b/dbms/src/TestUtils/mockExecutor.h @@ -0,0 +1,137 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+
+namespace DB::tests
+{
+using MockColumnInfo = std::pair;
+using MockColumnInfos = std::vector;
+using MockColumnInfoList = std::initializer_list;
+using MockTableName = std::pair;
+using MockOrderByItem = std::pair;
+using MockOrderByItems = std::initializer_list;
+using MockColumnNames = std::initializer_list;
+using MockAsts = std::initializer_list;
+
+
+/** Responsible for hand-writing a tipb::DAGRequest.
+  * Use this class to mock a DAGRequest, then feed the DAGRequest into
+  * the Interpreter for test purposes.
+  * The mockTable() method must be called first in order to generate the table schema.
+  * After constructing all necessary operators in the DAGRequest, call build() to generate the DAGRequest.
+  */
+class DAGRequestBuilder
+{
+public:
+    size_t & executor_index;
+
+    size_t & getExecutorIndex() const
+    {
+        return executor_index;
+    }
+
+    explicit DAGRequestBuilder(size_t & index)
+        : executor_index(index)
+    {}
+
+    std::shared_ptr build(Context & context);
+
+    DAGRequestBuilder & mockTable(const String & db, const String & table, const MockColumnInfos & columns);
+    DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfos & columns);
+    DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfoList & columns);
+
+    DAGRequestBuilder & filter(ASTPtr filter_expr);
+
+    DAGRequestBuilder & limit(int limit);
+    DAGRequestBuilder & limit(ASTPtr limit_expr);
+
+    DAGRequestBuilder & topN(ASTPtr order_exprs, ASTPtr limit_expr);
+    DAGRequestBuilder & topN(const String & col_name, bool desc, int limit);
+    DAGRequestBuilder & topN(MockOrderByItems order_by_items, int limit);
+    DAGRequestBuilder & topN(MockOrderByItems order_by_items, ASTPtr limit_expr);
+
+    DAGRequestBuilder & project(const String & col_name);
+    DAGRequestBuilder & project(MockAsts expr);
+    DAGRequestBuilder & project(MockColumnNames col_names);
+
+    // Currently only supports inner join, left join and right join.
+    // TODO: support more types of join.
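+    // A minimal usage sketch, mirroring the Join test in gtest_mock_executors.cpp
+    // (table/column names are illustrative):
+    //   auto right = context.scan("test_db", "r_table");
+    //   auto request = context.scan("test_db", "l_table")
+    //                      .join(right, col("l_a"), ASTTableJoin::Kind::Left)
+    //                      .build(context);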
+    DAGRequestBuilder & join(const DAGRequestBuilder & right, ASTPtr using_expr_list);
+    DAGRequestBuilder & join(const DAGRequestBuilder & right, ASTPtr using_expr_list, ASTTableJoin::Kind kind);
+
+    // aggregation
+    DAGRequestBuilder & aggregation(ASTPtr agg_func, ASTPtr group_by_expr);
+    DAGRequestBuilder & aggregation(MockAsts agg_funcs, MockAsts group_by_exprs);
+
+private:
+    void initDAGRequest(tipb::DAGRequest & dag_request);
+    DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs);
+
+    ExecutorPtr root;
+    DAGProperties properties;
+};
+
+/** Responsible for storing necessary arguments in order to mock a DAGRequest
+  * index: used in DAGRequestBuilder to identify executors
+  * mock_tables: DAGRequestBuilder uses it to mock TableScan executors
+  */
+class MockDAGRequestContext
+{
+public:
+    MockDAGRequestContext()
+    {
+        index = 0;
+    }
+
+    DAGRequestBuilder createDAGRequestBuilder()
+    {
+        return DAGRequestBuilder(index);
+    }
+
+    void addMockTable(const MockTableName & name, const MockColumnInfoList & columns);
+    void addMockTable(const String & db, const String & table, const MockColumnInfos & columns);
+    void addMockTable(const MockTableName & name, const MockColumnInfos & columns);
+
+    DAGRequestBuilder scan(String db_name, String table_name);
+
+private:
+    size_t index;
+    std::unordered_map mock_tables;
+};
+
+ASTPtr buildColumn(const String & column_name);
+ASTPtr buildLiteral(const Field & field);
+ASTPtr buildFunction(MockAsts exprs, const String & name);
+ASTPtr buildOrderByItemList(MockOrderByItems order_by_items);
+
+#define col(name) buildColumn((name))
+#define lit(field) buildLiteral((field))
+#define eq(expr1, expr2) makeASTFunction("equals", (expr1), (expr2))
+#define Not_eq(expr1, expr2) makeASTFunction("notEquals", (expr1), (expr2))
+#define lt(expr1, expr2) makeASTFunction("less", (expr1), (expr2))
+#define gt(expr1, expr2) makeASTFunction("greater", (expr1), (expr2))
+#define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2))
+#define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2))
+#define NOT(expr) makeASTFunction("not", (expr))
+#define Max(expr) makeASTFunction("max", expr)
+
+} // namespace DB::tests
\ No newline at end of file
diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp
new file mode 100644
index 00000000000..2be63311034
--- /dev/null
+++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp
@@ -0,0 +1,164 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include +#include +#include + +namespace DB +{ +namespace tests +{ +class MockDAGRequestTest : public DB::tests::MockExecutorTest +{ +public: + void initializeContext() override + { + dag_context_ptr = std::make_unique(1024); + context.setDAGContext(dag_context_ptr.get()); + mock_dag_request_context = MockDAGRequestContext(); + mock_dag_request_context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}); + mock_dag_request_context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeLong}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); + mock_dag_request_context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"r_c", TiDB::TP::TypeString}}); + mock_dag_request_context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"l_c", TiDB::TP::TypeString}}); + } +}; + +TEST_F(MockDAGRequestTest, MockTable) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table").build(context); + String expected_string_1 = "table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_1, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1").build(context); + String expected_string_2 = "table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Filter) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table").filter(eq(col("s1"), col("s2"))).build(context); + String expected_string = "selection_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .filter(And(eq(col("s1"), col("s2")), lt(col("s2"), col("s3")))) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Projection) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table") + .project("s1") + .build(context); + String expected_string = "project_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .project({col("s3"), eq(col("s1"), col("s2"))}) + .build(context); + String expected_string_2 = "project_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .project({"s1", "s2"}) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Limit) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table") + .limit(10) + .build(context); + String expected_string = "limit_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .limit(lit(Field(static_cast(10)))) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, TopN) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table") + .topN({{"s1", false}}, 10) + .build(context); + String expected_string = "topn_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table") + .topN("s1", false, 10) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Aggregation) +try +{ 
+ auto request = mock_dag_request_context.scan("test_db", "test_table") + .aggregation(Max(col("s1")), col("s2")) + .build(context); + String expected_string = "aggregation_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Join) +try +{ + DAGRequestBuilder right_builder = mock_dag_request_context.scan("test_db", "r_table") + .filter(eq(col("r_a"), col("r_b"))) + .project({col("r_a"), col("r_b")}) + .aggregation(Max(col("r_a")), col("r_b")); + + + DAGRequestBuilder left_builder = mock_dag_request_context.scan("test_db", "l_table") + .topN({{"l_a", false}}, 10) + .join(right_builder, col("l_a"), ASTTableJoin::Kind::Left) + .limit(10); + + auto request = left_builder.build(context); + String expected_string = "limit_7\n" + " Join_6\n" + " topn_5\n" + " table_scan_4\n" + " aggregation_3\n" + " project_2\n" + " selection_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +} // namespace tests +} // namespace DB \ No newline at end of file From 66f45c76692e941bc845c01349ea89de0f2cc210 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Mon, 18 Apr 2022 10:48:03 +0800 Subject: [PATCH 23/79] refine `SubqueryForSet` (#4623) ref pingcap/tiflash#4118 --- .../CreatingSetsBlockInputStream.h | 2 +- dbms/src/Flash/Coprocessor/DAGContext.cpp | 7 +++ dbms/src/Flash/Coprocessor/DAGContext.h | 8 +++ .../Coprocessor/DAGQueryBlockInterpreter.cpp | 9 +-- .../Coprocessor/DAGQueryBlockInterpreter.h | 5 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 30 +++++----- dbms/src/Flash/Coprocessor/InterpreterDAG.h | 13 +---- dbms/src/Interpreters/ExpressionAnalyzer.h | 34 +---------- dbms/src/Interpreters/SubqueryForSet.h | 57 +++++++++++++++++++ 9 files changed, 96 insertions(+), 69 deletions(-) create mode 100644 dbms/src/Interpreters/SubqueryForSet.h diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h index 2f9ad61e4c8..b8e2ee6fe87 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h @@ -17,7 +17,7 @@ #include #include #include -#include /// SubqueriesForSets +#include namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index 1f6618d3170..17fb6553eab 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -63,6 +63,13 @@ bool DAGContext::allowInvalidDate() const return sql_mode & TiDBSQLMode::ALLOW_INVALID_DATES; } +void DAGContext::addSubquery(const String & subquery_id, SubqueryForSet && subquery) +{ + SubqueriesForSets subqueries_for_sets; + subqueries_for_sets[subquery_id] = std::move(subquery); + subqueries.push_back(std::move(subqueries_for_sets)); +} + std::unordered_map & DAGContext::getProfileStreamsMap() { return profile_streams_map; diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 30397dc496a..18ad73ec207 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -29,6 +29,7 @@ #include #include #include +#include #include namespace DB @@ -279,6 +280,10 @@ class DAGContext void initExchangeReceiverIfMPP(Context & context, size_t max_streams); const std::unordered_map> & getMPPExchangeReceiverMap() const; + void addSubquery(const String & subquery_id, SubqueryForSet && subquery); + bool hasSubquery() const { return !subqueries.empty(); } + std::vector && 
moveSubqueries() { return std::move(subqueries); } + const tipb::DAGRequest * dag_request; Int64 compile_time_ns = 0; size_t final_concurrency = 1; @@ -337,6 +342,9 @@ class DAGContext /// key: executor_id of ExchangeReceiver nodes in dag. std::unordered_map> mpp_exchange_receiver_map; bool mpp_exchange_receiver_map_inited = false; + /// vector of SubqueriesForSets(such as join build subquery). + /// The order of the vector is also the order of the subquery. + std::vector subqueries; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 51cd1bf671f..b4832ff4f17 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -56,13 +56,11 @@ DAGQueryBlockInterpreter::DAGQueryBlockInterpreter( Context & context_, const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, - size_t max_streams_, - std::vector & subqueries_for_sets_) + size_t max_streams_) : context(context_) , input_streams_vec(input_streams_vec_) , query_block(query_block_) , max_streams(max_streams_) - , subqueries_for_sets(subqueries_for_sets_) , log(Logger::get("DAGQueryBlockInterpreter", dagContext().log ? dagContext().log->identifier() : "")) {} @@ -1023,10 +1021,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) SubqueryForSet right_query; handleJoin(query_block.source->join(), pipeline, right_query); recordProfileStreams(pipeline, query_block.source_name); - - SubqueriesForSets subquries; - subquries[query_block.source_name] = right_query; - subqueries_for_sets.emplace_back(subquries); + dagContext().addSubquery(query_block.source_name, std::move(right_query)); } else if (query_block.source->tp() == tipb::ExecType::TypeExchangeReceiver) { diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 35627cd19ee..b681d22188c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -47,8 +47,7 @@ class DAGQueryBlockInterpreter Context & context_, const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, - size_t max_streams_, - std::vector & subqueries_for_sets_); + size_t max_streams_); ~DAGQueryBlockInterpreter() = default; @@ -117,8 +116,6 @@ class DAGQueryBlockInterpreter std::unique_ptr analyzer; - std::vector & subqueries_for_sets; - LoggerPtr log; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 1bfe87e5695..4c67d67e4f9 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -13,10 +13,11 @@ // limitations under the License. 
#include -#include +#include +#include #include #include -#include +#include namespace DB { @@ -35,23 +36,27 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) } } +DAGContext & InterpreterDAG::dagContext() const +{ + return *context.getDAGContext(); +} + /** executeQueryBlock recursively converts all the children of the DAGQueryBlock and itself (Coprocessor DAG request) * into an array of IBlockInputStream (element of physical executing plan of TiFlash) */ -BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block, std::vector & subqueries_for_sets) +BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) { std::vector input_streams_vec; for (auto & child : query_block.children) { - BlockInputStreams child_streams = executeQueryBlock(*child, subqueries_for_sets); + BlockInputStreams child_streams = executeQueryBlock(*child); input_streams_vec.push_back(child_streams); } DAGQueryBlockInterpreter query_block_interpreter( context, input_streams_vec, query_block, - max_streams, - subqueries_for_sets); + max_streams); return query_block_interpreter.execute(); } @@ -60,26 +65,23 @@ BlockIO InterpreterDAG::execute() /// Due to learner read, DAGQueryBlockInterpreter may take a long time to build /// the query plan, so we init mpp exchange receiver before executeQueryBlock dagContext().initExchangeReceiverIfMPP(context, max_streams); - /// region_info should base on the source executor, however - /// tidb does not support multi-table dag request yet, so - /// it is ok to use the same region_info for the whole dag request - std::vector subqueries_for_sets; - BlockInputStreams streams = executeQueryBlock(*dag.getRootQueryBlock(), subqueries_for_sets); + + BlockInputStreams streams = executeQueryBlock(*dag.getRootQueryBlock()); DAGPipeline pipeline; pipeline.streams = streams; /// add union to run in parallel if needed - if (context.getDAGContext()->isMPPTask()) + if (dagContext().isMPPTask()) /// MPPTask do not need the returned blocks. 
executeUnion(pipeline, max_streams, dagContext().log, /*ignore_block=*/true); else executeUnion(pipeline, max_streams, dagContext().log); - if (!subqueries_for_sets.empty()) + if (dagContext().hasSubquery()) { const Settings & settings = context.getSettingsRef(); pipeline.firstStream() = std::make_shared( pipeline.firstStream(), - std::move(subqueries_for_sets), + std::move(dagContext().moveSubqueries()), SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode), dagContext().log->identifier()); } diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.h b/dbms/src/Flash/Coprocessor/InterpreterDAG.h index 46b995ef9a6..40f7d8c62cf 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.h +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.h @@ -21,16 +21,9 @@ #pragma GCC diagnostic pop #include -#include -#include +#include #include -#include -#include -#include #include -#include -#include -#include namespace DB { @@ -50,9 +43,9 @@ class InterpreterDAG : public IInterpreter BlockIO execute() override; private: - BlockInputStreams executeQueryBlock(DAGQueryBlock & query_block, std::vector & subqueries_for_sets); + BlockInputStreams executeQueryBlock(DAGQueryBlock & query_block); - DAGContext & dagContext() const { return *context.getDAGContext(); } + DAGContext & dagContext() const; Context & context; const DAGQuerySource & dag; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index fb8bea20a8a..3558b0ffc90 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -26,21 +27,8 @@ class Context; class ExpressionActions; struct ExpressionActionsChain; -class Join; -using JoinPtr = std::shared_ptr; - -class IAST; -using ASTPtr = std::shared_ptr; - -class Set; -using SetPtr = std::shared_ptr; using PreparedSets = std::unordered_map; -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr; - -class IStorage; -using StoragePtr = std::shared_ptr; using Tables = std::map; class ASTFunction; @@ -48,26 +36,6 @@ class ASTExpressionList; class ASTSelectQuery; -/** Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. - */ -struct SubqueryForSet -{ - /// The source is obtained using the InterpreterSelectQuery subquery. - BlockInputStreamPtr source; - - /// If set, build it from result. - SetPtr set; - JoinPtr join; - - /// If set, put the result into the table. - /// This is a temporary table for transferring to remote servers for distributed query processing. - StoragePtr table; -}; - -/// ID of subquery -> what to do with it. -using SubqueriesForSets = std::unordered_map; - - /** Transforms an expression from a syntax tree into a sequence of actions to execute it. * * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. diff --git a/dbms/src/Interpreters/SubqueryForSet.h b/dbms/src/Interpreters/SubqueryForSet.h new file mode 100644 index 00000000000..b3c45e948e1 --- /dev/null +++ b/dbms/src/Interpreters/SubqueryForSet.h @@ -0,0 +1,57 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include +#include + +namespace DB +{ +class Join; +using JoinPtr = std::shared_ptr; + +class IAST; +using ASTPtr = std::shared_ptr; + +class Set; +using SetPtr = std::shared_ptr; + +class IBlockInputStream; +using BlockInputStreamPtr = std::shared_ptr; + +class IStorage; +using StoragePtr = std::shared_ptr; + +/** Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. + */ +struct SubqueryForSet +{ + /// The source is obtained using the InterpreterSelectQuery subquery. + BlockInputStreamPtr source; + + /// If set, build it from result. + SetPtr set; + JoinPtr join; + + /// If set, put the result into the table. + /// This is a temporary table for transferring to remote servers for distributed query processing. + StoragePtr table; +}; + +/// ID of subquery -> what to do with it. +using SubqueriesForSets = std::unordered_map; +} // namespace DB From 605ddc0ebe68f90503877e90e4f676914a2e4055 Mon Sep 17 00:00:00 2001 From: yibin Date: Mon, 18 Apr 2022 15:34:03 +0800 Subject: [PATCH 24/79] Add gtests for MPPTunnel (#4553) close pingcap/tiflash#4481 --- dbms/src/Flash/Mpp/MPPTunnel.cpp | 22 + dbms/src/Flash/Mpp/MPPTunnel.h | 17 + dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp | 630 +++++++++++++++++++ 3 files changed, 669 insertions(+) create mode 100644 dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp index 78d7312d919..6db39e61586 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp @@ -51,6 +51,28 @@ MPPTunnelBase::MPPTunnelBase( GET_METRIC(tiflash_object_count, type_count_of_mpptunnel).Increment(); } +template +MPPTunnelBase::MPPTunnelBase( + const String & tunnel_id_, + const std::chrono::seconds timeout_, + int input_steams_num_, + bool is_local_, + bool is_async_, + const String & req_id) + : connected(false) + , finished(false) + , is_local(is_local_) + , is_async(is_async_) + , timeout(timeout_) + , tunnel_id(tunnel_id_) + , input_streams_num(input_steams_num_) + , send_queue(std::max(5, input_steams_num_ * 5)) // MPMCQueue can benefit from a slightly larger queue size + , thread_manager(newThreadManager()) + , log(Logger::get("MPPTunnel", req_id, tunnel_id)) +{ + RUNTIME_ASSERT(!(is_local && is_async), log, "is_local: {}, is_async: {}.", is_local, is_async); +} + template MPPTunnelBase::~MPPTunnelBase() { diff --git a/dbms/src/Flash/Mpp/MPPTunnel.h b/dbms/src/Flash/Mpp/MPPTunnel.h index be2fbadfd38..bdc60a97f5a 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.h +++ b/dbms/src/Flash/Mpp/MPPTunnel.h @@ -42,6 +42,12 @@ namespace DB { +namespace tests +{ +class MPPTunnelTest; +class TestMPPTunnelBase; +} // namespace tests + class EstablishCallData; /** @@ -123,6 +129,17 @@ class MPPTunnelBase : private boost::noncopyable void sendJob(bool need_lock = true); private: + friend class tests::MPPTunnelTest; + friend class tests::TestMPPTunnelBase; + // For gtest usage + MPPTunnelBase( + const String & tunnel_id_, + std::chrono::seconds timeout_, + int input_steams_num_, + bool is_local_, + bool is_async_, + const 
String & req_id); + void finishSendQueue(); void waitUntilConnectedOrFinished(std::unique_lock & lk); diff --git a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp new file mode 100644 index 00000000000..133142cc867 --- /dev/null +++ b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp @@ -0,0 +1,630 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ +class MPPTunnelTest : public MPPTunnelBase +{ +public: + using Base = MPPTunnelBase; + using Base::Base; + MPPTunnelTest( + const String & tunnel_id_, + std::chrono::seconds timeout_, + int input_steams_num_, + bool is_local_, + bool is_async_, + const String & req_id) + : Base(tunnel_id_, timeout_, input_steams_num_, is_local_, is_async_, req_id) + {} + void setFinishFlag(bool flag) + { + finished = flag; + } + bool getFinishFlag() + { + return finished; + } + bool getConnectFlag() + { + return connected; + } + std::shared_ptr getThreadManager() + { + return thread_manager; + } +}; + +using MPPTunnelTestPtr = std::shared_ptr; + +class MockWriter : public PacketWriter +{ + bool write(const mpp::MPPDataPacket & packet) override + { + write_packet_vec.push_back(packet.data()); + return true; + } + +public: + std::vector write_packet_vec; +}; + +class MockFailedWriter : public PacketWriter +{ + bool write(const mpp::MPPDataPacket &) override + { + return false; + } +}; + +struct MockLocalReader +{ + MPPTunnelTestPtr tunnel; + std::vector write_packet_vec; + + explicit MockLocalReader(const MPPTunnelTestPtr & tunnel_) + : tunnel(tunnel_) + {} + + ~MockLocalReader() + { + if (tunnel) + { + // In case that ExchangeReceiver throw error before finish reading from mpp_tunnel + tunnel->consumerFinish("Receiver closed"); + } + } + + void read() + { + while (true) + { + MPPDataPacketPtr tmp_packet = tunnel->readForLocal(); + bool success = tmp_packet != nullptr; + if (success) + { + write_packet_vec.push_back(tmp_packet->data()); + } + else + { + break; + } + } + } +}; +using MockLocalReaderPtr = std::shared_ptr; + +struct MockTerminateLocalReader +{ + MPPTunnelTestPtr tunnel; + + explicit MockTerminateLocalReader(const MPPTunnelTestPtr & tunnel_) + : tunnel(tunnel_) + {} + + ~MockTerminateLocalReader() + { + if (tunnel) + { + // In case that ExchangeReceiver throw error before finish reading from mpp_tunnel + tunnel->consumerFinish("Receiver closed"); + } + } + + void read() const + { + MPPDataPacketPtr tmp_packet = tunnel->readForLocal(); + tunnel->consumerFinish("Receiver closed"); + } +}; +using MockTerminateLocalReaderPtr = std::shared_ptr; + + +class MockAsyncWriter : public PacketWriter +{ +public: + explicit MockAsyncWriter(MPPTunnelTestPtr tunnel_) + : tunnel(tunnel_) + {} + bool write(const mpp::MPPDataPacket & packet) override + { + write_packet_vec.push_back(packet.data()); + // Simulate the async process, write success then check if exist msg, 
then write again + if (tunnel->isSendQueueNextPopNonBlocking()) + { + tunnel->sendJob(false); + } + return true; + } + + void tryFlushOne() override + { + if (ready && tunnel->isSendQueueNextPopNonBlocking()) + { + tunnel->sendJob(false); + } + ready = true; + } + MPPTunnelTestPtr tunnel; + std::vector write_packet_vec; + bool ready = false; +}; + +class MockFailedAsyncWriter : public PacketWriter +{ +public: + explicit MockFailedAsyncWriter(MPPTunnelTestPtr tunnel_) + : tunnel(tunnel_) + {} + bool write(const mpp::MPPDataPacket & packet) override + { + write_packet_vec.push_back(packet.data()); + // Simulate the async process, write success then check if exist msg, then write again + if (tunnel->isSendQueueNextPopNonBlocking()) + { + tunnel->sendJob(false); + } + return false; + } + + void tryFlushOne() override + { + if (ready && tunnel->isSendQueueNextPopNonBlocking()) + { + tunnel->sendJob(false); + } + ready = true; + } + MPPTunnelTestPtr tunnel; + std::vector write_packet_vec; + bool ready = false; +}; + +class TestMPPTunnelBase : public testing::Test +{ +protected: + virtual void SetUp() override { timeout = std::chrono::seconds(10); } + virtual void TearDown() override {} + std::chrono::seconds timeout; + +public: + MPPTunnelTestPtr constructRemoteSyncTunnel() + { + auto tunnel = std::make_shared(String("0000_0001"), timeout, 2, false, false, String("0")); + return tunnel; + } + + MPPTunnelTestPtr constructLocalSyncTunnel() + { + auto tunnel = std::make_shared(String("0000_0001"), timeout, 2, true, false, String("0")); + return tunnel; + } + + static MockLocalReaderPtr connectLocalSyncTunnel(MPPTunnelTestPtr mpp_tunnel_ptr) + { + mpp_tunnel_ptr->connect(nullptr); + MockLocalReaderPtr local_reader_ptr = std::make_shared(mpp_tunnel_ptr); + mpp_tunnel_ptr->getThreadManager()->schedule(true, "LocalReader", [local_reader_ptr] { + local_reader_ptr->read(); + }); + return local_reader_ptr; + } + + MPPTunnelTestPtr constructRemoteAsyncTunnel() + { + auto tunnel = std::make_shared(String("0000_0001"), timeout, 2, false, true, String("0")); + return tunnel; + } +}; + +TEST_F(TestMPPTunnelBase, ConnectWhenFinished) +try +{ + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + mpp_tunnel_ptr->setFinishFlag(true); + mpp_tunnel_ptr->connect(nullptr); + GTEST_FAIL(); +} +catch (Exception & e) +{ + GTEST_ASSERT_EQ(e.message(), "MPPTunnel has finished"); +} + +TEST_F(TestMPPTunnelBase, ConnectWhenConnected) +{ + try + { + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + std::unique_ptr writer_ptr = std::make_unique(); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_FAIL(); + } + catch (Exception & e) + { + GTEST_ASSERT_EQ(e.message(), "MPPTunnel has connected"); + } +} + +TEST_F(TestMPPTunnelBase, CloseBeforeConnect) +try +{ + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + mpp_tunnel_ptr->close("Canceled"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), false); +} +CATCH + +TEST_F(TestMPPTunnelBase, CloseAfterClose) +try +{ + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + mpp_tunnel_ptr->close("Canceled"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + mpp_tunnel_ptr->close("Canceled"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); +} +CATCH + +TEST_F(TestMPPTunnelBase, ConnectWriteCancel) +try +{ + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + std::unique_ptr writer_ptr = 
std::make_unique(); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->close("Cancel"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec.size(), 2); //Second for err msg + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, ConnectWriteWithCloseFlag) +try +{ + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + std::unique_ptr writer_ptr = std::make_unique(); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr, true); + mpp_tunnel_ptr->waitForFinish(); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec.size(), 1); + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, ConnectWriteWriteDone) +try +{ + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + std::unique_ptr writer_ptr = std::make_unique(); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + auto data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->writeDone(); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec.size(), 1); + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, ConsumerFinish) +try +{ + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + std::unique_ptr writer_ptr = std::make_unique(); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + auto data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->consumerFinish(""); + mpp_tunnel_ptr->getThreadManager()->wait(); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec.size(), 1); + GTEST_ASSERT_EQ(dynamic_cast(writer_ptr.get())->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, WriteError) +{ + try + { + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + std::unique_ptr writer_ptr = std::make_unique(); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + auto data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->waitForFinish(); + GTEST_FAIL(); + } + catch (Exception & e) + { + GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed."); + } +} + +TEST_F(TestMPPTunnelBase, WriteAfterFinished) +{ + try + { + auto mpp_tunnel_ptr = constructRemoteSyncTunnel(); + std::unique_ptr writer_ptr = std::make_unique(); + mpp_tunnel_ptr->connect(writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + mpp_tunnel_ptr->close("Canceled"); + auto data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + 
mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->waitForFinish(); + GTEST_FAIL(); + } + catch (Exception & e) + { + GTEST_ASSERT_EQ(e.message(), "write to tunnel which is already closed,"); + } +} + +/// Test Local MPPTunnel +TEST_F(TestMPPTunnelBase, LocalConnectWhenFinished) +try +{ + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + mpp_tunnel_ptr->setFinishFlag(true); + mpp_tunnel_ptr->connect(nullptr); + GTEST_FAIL(); +} +catch (Exception & e) +{ + GTEST_ASSERT_EQ(e.message(), "MPPTunnel has finished"); +} + +TEST_F(TestMPPTunnelBase, LocalConnectWhenConnected) +{ + try + { + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + mpp_tunnel_ptr->connect(nullptr); + GTEST_FAIL(); + } + catch (Exception & e) + { + GTEST_ASSERT_EQ(e.message(), "MPPTunnel has connected"); + } +} + +TEST_F(TestMPPTunnelBase, LocalCloseBeforeConnect) +try +{ + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + mpp_tunnel_ptr->close("Canceled"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), false); +} +CATCH + +TEST_F(TestMPPTunnelBase, LocalCloseAfterClose) +try +{ + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + mpp_tunnel_ptr->close("Canceled"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + mpp_tunnel_ptr->close("Canceled"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); +} +CATCH + +TEST_F(TestMPPTunnelBase, LocalConnectWriteCancel) +try +{ + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->close("Cancel"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 2); //Second for err msg + GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, LocalConnectWriteWriteDone) +try +{ + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->writeDone(); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 1); + GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, LocalConsumerFinish) +try +{ + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->consumerFinish(""); + mpp_tunnel_ptr->getThreadManager()->wait(); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 1); + GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, LocalReadTerminate) +{ + try + { + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + 
mpp_tunnel_ptr->connect(nullptr); + MockTerminateLocalReaderPtr local_reader_ptr = std::make_shared(mpp_tunnel_ptr); + mpp_tunnel_ptr->getThreadManager()->schedule(true, "LocalReader", [local_reader_ptr] { + local_reader_ptr->read(); + }); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->waitForFinish(); + GTEST_FAIL(); + } + catch (Exception & e) + { + GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, Receiver closed"); + } +} + +TEST_F(TestMPPTunnelBase, LocalWriteAfterFinished) +{ + try + { + auto mpp_tunnel_ptr = constructLocalSyncTunnel(); + auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + mpp_tunnel_ptr->close(""); + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->waitForFinish(); + GTEST_FAIL(); + } + catch (Exception & e) + { + GTEST_ASSERT_EQ(e.message(), "write to tunnel which is already closed,"); + } +} + +/// Test Async MPPTunnel +TEST_F(TestMPPTunnelBase, AsyncConnectWriteCancel) +try +{ + auto mpp_tunnel_ptr = constructRemoteAsyncTunnel(); + std::unique_ptr async_writer_ptr = std::make_unique(mpp_tunnel_ptr); + mpp_tunnel_ptr->connect(async_writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + data_packet_ptr->set_data("Second"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->close("Cancel"); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(dynamic_cast(async_writer_ptr.get())->write_packet_vec.size(), 3); //Third for err msg + GTEST_ASSERT_EQ(dynamic_cast(async_writer_ptr.get())->write_packet_vec[0], "First"); + GTEST_ASSERT_EQ(dynamic_cast(async_writer_ptr.get())->write_packet_vec[1], "Second"); +} +CATCH + +TEST_F(TestMPPTunnelBase, AsyncConnectWriteWriteDone) +try +{ + auto mpp_tunnel_ptr = constructRemoteAsyncTunnel(); + std::unique_ptr async_writer_ptr = std::make_unique(mpp_tunnel_ptr); + mpp_tunnel_ptr->connect(async_writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->writeDone(); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(dynamic_cast(async_writer_ptr.get())->write_packet_vec.size(), 1); + GTEST_ASSERT_EQ(dynamic_cast(async_writer_ptr.get())->write_packet_vec[0], "First"); +} +CATCH + +TEST_F(TestMPPTunnelBase, AsyncConsumerFinish) +try +{ + auto mpp_tunnel_ptr = constructRemoteAsyncTunnel(); + std::unique_ptr async_writer_ptr = std::make_unique(mpp_tunnel_ptr); + mpp_tunnel_ptr->connect(async_writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + + std::unique_ptr data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->consumerFinish(""); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); + GTEST_ASSERT_EQ(dynamic_cast(async_writer_ptr.get())->write_packet_vec.size(), 0); +} +CATCH + +TEST_F(TestMPPTunnelBase, AsyncWriteError) +{ + try + { + auto mpp_tunnel_ptr = constructRemoteAsyncTunnel(); + 
std::unique_ptr async_writer_ptr = std::make_unique(mpp_tunnel_ptr); + mpp_tunnel_ptr->connect(async_writer_ptr.get()); + GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true); + auto data_packet_ptr = std::make_unique(); + data_packet_ptr->set_data("First"); + mpp_tunnel_ptr->write(*data_packet_ptr); + data_packet_ptr->set_data("Second"); + mpp_tunnel_ptr->write(*data_packet_ptr); + mpp_tunnel_ptr->waitForFinish(); + GTEST_FAIL(); + } + catch (Exception & e) + { + GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed."); + } +} + +} // namespace tests +} // namespace DB From cf8ab9567759409b9f4630282a0a008087d16157 Mon Sep 17 00:00:00 2001 From: jinhelin Date: Mon, 18 Apr 2022 18:20:03 +0800 Subject: [PATCH 25/79] Fix background_pool_size not take effect and BackgroundProcessingPool::getThreadIds may misses some thread_ids. (#4686) close pingcap/tiflash#4684, ref pingcap/tiflash#4685 --- dbms/CMakeLists.txt | 1 + dbms/src/Encryption/RateLimiter.cpp | 1 + dbms/src/Interpreters/Context.cpp | 6 ++--- dbms/src/Interpreters/Context.h | 2 +- dbms/src/Server/Server.cpp | 25 ++++++++++------- .../src/Storages/BackgroundProcessingPool.cpp | 27 +++++++++++++++---- dbms/src/Storages/BackgroundProcessingPool.h | 2 ++ libs/libcommon/include/common/logger_useful.h | 1 + 8 files changed, 47 insertions(+), 18 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 598f0c8d001..91f9aeb93c6 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -206,6 +206,7 @@ target_link_libraries (dbms ${RE2_ST_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY} ${BTRIE_LIBRARIES} + absl::synchronization ) if (NOT USE_INTERNAL_RE2_LIBRARY) diff --git a/dbms/src/Encryption/RateLimiter.cpp b/dbms/src/Encryption/RateLimiter.cpp index 0b5d90d8d09..38fd8468341 100644 --- a/dbms/src/Encryption/RateLimiter.cpp +++ b/dbms/src/Encryption/RateLimiter.cpp @@ -523,6 +523,7 @@ void IORateLimiter::setBackgroundThreadIds(std::vector thread_ids) { std::lock_guard lock(bg_thread_ids_mtx); bg_thread_ids.swap(thread_ids); + LOG_FMT_INFO(log, "bg_thread_ids {} => {}", bg_thread_ids.size(), bg_thread_ids); } std::pair IORateLimiter::getReadWriteBytes(const std::string & fname [[maybe_unused]]) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 04882d97d3f..ac959158490 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1548,11 +1548,11 @@ FileProviderPtr Context::getFileProvider() const return shared->file_provider; } -void Context::initializeRateLimiter(Poco::Util::AbstractConfiguration & config) +void Context::initializeRateLimiter(Poco::Util::AbstractConfiguration & config, BackgroundProcessingPool & bg_pool, BackgroundProcessingPool & blockable_bg_pool) const { getIORateLimiter().init(config); - auto tids = getBackgroundPool().getThreadIds(); - auto blockable_tids = getBlockableBackgroundPool().getThreadIds(); + auto tids = bg_pool.getThreadIds(); + auto blockable_tids = blockable_bg_pool.getThreadIds(); tids.insert(tids.end(), blockable_tids.begin(), blockable_tids.end()); getIORateLimiter().setBackgroundThreadIds(tids); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index d9f89c6cfa9..ebf7d8c82e2 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -404,7 +404,7 @@ class Context void initializeFileProvider(KeyManagerPtr key_manager, bool enable_encryption); FileProviderPtr getFileProvider() const; - void initializeRateLimiter(Poco::Util::AbstractConfiguration & 
config); + void initializeRateLimiter(Poco::Util::AbstractConfiguration & config, BackgroundProcessingPool & bg_pool, BackgroundProcessingPool & blockable_bg_pool) const; WriteLimiterPtr getWriteLimiter() const; ReadLimiterPtr getReadLimiter() const; IORateLimiter & getIORateLimiter() const; diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 84afee9af58..44e8ea29c29 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -1215,8 +1215,22 @@ int Server::main(const std::vector & /*args*/) /// Init TiFlash metrics. global_context->initializeTiFlashMetrics(); - /// Init Rate Limiter - global_context->initializeRateLimiter(config()); + /// Initialize users config reloader. + auto users_config_reloader = UserConfig::parseSettings(config(), config_path, global_context, log); + + /// Load global settings from default_profile and system_profile. + /// It internally depends on UserConfig::parseSettings. + global_context->setDefaultProfiles(config()); + Settings & settings = global_context->getSettingsRef(); + + /// Initialize the background thread pool. + /// It internally depends on settings.background_pool_size, + /// so must be called after settings has been load. + auto & bg_pool = global_context->getBackgroundPool(); + auto & blockable_bg_pool = global_context->getBlockableBackgroundPool(); + + /// Initialize RateLimiter. + global_context->initializeRateLimiter(config(), bg_pool, blockable_bg_pool); /// Initialize main config reloader. auto main_config_reloader = std::make_unique( @@ -1230,9 +1244,6 @@ int Server::main(const std::vector & /*args*/) }, /* already_loaded = */ true); - /// Initialize users config reloader. - auto users_config_reloader = UserConfig::parseSettings(config(), config_path, global_context, log); - /// Reload config in SYSTEM RELOAD CONFIG query. global_context->setConfigReloadCallback([&]() { main_config_reloader->reload(); @@ -1254,10 +1265,6 @@ int Server::main(const std::vector & /*args*/) bool use_l0_opt = config().getBool("l0_optimize", false); global_context->setUseL0Opt(use_l0_opt); - /// Load global settings from default_profile and system_profile. - global_context->setDefaultProfiles(config()); - Settings & settings = global_context->getSettingsRef(); - /// Size of cache for marks (index of MergeTree family of tables). It is necessary. 
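/// (A note on the reordering above: it encodes a strict dependency chain. Condensed,
/// using the calls from this diff:
///
///     auto users_config_reloader = UserConfig::parseSettings(config(), config_path, global_context, log); // 1. user profiles
///     global_context->setDefaultProfiles(config());                        // 2. load settings, incl. background_pool_size
///     auto & bg_pool = global_context->getBackgroundPool();                // 3. pool is created here, sized from the settings
///     global_context->initializeRateLimiter(config(), bg_pool, blockable_bg_pool); // 4. limiter records the pool's thread ids
///
/// Calling getBackgroundPool() any earlier would create the pool before
/// background_pool_size has been read, which is the #4684 bug being fixed.)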
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_SIZE); if (mark_cache_size) diff --git a/dbms/src/Storages/BackgroundProcessingPool.cpp b/dbms/src/Storages/BackgroundProcessingPool.cpp index 601cb31746c..c0c3d8f0e48 100644 --- a/dbms/src/Storages/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/BackgroundProcessingPool.cpp @@ -29,6 +29,15 @@ #ifdef __linux__ #include #include +inline static pid_t gettid() +{ + return syscall(SYS_gettid); +} +#elif +inline static pid_t gettid() +{ + return -1; +} #endif namespace CurrentMetrics @@ -76,6 +85,7 @@ void BackgroundProcessingPool::TaskInfo::wake() BackgroundProcessingPool::BackgroundProcessingPool(int size_) : size(size_) + , thread_ids_counter(size_) { LOG_FMT_INFO(&Poco::Logger::get("BackgroundProcessingPool"), "Create BackgroundProcessingPool with {} threads", size); @@ -140,9 +150,7 @@ void BackgroundProcessingPool::threadFunction() const auto name = "BkgPool" + std::to_string(tid++); setThreadName(name.data()); is_background_thread = true; -#ifdef __linux__ - addThreadId(syscall(SYS_gettid)); -#endif + addThreadId(gettid()); } MemoryTracker memory_tracker; @@ -272,14 +280,23 @@ void BackgroundProcessingPool::threadFunction() std::vector BackgroundProcessingPool::getThreadIds() { + thread_ids_counter.Wait(); std::lock_guard lock(thread_ids_mtx); + if (thread_ids.size() != size) + { + LOG_FMT_ERROR(&Poco::Logger::get("BackgroundProcessingPool"), "thread_ids.size is {}, but {} is required", thread_ids.size(), size); + throw Exception("Background threads' number not match"); + } return thread_ids; } void BackgroundProcessingPool::addThreadId(pid_t tid) { - std::lock_guard lock(thread_ids_mtx); - thread_ids.push_back(tid); + { + std::lock_guard lock(thread_ids_mtx); + thread_ids.push_back(tid); + } + thread_ids_counter.DecrementCount(); } } // namespace DB diff --git a/dbms/src/Storages/BackgroundProcessingPool.h b/dbms/src/Storages/BackgroundProcessingPool.h index 770ef833800..1ba6c4efcf8 100644 --- a/dbms/src/Storages/BackgroundProcessingPool.h +++ b/dbms/src/Storages/BackgroundProcessingPool.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -117,6 +118,7 @@ class BackgroundProcessingPool Threads threads; std::vector thread_ids; // Linux Thread ID std::mutex thread_ids_mtx; + absl::BlockingCounter thread_ids_counter; std::atomic shutdown{false}; std::condition_variable wake_event; diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h index 21604dd5470..e3981baf34c 100644 --- a/libs/libcommon/include/common/logger_useful.h +++ b/libs/libcommon/include/common/logger_useful.h @@ -18,6 +18,7 @@ #include #include +#include #ifndef QUERY_PREVIEW_LENGTH #define QUERY_PREVIEW_LENGTH 160 From afdd2e0ca23ccd6a19a604d90b9d75c971a3fe7c Mon Sep 17 00:00:00 2001 From: Zhi Qi <30543181+LittleFall@users.noreply.github.com> Date: Mon, 18 Apr 2022 22:08:03 +0800 Subject: [PATCH 26/79] =?UTF-8?q?fix:=20fix=20build=20issue=20`=E2=80=98ma?= =?UTF-8?q?ybe=5Funused=E2=80=99=20attribute=20ignored`=20on=20centos=20se?= =?UTF-8?q?rver=20=20(#4700)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close pingcap/tiflash#4701 --- dbms/src/Common/CPUAffinityManager.h | 7 +++++-- libs/libcommon/include/common/defines.h | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/CPUAffinityManager.h b/dbms/src/Common/CPUAffinityManager.h index 8c88c3ab1fd..5de62cf8368 100644 --- 
a/dbms/src/Common/CPUAffinityManager.h +++ b/dbms/src/Common/CPUAffinityManager.h @@ -14,6 +14,8 @@ #pragma once +#include + #include #include #include @@ -115,8 +117,9 @@ class CPUAffinityManager #endif // unused except Linux - [[maybe_unused]] int query_cpu_percent; - [[maybe_unused]] int cpu_cores; + MAYBE_UNUSED_MEMBER int query_cpu_percent; + MAYBE_UNUSED_MEMBER int cpu_cores; + std::vector query_threads; Poco::Logger * log; diff --git a/libs/libcommon/include/common/defines.h b/libs/libcommon/include/common/defines.h index 51b99a2d0ba..ff79a4d2077 100644 --- a/libs/libcommon/include/common/defines.h +++ b/libs/libcommon/include/common/defines.h @@ -199,3 +199,9 @@ static ALWAYS_INLINE inline void TIFLASH_NO_OPTIMIZE(T && var) #define TIFLASH_DUMMY_FUNCTION_DEFINITION #define tiflash_compiler_builtin_memcpy __builtin_memcpy #endif + +#ifdef __clang__ +#define MAYBE_UNUSED_MEMBER [[maybe_unused]] +#else +#define MAYBE_UNUSED_MEMBER +#endif From 873d3ff3eb71d30d3f456be2300f289e5e8482ff Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Tue, 19 Apr 2022 18:08:03 +0800 Subject: [PATCH 27/79] *: fix tableScan incorrect trace. (#4699) close pingcap/tiflash#4692 --- dbms/src/Flash/Statistics/TableScanImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Flash/Statistics/TableScanImpl.cpp b/dbms/src/Flash/Statistics/TableScanImpl.cpp index 9d301b69db8..a1f011de695 100644 --- a/dbms/src/Flash/Statistics/TableScanImpl.cpp +++ b/dbms/src/Flash/Statistics/TableScanImpl.cpp @@ -55,7 +55,7 @@ void TableScanStatistics::collectExtraRuntimeDetail() { auto * p_stream = dynamic_cast(io_stream.get()); assert(p_stream); - cop_table_scan_detail.bytes += p_stream->getProfileInfo().bytes; + local_table_scan_detail.bytes += p_stream->getProfileInfo().bytes; } } } From feee96afe1534a8d1c11421f764b0e13de5b9a1e Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 19 Apr 2022 19:40:03 +0800 Subject: [PATCH 28/79] Make performance of TPCH q15 stable (#4570) close pingcap/tiflash#4451 --- dbms/src/Common/MemoryTracker.cpp | 5 +++++ dbms/src/Common/MemoryTracker.h | 1 + .../ParallelAggregatingBlockInputStream.cpp | 2 ++ .../ParallelAggregatingBlockInputStream.h | 1 + dbms/src/Interpreters/Aggregator.cpp | 19 ++++++++++++++++--- dbms/src/Interpreters/Aggregator.h | 13 +++++++++++-- 6 files changed, 36 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/MemoryTracker.cpp b/dbms/src/Common/MemoryTracker.cpp index e79e6077366..f64881ae35a 100644 --- a/dbms/src/Common/MemoryTracker.cpp +++ b/dbms/src/Common/MemoryTracker.cpp @@ -226,6 +226,11 @@ void submitLocalDeltaMemory() local_delta = 0; } +Int64 getLocalDeltaMemory() +{ + return local_delta; +} + void alloc(Int64 size) { checkSubmitAndUpdateLocalDelta(local_delta + size); diff --git a/dbms/src/Common/MemoryTracker.h b/dbms/src/Common/MemoryTracker.h index 457377a7ce0..c87ec713dda 100644 --- a/dbms/src/Common/MemoryTracker.h +++ b/dbms/src/Common/MemoryTracker.h @@ -111,6 +111,7 @@ namespace CurrentMemoryTracker { void disableThreshold(); void submitLocalDeltaMemory(); +Int64 getLocalDeltaMemory(); void alloc(Int64 size); void realloc(Int64 old_size, Int64 new_size); void free(Int64 size); diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index 62a7e7c4c46..3163975108f 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -157,6 +157,7 @@ void 
ParallelAggregatingBlockInputStream::Handler::onBlock(Block & block, size_t parent.file_provider, parent.threads_data[thread_num].key_columns, parent.threads_data[thread_num].aggregate_columns, + parent.threads_data[thread_num].local_delta_memory, parent.no_more_keys); parent.threads_data[thread_num].src_rows += block.rows(); @@ -270,6 +271,7 @@ void ParallelAggregatingBlockInputStream::execute() file_provider, threads_data[0].key_columns, threads_data[0].aggregate_columns, + threads_data[0].local_delta_memory, no_more_keys); } diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h index 3f486d2e35f..398c3d35bbc 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h @@ -106,6 +106,7 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream { size_t src_rows = 0; size_t src_bytes = 0; + Int64 local_delta_memory = 0; ColumnRawPtrs key_columns; Aggregator::AggregateColumns aggregate_columns; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index ed640ce5d08..6e067b88d81 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -522,7 +522,14 @@ void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns } } -bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & result, const FileProviderPtr & file_provider, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) +bool Aggregator::executeOnBlock( + const Block & block, + AggregatedDataVariants & result, + const FileProviderPtr & file_provider, + ColumnRawPtrs & key_columns, + AggregateColumns & aggregate_columns, + Int64 & local_delta_memory, + bool & no_more_keys) { if (isCancelled()) return true; @@ -600,7 +607,13 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re size_t result_size = result.sizeWithoutOverflowRow(); Int64 current_memory_usage = 0; if (current_memory_tracker) + { current_memory_usage = current_memory_tracker->get(); + auto updated_local_delta_memory = CurrentMemoryTracker::getLocalDeltaMemory(); + auto local_delta_memory_diff = updated_local_delta_memory - local_delta_memory; + current_memory_usage += (local_memory_usage.fetch_add(local_delta_memory_diff) + local_delta_memory_diff); + local_delta_memory = updated_local_delta_memory; + } auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation; /// Here all the results in the sum are taken into account, from different threads. @@ -815,14 +828,14 @@ void Aggregator::execute(const BlockInputStreamPtr & stream, AggregatedDataVaria src_rows += block.rows(); src_bytes += block.bytes(); - if (!executeOnBlock(block, result, file_provider, key_columns, aggregate_columns, no_more_keys)) + if (!executeOnBlock(block, result, file_provider, key_columns, aggregate_columns, params.local_delta_memory, no_more_keys)) break; } /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation. /// To do this, we pass a block with zero rows to aggregate. 
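    /// The delta bookkeeping added in executeOnBlock above can be read as: each thread
    /// remembers the last thread-local delta it published and folds only the change since
    /// then into a shared atomic. A self-contained sketch of the same arithmetic
    /// (illustrative names, not the actual TiFlash symbols):
    ///
    ///     #include <atomic>
    ///     #include <cstdint>
    ///
    ///     std::atomic<int64_t> shared_usage{0};
    ///     thread_local int64_t last_published = 0;
    ///
    ///     int64_t publishDelta(int64_t current_local_delta)
    ///     {
    ///         const int64_t diff = current_local_delta - last_published;
    ///         last_published = current_local_delta;
    ///         // fetch_add returns the old value, so adding diff back yields the
    ///         // total as of this thread's publication.
    ///         return shared_usage.fetch_add(diff) + diff;
    ///     }
    ///
    /// which mirrors `local_memory_usage.fetch_add(local_delta_memory_diff) + local_delta_memory_diff`.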
    if (result.empty() && params.keys_size == 0 && !params.empty_result_for_aggregation_by_empty_set)
-        executeOnBlock(stream->getHeader(), result, file_provider, key_columns, aggregate_columns, no_more_keys);
+        executeOnBlock(stream->getHeader(), result, file_provider, key_columns, aggregate_columns, params.local_delta_memory, no_more_keys);
 
     double elapsed_seconds = watch.elapsedSeconds();
     size_t rows = result.sizeWithoutOverflowRow();
diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h
index 672a0951465..b3bb537dc2e 100644
--- a/dbms/src/Interpreters/Aggregator.h
+++ b/dbms/src/Interpreters/Aggregator.h
@@ -706,6 +706,7 @@ class Aggregator
         AggregateDescriptions aggregates;
         size_t keys_size;
         size_t aggregates_size;
+        Int64 local_delta_memory = 0;
 
         /// The settings of approximate calculation of GROUP BY.
         const bool overflow_row; /// Do we need to put into AggregatedDataVariants::without_key aggregates for keys that are not in max_rows_to_group_by.
@@ -799,8 +800,14 @@ class Aggregator
     using AggregateFunctionsPlainPtrs = std::vector<IAggregateFunction *>;
 
     /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break').
-    bool executeOnBlock(const Block & block, AggregatedDataVariants & result, const FileProviderPtr & file_provider, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
-                        bool & no_more_keys);
+    bool executeOnBlock(
+        const Block & block,
+        AggregatedDataVariants & result,
+        const FileProviderPtr & file_provider,
+        ColumnRawPtrs & key_columns,
+        AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
+        Int64 & local_delta_memory,
+        bool & no_more_keys);
 
     /** Convert the aggregation data structure into a block.
       * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block.
@@ -906,6 +913,8 @@ class Aggregator
     /// How many RAM were used to process the query before processing the first block.
    Int64 memory_usage_before_aggregation = 0;
 
+    std::atomic<Int64> local_memory_usage = 0;
+
     std::mutex mutex;
 
     const LoggerPtr log;
 

From 636fcd22371266ee2792b4e0636cf96b4cacaa0c Mon Sep 17 00:00:00 2001
From: yanweiqi <592838129@qq.com>
Date: Tue, 19 Apr 2022 21:06:03 +0800
Subject: [PATCH 29/79] *: fix gettid compile error (#4704)

close pingcap/tiflash#4703
---
 dbms/src/Storages/BackgroundProcessingPool.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Storages/BackgroundProcessingPool.cpp b/dbms/src/Storages/BackgroundProcessingPool.cpp
index c0c3d8f0e48..96c2c6cc622 100644
--- a/dbms/src/Storages/BackgroundProcessingPool.cpp
+++ b/dbms/src/Storages/BackgroundProcessingPool.cpp
@@ -29,12 +29,12 @@
 #ifdef __linux__
 #include <sys/syscall.h>
 #include <unistd.h>
-inline static pid_t gettid()
+inline static pid_t getTid()
 {
     return syscall(SYS_gettid);
 }
-#elif
+#else
-inline static pid_t gettid()
+inline static pid_t getTid()
 {
     return -1;
 }
@@ -150,7 +150,7 @@ void BackgroundProcessingPool::threadFunction()
         const auto name = "BkgPool" + std::to_string(tid++);
         setThreadName(name.data());
         is_background_thread = true;
-        addThreadId(gettid());
+        addThreadId(getTid());
     }
 
     MemoryTracker memory_tracker;

From 7149736cb21343c065c4950153b0871d237415c8 Mon Sep 17 00:00:00 2001
From: lidezhu <47731263+lidezhu@users.noreply.github.com>
Date: Wed, 20 Apr 2022 12:46:03 +0800
Subject: [PATCH 30/79] clear old range before apply snapshot (#4668)

close pingcap/tiflash#4414
---
 .../Storages/Transaction/ApplySnapshot.cpp    | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp
index ae1f00403ef..6106dda6f4b 100644
--- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp
+++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp
@@ -153,20 +153,33 @@ void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_re
         // Acquire `drop_lock` so that no other threads can drop the storage. `alter_lock` is not required.
         auto table_lock = storage->lockForShare(getThreadName());
         auto dm_storage = std::dynamic_pointer_cast<StorageDeltaMerge>(storage);
-        auto key_range = DM::RowKeyRange::fromRegionRange(
+        auto new_key_range = DM::RowKeyRange::fromRegionRange(
             new_region_wrap->getRange(),
             table_id,
             storage->isCommonHandle(),
             storage->getRowKeyColumnSize());
+        if (old_region)
+        {
+            auto old_key_range = DM::RowKeyRange::fromRegionRange(
+                old_region->getRange(),
+                table_id,
+                storage->isCommonHandle(),
+                storage->getRowKeyColumnSize());
+            if (old_key_range != new_key_range)
+            {
+                LOG_FMT_INFO(log, "clear region {} old range {} before apply snapshot of new range {}", region_id, old_key_range.toDebugString(), new_key_range.toDebugString());
+                dm_storage->deleteRange(old_key_range, context.getSettingsRef());
+            }
+        }
         if constexpr (std::is_same_v<RegionPtrWrap, RegionPtrWithSnapshotFiles>)
         {
             // Call `ingestFiles` to delete data for range and ingest external DTFiles.
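            // Why the extra deleteRange above: `ingestFiles` / `deleteRange` below only
            // clear data inside the *new* key range. If the region shrank, e.g.
            //
            //     old region range:   [100, 300)
            //     new snapshot range: [100, 200)
            //
            // rows in [200, 300) fall in neither call and would survive the snapshot
            // apply; the `old_key_range != new_key_range` branch wipes them first.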
- dm_storage->ingestFiles(key_range, new_region_wrap.ingest_ids, /*clear_data_in_range=*/true, context.getSettingsRef()); + dm_storage->ingestFiles(new_key_range, new_region_wrap.ingest_ids, /*clear_data_in_range=*/true, context.getSettingsRef()); } else { // Call `deleteRange` to delete data for range - dm_storage->deleteRange(key_range, context.getSettingsRef()); + dm_storage->deleteRange(new_key_range, context.getSettingsRef()); } } catch (DB::Exception & e) From 7bb8e33336e111700b551037c803cdc71c7ad0da Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Wed, 20 Apr 2022 13:20:03 +0800 Subject: [PATCH 31/79] update client-c to latest version (#4681) close pingcap/tiflash#4680 --- contrib/client-c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/client-c b/contrib/client-c index 184cde7ae4f..2c6c5fed8d7 160000 --- a/contrib/client-c +++ b/contrib/client-c @@ -1 +1 @@ -Subproject commit 184cde7ae4f83c0e9aaaaf825f3e0e7d600e62fa +Subproject commit 2c6c5fed8d7c48bcb52198f1107d4c58dd22f7e2 From 8a2e62dddec6adda31a836867298a319cf025ecb Mon Sep 17 00:00:00 2001 From: lidezhu <47731263+lidezhu@users.noreply.github.com> Date: Wed, 20 Apr 2022 16:06:04 +0800 Subject: [PATCH 32/79] reuse old index file format for empty dmfile and just ignore it's size in statistical data (#4711) close pingcap/tiflash#4708 --- .../Storages/DeltaMerge/File/DMFileWriter.cpp | 18 ++++++++++++------ .../Storages/DeltaMerge/File/DMFileWriter.h | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp index 3701e9c6cca..4ea3e398aaa 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp @@ -360,10 +360,13 @@ void DMFileWriter::finalizeColumn(ColId col_id, DataTypePtr type) dmfile->encryptionIndexPath(stream_name), false, write_limiter); - if (!is_empty_file) - stream->minmaxes->write(*type, buf); + stream->minmaxes->write(*type, buf); buf.sync(); - bytes_written += buf.getMaterializedBytes(); + // Ignore data written in index file when the dmfile is empty. + // This is ok because the index file in this case is tiny, and we already ignore other small files like meta and pack stat file. + // The motivation to do this is to show a zero `stable_size_on_disk` for empty segments, + // and we cannot change the index file format for empty dmfile because of backward compatibility. + bytes_written += is_empty_file ? 0 : buf.getMaterializedBytes(); } else { @@ -374,10 +377,13 @@ void DMFileWriter::finalizeColumn(ColId col_id, DataTypePtr type) write_limiter, dmfile->configuration->getChecksumAlgorithm(), dmfile->configuration->getChecksumFrameLength()); - if (!is_empty_file) - stream->minmaxes->write(*type, *buf); + stream->minmaxes->write(*type, *buf); buf->sync(); - bytes_written += buf->getMaterializedBytes(); + // Ignore data written in index file when the dmfile is empty. + // This is ok because the index file in this case is tiny, and we already ignore other small files like meta and pack stat file. + // The motivation to do this is to show a zero `stable_size_on_disk` for empty segments, + // and we cannot change the index file format for empty dmfile because of backward compatibility. + bytes_written += is_empty_file ? 
0 : buf->getMaterializedBytes(); #ifndef NDEBUG examine_buffer_size(*buf, *this->file_provider); #endif diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h index b9868444162..79fd688c0b8 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h @@ -250,7 +250,7 @@ class DMFileWriter FileProviderPtr file_provider; WriteLimiterPtr write_limiter; - // use to avoid write index data for empty file + // use to avoid count data written in index file for empty dmfile bool is_empty_file = true; }; From 6fe26bd13a141d3f85d49db4c904bf5a1935f1d3 Mon Sep 17 00:00:00 2001 From: yibin Date: Thu, 21 Apr 2022 10:54:03 +0800 Subject: [PATCH 33/79] Add some debug friendly utilities for dag request based test framework (#4698) close pingcap/tiflash#4304 --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 101 +++++++++++++++++--------- dbms/src/Debug/dbgNaturalDag.cpp | 15 ++++ dbms/src/Debug/dbgNaturalDag.h | 10 +++ 3 files changed, 93 insertions(+), 33 deletions(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index c17f26d45b7..a4a1f6730c9 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -131,6 +131,7 @@ class UniqRawResReformatBlockOutputStream : public IProfilingBlockInputStream }; tipb::SelectResponse executeDAGRequest(Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version, Timestamp start_ts, std::vector> & key_ranges); +bool runAndCompareDagReq(const coprocessor::Request & req, const coprocessor::Response & res, Context & context, String & unequal_msg); BlockInputStreamPtr outputDAGResponse(Context & context, const DAGSchema & schema, const tipb::SelectResponse & dag_response); DAGSchema getSelectSchema(Context & context); bool dagRspEqual(Context & context, const tipb::SelectResponse & expected, const tipb::SelectResponse & actual, String & unequal_msg); @@ -339,56 +340,90 @@ void dbgFuncTiDBQueryFromNaturalDag(Context & context, const ASTs & args, DBGInv auto dag = NaturalDag(json_dag_path, &Poco::Logger::get("MockDAG")); dag.init(); dag.build(context); - bool unequal_flag = false; - String unequal_msg; + std::vector> failed_req_msg_vec; int req_idx = 0; for (const auto & it : dag.getReqAndRspVec()) { auto && req = it.first; auto && res = it.second; - kvrpcpb::Context req_context = req.context(); - RegionID region_id = req_context.region_id(); - tipb::DAGRequest dag_request = getDAGRequestFromStringWithRetry(req.data()); - RegionPtr region = context.getTMTContext().getKVStore()->getRegion(region_id); - if (!region) - throw Exception(fmt::format("No such region: {}", region_id), ErrorCodes::BAD_ARGUMENTS); - - DAGProperties properties = getDAGProperties(""); - std::vector> key_ranges = CoprocessorHandler::GenCopKeyRange(req.ranges()); + int32_t req_id = dag.getReqIDVec()[req_idx]; + bool unequal_flag = false; + bool failed_flag = false; + String unequal_msg; static auto log = Logger::get("MockDAG"); - LOG_FMT_INFO(log, "Handling DAG request: {}", dag_request.DebugString()); - tipb::SelectResponse dag_response; - TablesRegionsInfo tables_regions_info(true); - auto & table_regions_info = tables_regions_info.getSingleTableRegions(); - table_regions_info.local_regions.emplace(region_id, RegionInfo(region_id, region->version(), region->confVer(), std::move(key_ranges), nullptr)); - - DAGContext dag_context(dag_request); - 
dag_context.tables_regions_info = std::move(tables_regions_info); - dag_context.log = log; - context.setDAGContext(&dag_context); - DAGDriver driver(context, properties.start_ts, DEFAULT_UNSPECIFIED_SCHEMA_VERSION, &dag_response, true); - driver.execute(); - - auto resp_ptr = std::make_shared(); - if (!resp_ptr->ParseFromString(res.data())) + try { - throw Exception("Incorrect json response data!", ErrorCodes::BAD_ARGUMENTS); + unequal_flag = runAndCompareDagReq(req, res, context, unequal_msg); } - else + catch (const Exception & e) + { + failed_flag = true; + unequal_msg = e.message(); + } + catch (...) + { + failed_flag = true; + unequal_msg = "Unknown execution exception!"; + } + + if (unequal_flag || failed_flag) { - unequal_flag |= (!dagRspEqual(context, *resp_ptr, dag_response, unequal_msg)); - if (unequal_flag) + failed_req_msg_vec.push_back(std::make_pair(req_id, unequal_msg)); + if (!dag.continueWhenError()) break; } ++req_idx; } - // It is all right to throw exception above, dag.clean is only to make it better dag.clean(context); - if (unequal_flag) + if (!failed_req_msg_vec.empty()) { output("Invalid"); - throw Exception(fmt::format("{}th request results are not equal, msg: {}", req_idx, unequal_msg), ErrorCodes::LOGICAL_ERROR); + FmtBuffer fmt_buf; + fmt_buf.joinStr( + failed_req_msg_vec.begin(), + failed_req_msg_vec.end(), + [](const auto & pair, FmtBuffer & fb) { fb.fmtAppend("request {} failed, msg: {}", pair.first, pair.second); }, + "\n"); + throw Exception(fmt_buf.toString(), ErrorCodes::LOGICAL_ERROR); + } +} + +bool runAndCompareDagReq(const coprocessor::Request & req, const coprocessor::Response & res, Context & context, String & unequal_msg) +{ + const kvrpcpb::Context & req_context = req.context(); + RegionID region_id = req_context.region_id(); + tipb::DAGRequest dag_request = getDAGRequestFromStringWithRetry(req.data()); + RegionPtr region = context.getTMTContext().getKVStore()->getRegion(region_id); + if (!region) + throw Exception(fmt::format("No such region: {}", region_id), ErrorCodes::BAD_ARGUMENTS); + + bool unequal_flag = false; + DAGProperties properties = getDAGProperties(""); + std::vector> key_ranges = CoprocessorHandler::GenCopKeyRange(req.ranges()); + static auto log = Logger::get("MockDAG"); + LOG_FMT_INFO(log, "Handling DAG request: {}", dag_request.DebugString()); + tipb::SelectResponse dag_response; + TablesRegionsInfo tables_regions_info(true); + auto & table_regions_info = tables_regions_info.getSingleTableRegions(); + table_regions_info.local_regions.emplace(region_id, RegionInfo(region_id, region->version(), region->confVer(), std::move(key_ranges), nullptr)); + + DAGContext dag_context(dag_request); + dag_context.tables_regions_info = std::move(tables_regions_info); + dag_context.log = log; + context.setDAGContext(&dag_context); + DAGDriver driver(context, properties.start_ts, DEFAULT_UNSPECIFIED_SCHEMA_VERSION, &dag_response, true); + driver.execute(); + + auto resp_ptr = std::make_shared(); + if (!resp_ptr->ParseFromString(res.data())) + { + throw Exception("Incorrect json response data!", ErrorCodes::BAD_ARGUMENTS); + } + else + { + unequal_flag |= (!dagRspEqual(context, *resp_ptr, dag_response, unequal_msg)); } + return unequal_flag; } BlockInputStreamPtr dbgFuncTiDBQuery(Context & context, const ASTs & args) diff --git a/dbms/src/Debug/dbgNaturalDag.cpp b/dbms/src/Debug/dbgNaturalDag.cpp index 828cadfb1e6..e1901454c5b 100644 --- a/dbms/src/Debug/dbgNaturalDag.cpp +++ b/dbms/src/Debug/dbgNaturalDag.cpp @@ -35,6 +35,7 @@ namespace 
ErrorCodes extern const int BAD_ARGUMENTS; } // namespace ErrorCodes +static const String CONTINUE_WHEN_ERROR = "continue_when_error"; static const String TABLE_IDS = "table_of_interest"; static const String TABLE_DATA = "table_data"; static const String TABLE_META = "meta"; @@ -47,6 +48,7 @@ static const String TIKV_KEY = "key"; static const String TIKV_VALUE = "value"; static const String REQ_RSP_DATA = "request_data"; static const String REQ_TYPE = "type"; +static const String REQ_ID = "req_id"; static const String REQUEST = "request"; static const String RESPONSE = "response"; static const String DEFAULT_DATABASE_NAME = "test"; @@ -87,6 +89,12 @@ void NaturalDag::init() LOG_FMT_INFO(log, "Succeed parsing json file: {}", json_dag_path); const auto & obj = result.extract(); + + if (obj->has(CONTINUE_WHEN_ERROR)) + { + continue_when_error = obj->getValue(CONTINUE_WHEN_ERROR); + LOG_FMT_INFO(log, "Succeed load continue_when_error flag: {}!", continue_when_error); + } loadTables(obj); LOG_FMT_INFO(log, "Succeed loading table data!"); loadReqAndRsp(obj); @@ -96,6 +104,7 @@ void NaturalDag::init() void NaturalDag::loadReqAndRsp(const NaturalDag::JSONObjectPtr & obj) { auto req_data_json = obj->getArray(REQ_RSP_DATA); + int32_t default_req_id = 0; for (const auto & req_data_json_obj : *req_data_json) { auto req_data_obj = req_data_json_obj.extract(); @@ -114,6 +123,12 @@ void NaturalDag::loadReqAndRsp(const NaturalDag::JSONObjectPtr & obj) if (!cop_response.ParseFromString(response)) throw Exception("Incorrect response data!", ErrorCodes::BAD_ARGUMENTS); req_rsp.emplace_back(std::make_pair(std::move(cop_request), std::move(cop_response))); + + if (req_data_obj->has(REQ_ID)) + req_id_vec.push_back(req_data_obj->getValue(REQ_ID)); + else + req_id_vec.push_back(default_req_id); + ++default_req_id; } } void NaturalDag::loadTables(const NaturalDag::JSONObjectPtr & obj) diff --git a/dbms/src/Debug/dbgNaturalDag.h b/dbms/src/Debug/dbgNaturalDag.h index 7d5086b0fca..f7c1d850ebe 100644 --- a/dbms/src/Debug/dbgNaturalDag.h +++ b/dbms/src/Debug/dbgNaturalDag.h @@ -62,6 +62,14 @@ class NaturalDag { return mpp_req_rsp; } + const std::vector & getReqIDVec() const + { + return req_id_vec; + } + bool continueWhenError() const + { + return continue_when_error; + } static void clean(Context & context); private: @@ -97,6 +105,8 @@ class NaturalDag Poco::Logger * log; LoadedTableMap tables; TableIDVec table_ids; + bool continue_when_error = false; + std::vector req_id_vec; ReqRspVec req_rsp; BatchReqRspVec batch_req_rsp; MPPReqRspVec mpp_req_rsp; From ebb28dcb373ba6408b9229dbb74fb916e8f4fd2c Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Thu, 21 Apr 2022 12:34:04 +0800 Subject: [PATCH 34/79] Add a background metrics for both v2 and v3 (#4593) ref pingcap/tiflash#3594 --- dbms/src/Common/ProfileEvents.cpp | 2 ++ dbms/src/Storages/Page/PageUtil.h | 15 +++++++++++- dbms/src/Storages/Page/V2/PageFile.cpp | 24 +++++++++++-------- dbms/src/Storages/Page/V2/PageFile.h | 18 +++++++++----- .../src/Storages/Page/V2/gc/DataCompactor.cpp | 7 +++--- .../Storages/Page/V2/gc/LegacyCompactor.cpp | 6 ++--- .../Page/V2/tests/gtest_page_util.cpp | 4 ++-- dbms/src/Storages/Page/V3/BlobFile.cpp | 10 ++++---- dbms/src/Storages/Page/V3/BlobFile.h | 4 ++-- dbms/src/Storages/Page/V3/BlobStore.cpp | 8 +++---- dbms/src/Storages/Page/V3/BlobStore.h | 2 +- .../Storages/Page/V3/LogFile/LogWriter.cpp | 6 ++--- dbms/src/Storages/Page/V3/LogFile/LogWriter.h | 2 +- dbms/src/Storages/Page/V3/WALStore.cpp | 2 +- metrics/grafana/tiflash_summary.json 
| 18 ++++++++++++-- 15 files changed, 84 insertions(+), 44 deletions(-) diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index 67019be0a0f..1b9b62dd2c6 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -111,7 +111,9 @@ M(PSMWritePages) \ M(PSMWriteIOCalls) \ M(PSMWriteBytes) \ + M(PSMBackgroundWriteBytes) \ M(PSMReadPages) \ + M(PSMBackgroundReadBytes) \ \ M(PSMReadIOCalls) \ M(PSMReadBytes) \ diff --git a/dbms/src/Storages/Page/PageUtil.h b/dbms/src/Storages/Page/PageUtil.h index ace22c28d31..7edae303be7 100644 --- a/dbms/src/Storages/Page/PageUtil.h +++ b/dbms/src/Storages/Page/PageUtil.h @@ -49,7 +49,9 @@ extern const Event FileOpenFailed; extern const Event PSMWritePages; extern const Event PSMWriteIOCalls; extern const Event PSMWriteBytes; +extern const Event PSMBackgroundWriteBytes; extern const Event PSMReadPages; +extern const Event PSMBackgroundReadBytes; extern const Event PSMReadIOCalls; extern const Event PSMReadBytes; extern const Event PSMWriteFailed; @@ -158,6 +160,7 @@ void writeFile( char * data, size_t to_write, const WriteLimiterPtr & write_limiter = nullptr, + const bool background = false, [[maybe_unused]] bool enable_failpoint = false) { if (write_limiter) @@ -212,6 +215,11 @@ void writeFile( } ProfileEvents::increment(ProfileEvents::PSMWriteIOCalls, write_io_calls); ProfileEvents::increment(ProfileEvents::PSMWriteBytes, bytes_written); + + if (background) + { + ProfileEvents::increment(ProfileEvents::PSMBackgroundWriteBytes, bytes_written); + } } template @@ -219,7 +227,8 @@ void readFile(T & file, const off_t offset, const char * buf, size_t expected_bytes, - const ReadLimiterPtr & read_limiter = nullptr) + const ReadLimiterPtr & read_limiter = nullptr, + const bool background = false) { if (unlikely(expected_bytes == 0)) return; @@ -257,6 +266,10 @@ void readFile(T & file, } ProfileEvents::increment(ProfileEvents::PSMReadIOCalls, read_io_calls); ProfileEvents::increment(ProfileEvents::PSMReadBytes, bytes_read); + if (background) + { + ProfileEvents::increment(ProfileEvents::PSMBackgroundReadBytes, bytes_read); + } if (unlikely(bytes_read != expected_bytes)) throw DB::TiFlashException(fmt::format("No enough data in file {}, read bytes: {} , expected bytes: {}", file->getFileName(), bytes_read, expected_bytes), diff --git a/dbms/src/Storages/Page/V2/PageFile.cpp b/dbms/src/Storages/Page/V2/PageFile.cpp index 7e27fcd926b..cec02aa2d85 100644 --- a/dbms/src/Storages/Page/V2/PageFile.cpp +++ b/dbms/src/Storages/Page/V2/PageFile.cpp @@ -471,24 +471,28 @@ PageFile::MetaMergingReader::~MetaMergingReader() page_file.free(meta_buffer, meta_size); } -PageFile::MetaMergingReaderPtr PageFile::MetaMergingReader::createFrom(PageFile & page_file, size_t max_meta_offset, const ReadLimiterPtr & read_limiter) +PageFile::MetaMergingReaderPtr PageFile::MetaMergingReader::createFrom( + PageFile & page_file, + size_t max_meta_offset, + const ReadLimiterPtr & read_limiter, + const bool background) { auto reader = std::make_shared(page_file); - reader->initialize(max_meta_offset, read_limiter); + reader->initialize(max_meta_offset, read_limiter, background); return reader; } -PageFile::MetaMergingReaderPtr PageFile::MetaMergingReader::createFrom(PageFile & page_file, const ReadLimiterPtr & read_limiter) +PageFile::MetaMergingReaderPtr PageFile::MetaMergingReader::createFrom(PageFile & page_file, const ReadLimiterPtr & read_limiter, const bool background) { auto reader = std::make_shared(page_file); - 
reader->initialize(std::nullopt, read_limiter); + reader->initialize(std::nullopt, read_limiter, background); return reader; } // Try to initiallize access to meta, read the whole metadata to memory. // Status -> Finished if metadata size is zero. // -> Opened if metadata successfully load from disk. -void PageFile::MetaMergingReader::initialize(std::optional max_meta_offset, const ReadLimiterPtr & read_limiter) +void PageFile::MetaMergingReader::initialize(std::optional max_meta_offset, const ReadLimiterPtr & read_limiter, const bool background) { if (status == Status::Opened) return; @@ -523,7 +527,7 @@ void PageFile::MetaMergingReader::initialize(std::optional max_meta_offs throw Exception("Try to read meta of " + page_file.toString() + ", but open file error. Path: " + path, ErrorCodes::LOGICAL_ERROR); SCOPE_EXIT({ underlying_file->close(); }); meta_buffer = static_cast(page_file.alloc(meta_size)); - PageUtil::readFile(underlying_file, 0, meta_buffer, meta_size, read_limiter); + PageUtil::readFile(underlying_file, 0, meta_buffer, meta_size, read_limiter, background); status = Status::Opened; } @@ -770,7 +774,7 @@ const String & PageFile::Writer::parentPath() const return page_file.parent_path; } -size_t PageFile::Writer::write(DB::WriteBatch & wb, PageEntriesEdit & edit, const WriteLimiterPtr & write_limiter) +size_t PageFile::Writer::write(DB::WriteBatch & wb, PageEntriesEdit & edit, const WriteLimiterPtr & write_limiter, bool background) { ProfileEvents::increment(ProfileEvents::PSMWritePages, wb.putWriteCount()); @@ -788,7 +792,7 @@ size_t PageFile::Writer::write(DB::WriteBatch & wb, PageEntriesEdit & edit, cons SCOPE_EXIT({ page_file.free(data_buf.begin(), data_buf.size()); }); auto write_buf = [&](WritableFilePtr & file, UInt64 offset, ByteBuffer buf, bool enable_failpoint) { - PageUtil::writeFile(file, offset, buf.begin(), buf.size(), write_limiter, enable_failpoint); + PageUtil::writeFile(file, offset, buf.begin(), buf.size(), write_limiter, background, enable_failpoint); if (sync_on_write) PageUtil::syncFile(file); }; @@ -865,7 +869,7 @@ PageFile::Reader::~Reader() data_file->close(); } -PageMap PageFile::Reader::read(PageIdAndEntries & to_read, const ReadLimiterPtr & read_limiter) +PageMap PageFile::Reader::read(PageIdAndEntries & to_read, const ReadLimiterPtr & read_limiter, bool background) { ProfileEvents::increment(ProfileEvents::PSMReadPages, to_read.size()); @@ -892,7 +896,7 @@ PageMap PageFile::Reader::read(PageIdAndEntries & to_read, const ReadLimiterPtr PageMap page_map; for (const auto & [page_id, entry] : to_read) { - PageUtil::readFile(data_file, entry.offset, pos, entry.size, read_limiter); + PageUtil::readFile(data_file, entry.offset, pos, entry.size, read_limiter, background); if constexpr (PAGE_CHECKSUM_ON_READ) { diff --git a/dbms/src/Storages/Page/V2/PageFile.h b/dbms/src/Storages/Page/V2/PageFile.h index 7f1c41f9e8f..78d4063732d 100644 --- a/dbms/src/Storages/Page/V2/PageFile.h +++ b/dbms/src/Storages/Page/V2/PageFile.h @@ -47,7 +47,7 @@ class PageFile : public Allocator Writer(PageFile &, bool sync_on_write, bool truncate_if_exists = true); ~Writer(); - [[nodiscard]] size_t write(DB::WriteBatch & wb, PageEntriesEdit & edit, const WriteLimiterPtr & write_limiter = nullptr); + [[nodiscard]] size_t write(DB::WriteBatch & wb, PageEntriesEdit & edit, const WriteLimiterPtr & write_limiter = nullptr, bool background = false); void tryCloseIdleFd(const Seconds & max_idle_time); const String & parentPath() const; @@ -80,7 +80,7 @@ class PageFile : public Allocator 
/// Read pages from files. /// After return, the items in to_read could be reordered, but won't be removed or added. - PageMap read(PageIdAndEntries & to_read, const ReadLimiterPtr & read_limiter = nullptr); + PageMap read(PageIdAndEntries & to_read, const ReadLimiterPtr & read_limiter = nullptr, bool background = false); void read(PageIdAndEntries & to_read, const PageHandler & handler, const ReadLimiterPtr & read_limiter = nullptr); @@ -134,8 +134,14 @@ class PageFile : public Allocator class MetaMergingReader : private boost::noncopyable { public: - static MetaMergingReaderPtr createFrom(PageFile & page_file, size_t max_meta_offset, const ReadLimiterPtr & read_limiter = nullptr); - static MetaMergingReaderPtr createFrom(PageFile & page_file, const ReadLimiterPtr & read_limiter = nullptr); + static MetaMergingReaderPtr createFrom(PageFile & page_file, + size_t max_meta_offset, + const ReadLimiterPtr & read_limiter = nullptr, + const bool background = false); + + static MetaMergingReaderPtr createFrom(PageFile & page_file, + const ReadLimiterPtr & read_limiter = nullptr, + const bool background = false); MetaMergingReader(PageFile & page_file_); // should only called by `createFrom` @@ -184,7 +190,7 @@ class PageFile : public Allocator } private: - void initialize(std::optional max_meta_offset, const ReadLimiterPtr & read_limiter); + void initialize(std::optional max_meta_offset, const ReadLimiterPtr & read_limiter, const bool background = false); private: PageFile & page_file; @@ -270,7 +276,7 @@ class PageFile : public Allocator case Type::Checkpoint: return "Checkpoint"; default: - throw Exception("Unexpected PageFile::Type: " + DB::toString((int)type)); + throw Exception(fmt::format("Unexpected PageFile::Type: {}", static_cast(type))); } } diff --git a/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp b/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp index fdf50bfaf98..8a5cc5a3146 100644 --- a/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp +++ b/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp @@ -339,7 +339,7 @@ DataCompactor::migratePages( // } // Create meta reader and update `compact_seq` - auto meta_reader = PageFile::MetaMergingReader::createFrom(const_cast(page_file), read_limiter); + auto meta_reader = PageFile::MetaMergingReader::createFrom(const_cast(page_file), read_limiter, /*background*/ true); while (meta_reader->hasNext()) { meta_reader->moveNext(); @@ -456,7 +456,7 @@ DataCompactor::mergeValidPages( // // The changes will be recorded by `gc_file_edit` and the bytes written will be return. 
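    // Everything the migrate lambda below reads or writes passes `background = true`, so
    // the bytes land in the PSMBackground{Read,Write}Bytes profile events on top of the
    // regular counters. The counting pattern, condensed from the PageUtil change earlier
    // in this patch (a sketch, not the full function):
    //
    //     ProfileEvents::increment(ProfileEvents::PSMReadBytes, bytes_read);
    //     if (background)
    //         ProfileEvents::increment(ProfileEvents::PSMBackgroundReadBytes, bytes_read);
    //
    // i.e. background bytes are a subset of the totals, which is what lets the Grafana
    // panel added at the end of this patch plot "PageBackGround" alongside "Page".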
auto migrate_entries = [compact_sequence, &data_reader, &gc_file_id, &gc_file_writer, &gc_file_edit, this](PageIdAndEntries & entries) -> size_t { - const PageMap pages = data_reader->read(entries, read_limiter); + const PageMap pages = data_reader->read(entries, read_limiter, /*background*/ true); // namespace id in v2 is useless WriteBatch wb{MAX_NAMESPACE_ID}; wb.setSequence(compact_sequence); @@ -471,7 +471,8 @@ DataCompactor::mergeValidPages( // page.data.size(), entry.field_offsets); } - return gc_file_writer->write(wb, gc_file_edit, write_limiter); + + return gc_file_writer->write(wb, gc_file_edit, write_limiter, true); }; #ifndef NDEBUG diff --git a/dbms/src/Storages/Page/V2/gc/LegacyCompactor.cpp b/dbms/src/Storages/Page/V2/gc/LegacyCompactor.cpp index 5522615fd70..d6b9182bde6 100644 --- a/dbms/src/Storages/Page/V2/gc/LegacyCompactor.cpp +++ b/dbms/src/Storages/Page/V2/gc/LegacyCompactor.cpp @@ -138,11 +138,11 @@ LegacyCompactor::collectPageFilesToCompact(const PageFileSet & page_files, const if (auto iter = writing_files.find(page_file.fileIdLevel()); iter != writing_files.end()) { // create reader with max meta reading offset - reader = PageFile::MetaMergingReader::createFrom(const_cast(page_file), iter->second.meta_offset, read_limiter); + reader = PageFile::MetaMergingReader::createFrom(const_cast(page_file), iter->second.meta_offset, read_limiter, /*background*/ true); } else { - reader = PageFile::MetaMergingReader::createFrom(const_cast(page_file), read_limiter); + reader = PageFile::MetaMergingReader::createFrom(const_cast(page_file), read_limiter, /*background*/ true); } if (reader->hasNext()) { @@ -290,7 +290,7 @@ size_t LegacyCompactor::writeToCheckpoint(const String & storage_path, auto checkpoint_writer = checkpoint_file.createWriter(false, true); PageEntriesEdit edit; - bytes_written += checkpoint_writer->write(wb, edit, write_limiter); + bytes_written += checkpoint_writer->write(wb, edit, write_limiter, /*background*/ true); } // drop "data" part for checkpoint file. 
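    // Accounting note: the checkpoint bytes were added above via
    // checkpoint_writer->write(..., /*background*/ true); the subtraction below removes
    // the size reclaimed when setCheckpoint() drops the "data" part (its return value,
    // judging by this call site), so bytes_written ends up counting only what actually
    // remains on disk for the checkpoint file.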
bytes_written -= checkpoint_file.setCheckpoint(); diff --git a/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp b/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp index 85b254bb574..8f55a67e9f2 100644 --- a/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp +++ b/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp @@ -41,7 +41,7 @@ TEST(PageUtils_test, ReadWriteFile) buff_write[i] = i % 0xFF; } WritableFilePtr file_for_write = std::make_shared(FileName, true, -1, 0666); - PageUtil::writeFile(file_for_write, 0, buff_write, buff_size, nullptr, true); + PageUtil::writeFile(file_for_write, 0, buff_write, buff_size, /*write_limiter*/ nullptr, /*background*/ false, /*enable_failpoint*/ true); PageUtil::syncFile(file_for_write); file_for_write->close(); @@ -78,7 +78,7 @@ TEST(PageUtils_test, BigReadWriteFile) buff_write[i] = i % 0xFF; } - PageUtil::writeFile(file_for_write, 0, buff_write, buff_size, nullptr, false); + PageUtil::writeFile(file_for_write, 0, buff_write, buff_size, nullptr, /*background*/ false, /*enable_failpoint*/ false); PageUtil::syncFile(file_for_write); file_for_write->close(); diff --git a/dbms/src/Storages/Page/V3/BlobFile.cpp b/dbms/src/Storages/Page/V3/BlobFile.cpp index 322292bd9f5..27b1c60f5d3 100644 --- a/dbms/src/Storages/Page/V3/BlobFile.cpp +++ b/dbms/src/Storages/Page/V3/BlobFile.cpp @@ -58,7 +58,7 @@ BlobFile::BlobFile(String parent_path_, } } -void BlobFile::read(char * buffer, size_t offset, size_t size, const ReadLimiterPtr & read_limiter) +void BlobFile::read(char * buffer, size_t offset, size_t size, const ReadLimiterPtr & read_limiter, bool background) { if (unlikely(wrfile->isClosed())) { @@ -66,10 +66,10 @@ void BlobFile::read(char * buffer, size_t offset, size_t size, const ReadLimiter ErrorCodes::LOGICAL_ERROR); } - PageUtil::readFile(wrfile, offset, buffer, size, read_limiter); + PageUtil::readFile(wrfile, offset, buffer, size, read_limiter, background); } -void BlobFile::write(char * buffer, size_t offset, size_t size, const WriteLimiterPtr & write_limiter) +void BlobFile::write(char * buffer, size_t offset, size_t size, const WriteLimiterPtr & write_limiter, bool background) { /** * Precautions: @@ -92,9 +92,9 @@ void BlobFile::write(char * buffer, size_t offset, size_t size, const WriteLimit }); #ifndef NDEBUG - PageUtil::writeFile(wrfile, offset, buffer, size, write_limiter, true); + PageUtil::writeFile(wrfile, offset, buffer, size, write_limiter, background, true); #else - PageUtil::writeFile(wrfile, offset, buffer, size, write_limiter, false); + PageUtil::writeFile(wrfile, offset, buffer, size, write_limiter, background, false); #endif PageUtil::syncFile(wrfile); diff --git a/dbms/src/Storages/Page/V3/BlobFile.h b/dbms/src/Storages/Page/V3/BlobFile.h index a9dfa26a679..4aee0695ee0 100644 --- a/dbms/src/Storages/Page/V3/BlobFile.h +++ b/dbms/src/Storages/Page/V3/BlobFile.h @@ -49,9 +49,9 @@ class BlobFile return EncryptionPath(getPath(), ""); } - void read(char * buffer, size_t offset, size_t size, const ReadLimiterPtr & read_limiter); + void read(char * buffer, size_t offset, size_t size, const ReadLimiterPtr & read_limiter, bool background = false); - void write(char * buffer, size_t offset, size_t size, const WriteLimiterPtr & write_limiter); + void write(char * buffer, size_t offset, size_t size, const WriteLimiterPtr & write_limiter, bool background = false); void truncate(size_t size); diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 0b7fb1669ff..362a82b354f 100644 --- 
a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -593,11 +593,11 @@ Page BlobStore::read(const PageIDAndEntryV3 & id_entry, const ReadLimiterPtr & r return page; } -BlobFilePtr BlobStore::read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter) +BlobFilePtr BlobStore::read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter, bool background) { assert(buffers != nullptr); auto blob_file = getBlobFile(blob_id); - blob_file->read(buffers, offset, size, read_limiter); + blob_file->read(buffers, offset, size, read_limiter, background); return blob_file; } @@ -790,7 +790,7 @@ PageEntriesEdit BlobStore::gc(std::map & file_offset, data_size, total_page_size); - blob_file->write(data_beg, file_offset, data_size, write_limiter); + blob_file->write(data_beg, file_offset, data_size, write_limiter, /*background*/ true); } catch (DB::Exception & e) { @@ -845,7 +845,7 @@ PageEntriesEdit BlobStore::gc(std::map & PageEntryV3 new_entry; - read(file_id, entry.offset, data_pos, entry.size, read_limiter); + read(file_id, entry.offset, data_pos, entry.size, read_limiter, /*background*/ true); // No need do crc again, crc won't be changed. new_entry.checksum = entry.checksum; diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index a289081acab..ce980c6edc3 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -249,7 +249,7 @@ class BlobStore : private Allocator private: #endif - BlobFilePtr read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter = nullptr); + BlobFilePtr read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter = nullptr, bool background = false); /** * Ask BlobStats to get a span from BlobStat. 
diff --git a/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp b/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp index 2eaf01949ac..8bbfb0d51cd 100644 --- a/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp +++ b/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp @@ -67,9 +67,9 @@ size_t LogWriter::writtenBytes() const return written_bytes; } -void LogWriter::flush(const WriteLimiterPtr & write_limiter) +void LogWriter::flush(const WriteLimiterPtr & write_limiter, const bool background) { - PageUtil::writeFile(log_file, written_bytes, write_buffer.buffer().begin(), write_buffer.offset(), write_limiter, false); + PageUtil::writeFile(log_file, written_bytes, write_buffer.buffer().begin(), write_buffer.offset(), write_limiter, /*background*/ background, /*enable_failpoint*/ false); log_file->fsync(); written_bytes += write_buffer.offset(); @@ -140,7 +140,7 @@ void LogWriter::addRecord(ReadBuffer & payload, const size_t payload_size, const if (!manual_flush) { - flush(write_limiter); + flush(write_limiter, /* background */ false); } } diff --git a/dbms/src/Storages/Page/V3/LogFile/LogWriter.h b/dbms/src/Storages/Page/V3/LogFile/LogWriter.h index f2bf3bcb8f6..4599c6105fb 100644 --- a/dbms/src/Storages/Page/V3/LogFile/LogWriter.h +++ b/dbms/src/Storages/Page/V3/LogFile/LogWriter.h @@ -87,7 +87,7 @@ class LogWriter final : private Allocator void addRecord(ReadBuffer & payload, size_t payload_size, const WriteLimiterPtr & write_limiter = nullptr); - void flush(const WriteLimiterPtr & write_limiter = nullptr); + void flush(const WriteLimiterPtr & write_limiter = nullptr, const bool background = false); void close(); diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 8164ea21944..4b563cbe4a0 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -189,7 +189,7 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire ReadBufferFromString payload(serialized); compact_log->addRecord(payload, serialized.size()); - compact_log->flush(write_limiter); + compact_log->flush(write_limiter, /*background*/ true); compact_log.reset(); // close fd explicitly before renaming file. // Rename it to be a normal log file. 
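One detail worth noting in the WALStore change above: only the checkpoint path flushes
with `background = true`; ordinary appends keep the default. The two call sites, side by
side (condensed from this patch; `addRecord` flushes internally unless manual_flush is
set):

    // foreground WAL append: counted only in the regular PSM write counters
    log_writer->addRecord(payload, payload_size, write_limiter);

    // checkpoint (background) path: additionally counted in PSMBackgroundWriteBytes
    compact_log->addRecord(payload, serialized.size());
    compact_log->flush(write_limiter, /*background*/ true);

This keeps the "PageBackGround" series in the Grafana panels below a strict subset of
"Page".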
diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 807209a56c9..364216a28db 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -4426,13 +4426,20 @@ "legendFormat": "Page", "refId": "B" }, + { + "expr": "sum(rate(tiflash_system_profile_event_PSMBackgroundWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PageBackGround", + "refId": "C" + }, { "expr": "sum(rate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "AIO", - "refId": "C" + "refId": "D" } ], "thresholds": [], @@ -4549,12 +4556,19 @@ "legendFormat": "Page", "refId": "B" }, + { + "expr": "sum(rate(tiflash_system_profile_event_PSMBackgroundReadBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PageBackGround", + "refId": "C" + }, { "expr": "sum(rate(tiflash_system_profile_event_ReadBufferAIOReadBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "AIO", - "refId": "C" + "refId": "D" } ], "thresholds": [], From 28a97fd61472cec770c10641cce9f3b46789f9e3 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Thu, 21 Apr 2022 17:36:05 +0800 Subject: [PATCH 35/79] Interpreter: Mock exchange executors. (#4706) ref pingcap/tiflash#4609 --- dbms/src/Debug/astToExecutor.cpp | 14 +++ dbms/src/Debug/astToExecutor.h | 4 + dbms/src/TestUtils/InterpreterTestUtils.cpp | 1 + dbms/src/TestUtils/InterpreterTestUtils.h | 7 +- dbms/src/TestUtils/mockExecutor.cpp | 77 +++++++++++++--- dbms/src/TestUtils/mockExecutor.h | 33 ++++++- .../TestUtils/tests/gtest_mock_executors.cpp | 89 ++++++++++++++----- 7 files changed, 186 insertions(+), 39 deletions(-) diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index 11b90e60fb9..b6003bbc710 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -1525,4 +1525,18 @@ ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr r return join; } +ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type) +{ + ExecutorPtr exchange_sender = std::make_shared(executor_index, input->output_schema, exchange_type); + exchange_sender->children.push_back(input); + return exchange_sender; +} + + +ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema) +{ + ExecutorPtr exchange_receiver = std::make_shared(executor_index, schema); + return exchange_receiver; +} + } // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index 0de229bccfa..d4e50a0c32b 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -290,4 +290,8 @@ ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr se ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params); +ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); + +ExecutorPtr compileExchangeReceiver(size_t & 
executor_index, DAGSchema schema); + } // namespace DB \ No newline at end of file diff --git a/dbms/src/TestUtils/InterpreterTestUtils.cpp b/dbms/src/TestUtils/InterpreterTestUtils.cpp index 52ff5e1cb08..dd1a981c504 100644 --- a/dbms/src/TestUtils/InterpreterTestUtils.cpp +++ b/dbms/src/TestUtils/InterpreterTestUtils.cpp @@ -48,6 +48,7 @@ String toTreeString(const tipb::Executor & root_executor, size_t level) auto append_str = [&buffer, &level](const tipb::Executor & executor) { assert(executor.has_executor_id()); + buffer.append(String(level, ' ')); buffer.append(executor.executor_id()).append("\n"); }; diff --git a/dbms/src/TestUtils/InterpreterTestUtils.h b/dbms/src/TestUtils/InterpreterTestUtils.h index e68bbe8ab47..37616911f87 100644 --- a/dbms/src/TestUtils/InterpreterTestUtils.h +++ b/dbms/src/TestUtils/InterpreterTestUtils.h @@ -37,7 +37,6 @@ class MockExecutorTest : public ::testing::Test MockExecutorTest() : context(TiFlashTestEnv::getContext()) {} - static void SetUpTestCase() { try @@ -53,8 +52,7 @@ class MockExecutorTest : public ::testing::Test virtual void initializeContext() { dag_context_ptr = std::make_unique(1024); - context.setDAGContext(dag_context_ptr.get()); - mock_dag_request_context = MockDAGRequestContext(); + context = MockDAGRequestContext(TiFlashTestEnv::getContext()); } DAGContext & getDAGContext() @@ -64,8 +62,7 @@ class MockExecutorTest : public ::testing::Test } protected: - Context context; - MockDAGRequestContext mock_dag_request_context; + MockDAGRequestContext context; std::unique_ptr dag_context_ptr; }; diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 8295d161753..c26c0bd4d87 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include namespace DB::tests { ASTPtr buildColumn(const String & column_name) @@ -34,14 +36,15 @@ ASTPtr buildLiteral(const Field & field) ASTPtr buildOrderByItemList(MockOrderByItems order_by_items) { - std::vector vec; + std::vector vec(order_by_items.size()); + size_t i = 0; for (auto item : order_by_items) { - int direction = item.second ? 1 : -1; + int direction = item.second ? 1 : -1; // todo ASTPtr locale_node; auto order_by_item = std::make_shared(direction, direction, false, locale_node); order_by_item->children.push_back(std::make_shared(item.first)); - vec.push_back(order_by_item); + vec[i++] = order_by_item; } auto exp_list = std::make_shared(); exp_list->children.insert(exp_list->children.end(), vec.begin(), vec.end()); @@ -67,13 +70,13 @@ void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request) } // traval the AST tree to build tipb::Executor recursively. 
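// Editor's illustration (not part of this patch; the names mirror the tests in
// gtest_mock_executors.cpp further below): the fluent builder chains mock
// executors and then serializes them through the recursive traversal noted
// above, e.g.
//
//   auto request = context.receive("sender_1")       // exchange_receiver_0
//                      .topN("s1", false, 10)        // topn_1
//                      .exchangeSender(tipb::Hash)   // exchange_sender_2
//                      .build(context);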
-std::shared_ptr DAGRequestBuilder::build(Context & context) +std::shared_ptr DAGRequestBuilder::build(MockDAGRequestContext & mock_context) { - MPPInfo mpp_info(properties.start_ts, -1, -1, {}, {}); + MPPInfo mpp_info(properties.start_ts, -1, -1, {}, mock_context.receiver_source_task_ids_map); std::shared_ptr dag_request_ptr = std::make_shared(); tipb::DAGRequest & dag_request = *dag_request_ptr; initDAGRequest(dag_request); - root->toTiPBExecutor(dag_request.mutable_root_executor(), properties.collator, mpp_info, context); + root->toTiPBExecutor(dag_request.mutable_root_executor(), properties.collator, mpp_info, mock_context.context); root.reset(); executor_index = 0; return dag_request_ptr; @@ -96,7 +99,7 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String return *this; } -DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const std::vector> & columns) +DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfos & columns) { return mockTable(name.first, name.second, columns); } @@ -106,6 +109,31 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, con return mockTable(name.first, name.second, columns); } +DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfos & columns) +{ + return buildExchangeReceiver(columns); +} + +DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfoList & columns) +{ + return buildExchangeReceiver(columns); +} + +DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInfos & columns) +{ + DAGSchema schema; + for (const auto & column : columns) + { + TiDB::ColumnInfo info; + info.tp = column.second; + info.name = column.first; + schema.push_back({column.first, info}); + } + + root = compileExchangeReceiver(getExecutorIndex(), schema); + return *this; +} + DAGRequestBuilder & DAGRequestBuilder::filter(ASTPtr filter_expr) { assert(root); @@ -185,6 +213,13 @@ DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNames col_names) return *this; } +DAGRequestBuilder & DAGRequestBuilder::exchangeSender(tipb::ExchangeType exchange_type) +{ + assert(root); + root = compileExchangeSender(root, getExecutorIndex(), exchange_type); + return *this; +} + DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, ASTPtr using_expr_list) { return join(right, using_expr_list, ASTTableJoin::Kind::Inner); @@ -230,13 +265,13 @@ DAGRequestBuilder & DAGRequestBuilder::buildAggregation(ASTPtr agg_funcs, ASTPtr return *this; } - void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columns) { - std::vector v_column_info; + std::vector v_column_info(columns.size()); + size_t i = 0; for (const auto & info : columns) { - v_column_info.push_back(std::move(info)); + v_column_info[i++] = std::move(info); } mock_tables[name.first + "." + name.second] = v_column_info; } @@ -251,9 +286,31 @@ void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockC mock_tables[name.first + "." 
+ name.second] = columns; } +void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfos & columns) +{ + exchange_schemas[name] = columns; +} + +void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfoList & columns) +{ + std::vector v_column_info(columns.size()); + size_t i = 0; + for (const auto & info : columns) + { + v_column_info[i++] = std::move(info); + } + exchange_schemas[name] = v_column_info; +} + DAGRequestBuilder MockDAGRequestContext::scan(String db_name, String table_name) { return DAGRequestBuilder(index).mockTable({db_name, table_name}, mock_tables[db_name + "." + table_name]); } +DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name) +{ + auto builder = DAGRequestBuilder(index).exchangeReceiver(exchange_schemas[exchange_name]); + receiver_source_task_ids_map[builder.getRoot()->name] = {}; + return builder; +} } // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 24d2df21f4a..9a3507cc603 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ using MockOrderByItems = std::initializer_list; using MockColumnNames = std::initializer_list; using MockAsts = std::initializer_list; +class MockDAGRequestContext; /** Responsible for Hand write tipb::DAGRequest * Use this class to mock DAGRequest, then feed the DAGRequest into @@ -51,14 +53,23 @@ class DAGRequestBuilder explicit DAGRequestBuilder(size_t & index) : executor_index(index) - {} + { + } + + ExecutorPtr getRoot() + { + return root; + } - std::shared_ptr build(Context & context); + std::shared_ptr build(MockDAGRequestContext & mock_context); DAGRequestBuilder & mockTable(const String & db, const String & table, const MockColumnInfos & columns); DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfos & columns); DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfoList & columns); + DAGRequestBuilder & exchangeReceiver(const MockColumnInfos & columns); + DAGRequestBuilder & exchangeReceiver(const MockColumnInfoList & columns); + DAGRequestBuilder & filter(ASTPtr filter_expr); DAGRequestBuilder & limit(int limit); @@ -73,6 +84,8 @@ class DAGRequestBuilder DAGRequestBuilder & project(MockAsts expr); DAGRequestBuilder & project(MockColumnNames col_names); + DAGRequestBuilder & exchangeSender(tipb::ExchangeType exchange_type); + // Currentlt only support inner join, left join and right join. // TODO support more types of join. 
 DAGRequestBuilder & join(const DAGRequestBuilder & right, ASTPtr using_expr_list);
@@ -85,6 +98,7 @@ private:
     void initDAGRequest(tipb::DAGRequest & dag_request);
     DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs);
+    DAGRequestBuilder & buildExchangeReceiver(const MockColumnInfos & columns);
 
     ExecutorPtr root;
     DAGProperties properties;
@@ -97,7 +111,8 @@ class DAGRequestBuilder
 class MockDAGRequestContext
 {
 public:
-    MockDAGRequestContext()
+    explicit MockDAGRequestContext(Context context_)
+        : context(context_)
     {
         index = 0;
     }
@@ -110,12 +125,22 @@ class MockDAGRequestContext
     void addMockTable(const MockTableName & name, const MockColumnInfoList & columns);
     void addMockTable(const String & db, const String & table, const MockColumnInfos & columns);
     void addMockTable(const MockTableName & name, const MockColumnInfos & columns);
-
+    void addExchangeRelationSchema(String name, const MockColumnInfos & columns);
+    void addExchangeRelationSchema(String name, const MockColumnInfoList & columns);
     DAGRequestBuilder scan(String db_name, String table_name);
+    DAGRequestBuilder receive(String exchange_name);
 
 private:
     size_t index;
     std::unordered_map<String, MockColumnInfos> mock_tables;
+    std::unordered_map<String, MockColumnInfos> exchange_schemas;
+
+public:
+    // Currently we don't support task_id, so the following structure is useless,
+    // but we need it to construct the TaskMeta.
+    // In TiFlash, we use task_id to identify an MPP task.
+    std::unordered_map<String, std::vector<Int64>> receiver_source_task_ids_map;
+    Context context;
 };
 
 ASTPtr buildColumn(const String & column_name);
diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp
index 2be63311034..1f48ebd0230 100644
--- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp
+++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -26,23 +27,24 @@ class MockDAGRequestTest : public DB::tests::MockExecutorTest
     void initializeContext() override
     {
         dag_context_ptr = std::make_unique<DAGContext>(1024);
-        context.setDAGContext(dag_context_ptr.get());
-        mock_dag_request_context = MockDAGRequestContext();
-        mock_dag_request_context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}});
-        mock_dag_request_context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeLong}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}});
-        mock_dag_request_context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"r_c", TiDB::TP::TypeString}});
-        mock_dag_request_context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"l_c", TiDB::TP::TypeString}});
+        context = MockDAGRequestContext(TiFlashTestEnv::getContext());
+
+        context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}});
+        context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeLong}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}});
+        context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"r_c", TiDB::TP::TypeString}});
+        context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"l_c", TiDB::TP::TypeString}});
+        context.addExchangeRelationSchema("sender_1", {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}});
     }
 };
 
TEST_F(MockDAGRequestTest,
MockTable) try { - auto request = mock_dag_request_context.scan("test_db", "test_table").build(context); + auto request = context.scan("test_db", "test_table").build(context); String expected_string_1 = "table_scan_0\n"; ASSERT_DAGREQUEST_EQAUL(expected_string_1, request); - request = mock_dag_request_context.scan("test_db", "test_table_1").build(context); + request = context.scan("test_db", "test_table_1").build(context); String expected_string_2 = "table_scan_0\n"; ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); } @@ -51,12 +53,12 @@ CATCH TEST_F(MockDAGRequestTest, Filter) try { - auto request = mock_dag_request_context.scan("test_db", "test_table").filter(eq(col("s1"), col("s2"))).build(context); + auto request = context.scan("test_db", "test_table").filter(eq(col("s1"), col("s2"))).build(context); String expected_string = "selection_1\n" " table_scan_0\n"; ASSERT_DAGREQUEST_EQAUL(expected_string, request); - request = mock_dag_request_context.scan("test_db", "test_table_1") + request = context.scan("test_db", "test_table_1") .filter(And(eq(col("s1"), col("s2")), lt(col("s2"), col("s3")))) .build(context); ASSERT_DAGREQUEST_EQAUL(expected_string, request); @@ -66,21 +68,21 @@ CATCH TEST_F(MockDAGRequestTest, Projection) try { - auto request = mock_dag_request_context.scan("test_db", "test_table") + auto request = context.scan("test_db", "test_table") .project("s1") .build(context); String expected_string = "project_1\n" " table_scan_0\n"; ASSERT_DAGREQUEST_EQAUL(expected_string, request); - request = mock_dag_request_context.scan("test_db", "test_table_1") + request = context.scan("test_db", "test_table_1") .project({col("s3"), eq(col("s1"), col("s2"))}) .build(context); String expected_string_2 = "project_1\n" " table_scan_0\n"; ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); - request = mock_dag_request_context.scan("test_db", "test_table_1") + request = context.scan("test_db", "test_table_1") .project({"s1", "s2"}) .build(context); ASSERT_DAGREQUEST_EQAUL(expected_string, request); @@ -90,14 +92,14 @@ CATCH TEST_F(MockDAGRequestTest, Limit) try { - auto request = mock_dag_request_context.scan("test_db", "test_table") + auto request = context.scan("test_db", "test_table") .limit(10) .build(context); String expected_string = "limit_1\n" " table_scan_0\n"; ASSERT_DAGREQUEST_EQAUL(expected_string, request); - request = mock_dag_request_context.scan("test_db", "test_table_1") + request = context.scan("test_db", "test_table_1") .limit(lit(Field(static_cast(10)))) .build(context); ASSERT_DAGREQUEST_EQAUL(expected_string, request); @@ -107,14 +109,14 @@ CATCH TEST_F(MockDAGRequestTest, TopN) try { - auto request = mock_dag_request_context.scan("test_db", "test_table") + auto request = context.scan("test_db", "test_table") .topN({{"s1", false}}, 10) .build(context); String expected_string = "topn_1\n" " table_scan_0\n"; ASSERT_DAGREQUEST_EQAUL(expected_string, request); - request = mock_dag_request_context.scan("test_db", "test_table") + request = context.scan("test_db", "test_table") .topN("s1", false, 10) .build(context); ASSERT_DAGREQUEST_EQAUL(expected_string, request); @@ -124,7 +126,7 @@ CATCH TEST_F(MockDAGRequestTest, Aggregation) try { - auto request = mock_dag_request_context.scan("test_db", "test_table") + auto request = context.scan("test_db", "test_table") .aggregation(Max(col("s1")), col("s2")) .build(context); String expected_string = "aggregation_1\n" @@ -136,13 +138,13 @@ CATCH TEST_F(MockDAGRequestTest, Join) try { - DAGRequestBuilder right_builder = 
mock_dag_request_context.scan("test_db", "r_table") + DAGRequestBuilder right_builder = context.scan("test_db", "r_table") .filter(eq(col("r_a"), col("r_b"))) .project({col("r_a"), col("r_b")}) .aggregation(Max(col("r_a")), col("r_b")); - DAGRequestBuilder left_builder = mock_dag_request_context.scan("test_db", "l_table") + DAGRequestBuilder left_builder = context.scan("test_db", "l_table") .topN({{"l_a", false}}, 10) .join(right_builder, col("l_a"), ASTTableJoin::Kind::Left) .limit(10); @@ -160,5 +162,52 @@ try } CATCH +TEST_F(MockDAGRequestTest, ExchangeSender) +try +{ + auto request = context.scan("test_db", "test_table") + .exchangeSender(tipb::PassThrough) + .build(context); + String expected_string = "exchange_sender_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = context.scan("test_db", "test_table") + .topN("s1", false, 10) + .exchangeSender(tipb::Broadcast) + .build(context); + String expected_string_2 = "exchange_sender_2\n" + " topn_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); + + request = context.scan("test_db", "test_table") + .project({col("s1"), col("s2")}) + .exchangeSender(tipb::Hash) + .build(context); + String expected_string_3 = "exchange_sender_2\n" + " project_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_3, request); +} +CATCH + +TEST_F(MockDAGRequestTest, ExchangeReceiver) +try +{ + auto request = context.receive("sender_1") + .build(context); + String expected_string = "exchange_receiver_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = context.receive("sender_1") + .topN("s1", false, 10) + .build(context); + String expected_string_2 = "topn_1\n" + " exchange_receiver_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); +} +CATCH + } // namespace tests } // namespace DB \ No newline at end of file From d1e2584ee01880cbbff6fab3e2acc825573ce452 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Thu, 21 Apr 2022 18:54:04 +0800 Subject: [PATCH 36/79] Fix pagestorage v3 ddl problem. 
(#4691)

ref pingcap/tiflash#3594

---
 dbms/src/Server/StorageConfigParser.cpp       | 15 +++-
 dbms/src/Storages/Page/PageDefines.h          |  6 +-
 dbms/src/Storages/Page/V3/BlobStore.cpp       | 59 ++++++++++++++-
 .../Storages/Page/V3/LogFile/LogWriter.cpp    |  2 +-
 dbms/src/Storages/Page/V3/PageDirectory.cpp   |  4 +-
 dbms/src/Storages/Page/V3/PageEntriesEdit.h   |  3 +
 dbms/src/Storages/Page/V3/PageEntry.h         | 14 ++++
 .../Page/V3/tests/gtest_blob_store.cpp        | 74 ++++++++++++++++++-
 .../Page/V3/tests/gtest_page_storage.cpp      | 13 ++++
 dbms/src/Storages/Page/WriteBatch.h           | 44 +++++++----
 10 files changed, 206 insertions(+), 28 deletions(-)

diff --git a/dbms/src/Server/StorageConfigParser.cpp b/dbms/src/Server/StorageConfigParser.cpp
index a3c2d7aa9e9..89bb49da33a 100644
--- a/dbms/src/Server/StorageConfigParser.cpp
+++ b/dbms/src/Server/StorageConfigParser.cpp
@@ -202,11 +202,20 @@ void TiFlashStorageConfig::parseMisc(const String & storage_section, Poco::Logger * log)
         lazily_init_store = (*lazily_init != 0);
     }
 
-    // config for experimental feature, may remove later
-    if (auto enable_v3 = table->get_qualified_as("enable_ps_v3"); enable_v3)
+
+    if (table->contains("enable_ps_v3"))
     {
-        enable_ps_v3 = (*enable_v3 != 0);
+        if (auto enable_v3 = table->get_qualified_as("enable_ps_v3"); enable_v3)
+        {
+            enable_ps_v3 = (*enable_v3 != 0);
+        }
     }
+    else
+    {
+        // enable_ps_v3 is enabled by default
+        enable_ps_v3 = true;
+    }
+
+    LOG_FMT_INFO(log, "format_version {} lazily_init_store {} enable_ps_v3 {}", format_version, lazily_init_store, enable_ps_v3);
 }
diff --git a/dbms/src/Storages/Page/PageDefines.h b/dbms/src/Storages/Page/PageDefines.h
index bd23ad89278..1b3d3a90331 100644
--- a/dbms/src/Storages/Page/PageDefines.h
+++ b/dbms/src/Storages/Page/PageDefines.h
@@ -80,7 +80,11 @@ struct ByteBuffer
 {
     using Pos = char *;
 
-    ByteBuffer() = default;
+    ByteBuffer()
+        : begin_pos(nullptr)
+        , end_pos(nullptr)
+    {}
+
     ByteBuffer(Pos begin_pos_, Pos end_pos_)
         : begin_pos(begin_pos_)
        , end_pos(end_pos_)
diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp
index 362a82b354f..526c0d27e25 100644
--- a/dbms/src/Storages/Page/V3/BlobStore.cpp
+++ b/dbms/src/Storages/Page/V3/BlobStore.cpp
@@ -372,6 +372,21 @@ void BlobStore::read(PageIDAndEntriesV3 & entries, const PageHandler & handler, const ReadLimiterPtr & read_limiter)
     for (const auto & p : entries)
         buf_size = std::max(buf_size, p.second.size);
 
+    // When we read a `WriteBatch` written with `WriteType::PUT_EXTERNAL`,
+    // the `buf_size` will be 0, and we need to avoid calling malloc/free with size 0.
+    if (buf_size == 0)
+    {
+        for (const auto & [page_id_v3, entry] : entries)
+        {
+            (void)entry;
+            LOG_FMT_DEBUG(log, "Read entry [page_id={}] without entry size.", page_id_v3);
+            Page page;
+            page.page_id = page_id_v3.low;
+            handler(page_id_v3.low, page);
+        }
+        return;
+    }
+
     char * data_buf = static_cast<char *>(alloc(buf_size));
     MemHolder mem_holder = createMemHolder(data_buf, [&, buf_size](char * p) {
         free(p, buf_size);
@@ -418,11 +433,23 @@ PageMap BlobStore::read(FieldReadInfos & to_read, const ReadLimiterPtr & read_limiter)
               [](const FieldReadInfo & a, const FieldReadInfo & b) { return a.entry.offset < b.entry.offset; });
 
     // allocate data_buf that can hold all pages with specify fields
+
     size_t buf_size = 0;
     for (auto & [page_id, entry, fields] : to_read)
     {
         (void)page_id;
-        buf_size += entry.size;
+        // Sort fields to get better read on disk
+        std::sort(fields.begin(), fields.end());
+        for (const auto field_index : fields)
+        {
+            buf_size += entry.getFieldSize(field_index);
+        }
+    }
+
+    // Read with `FieldReadInfos`, buf_size must not be 0.
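    // (Editor's note, inferred from the PUT_EXTERNAL handling above: external
    // pages carry no payload and no field offsets, so a field read summing to
    // zero bytes can only come from a malformed request; unlike the whole-page
    // read overloads, which legitimately return empty pages, this path throws.)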
+    if (buf_size == 0)
+    {
+        throw Exception("Reading with fields but entry size is 0.", ErrorCodes::LOGICAL_ERROR);
     }
 
     char * data_buf = static_cast<char *>(alloc(buf_size));
     MemHolder mem_holder = createMemHolder(data_buf, [&, buf_size](char * p) {
         free(p, buf_size);
@@ -476,13 +503,13 @@ PageMap BlobStore::read(FieldReadInfos & to_read, const ReadLimiterPtr & read_limiter)
         Page page;
         page.page_id = page_id_v3.low;
-        page.data = ByteBuffer(pos, pos + entry.size);
+        page.data = ByteBuffer(pos, write_offset);
         page.mem_holder = mem_holder;
         page.field_offsets.swap(fields_offset_in_page);
         fields_offset_in_page.clear();
 
         page_map.emplace(page_id_v3.low, std::move(page));
 
-        pos += entry.size;
+        pos = write_offset;
     }
 
     if (unlikely(pos != data_buf + buf_size))
@@ -509,6 +536,22 @@ PageMap BlobStore::read(PageIDAndEntriesV3 & entries, const ReadLimiterPtr & read_limiter)
         buf_size += p.second.size;
     }
 
+    // When we read a `WriteBatch` written with `WriteType::PUT_EXTERNAL`,
+    // the `buf_size` will be 0, and we need to avoid calling malloc/free with size 0.
+    if (buf_size == 0)
+    {
+        PageMap page_map;
+        for (const auto & [page_id_v3, entry] : entries)
+        {
+            (void)entry;
+            LOG_FMT_DEBUG(log, "Read entry [page_id={}] without entry size.", page_id_v3);
+            Page page;
+            page.page_id = page_id_v3.low;
+            page_map.emplace(page_id_v3.low, page);
+        }
+        return page_map;
+    }
+
     char * data_buf = static_cast<char *>(alloc(buf_size));
     MemHolder mem_holder = createMemHolder(data_buf, [&, buf_size](char * p) {
         free(p, buf_size);
@@ -561,6 +604,16 @@ Page BlobStore::read(const PageIDAndEntryV3 & id_entry, const ReadLimiterPtr & read_limiter)
     const auto & [page_id_v3, entry] = id_entry;
     const size_t buf_size = entry.size;
 
+    // When we read a `WriteBatch` written with `WriteType::PUT_EXTERNAL`,
+    // the `buf_size` will be 0, and we need to avoid calling malloc/free with size 0.
+    if (buf_size == 0)
+    {
+        LOG_FMT_DEBUG(log, "Read entry [page_id={}] without entry size.", page_id_v3);
+        Page page;
+        page.page_id = page_id_v3.low;
+        return page;
+    }
+
     char * data_buf = static_cast<char *>(alloc(buf_size));
     MemHolder mem_holder = createMemHolder(data_buf, [&, buf_size](char * p) {
         free(p, buf_size);
diff --git a/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp b/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp
index 8bbfb0d51cd..be14637415e 100644
--- a/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp
+++ b/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp
@@ -152,7 +152,7 @@ void LogWriter::emitPhysicalRecord(Format::RecordType type, ReadBuffer & payload
     static_assert(Format::RECYCLABLE_HEADER_SIZE > Format::CHECKSUM_FIELD_SIZE, "Header size must be greater than the checksum size");
     static_assert(Format::RECYCLABLE_HEADER_SIZE > Format::HEADER_SIZE, "Ensure the min buffer size for physical record");
     constexpr static size_t HEADER_BUFF_SIZE = Format::RECYCLABLE_HEADER_SIZE - Format::CHECKSUM_FIELD_SIZE;
-    char buf[HEADER_BUFF_SIZE];
+    char buf[HEADER_BUFF_SIZE] = {0};
     WriteBuffer header_buff(buf, HEADER_BUFF_SIZE);
 
     // Format the header
diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp
index de9ec0cc03e..4c7214da466 100644
--- a/dbms/src/Storages/Page/V3/PageDirectory.cpp
+++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp
@@ -322,7 +322,7 @@ VersionedPageEntries::resolveToPageId(UInt64 seq, bool check_prev, PageEntryV3 * entry)
     else if (type == EditRecordType::VAR_EXTERNAL)
     {
         // We may add reference to an external id even if it is logically deleted.
-        bool ok = check_prev ? true : (!is_deleted || (is_deleted && seq < delete_ver.sequence));
+        bool ok = check_prev ? 
true : (!is_deleted || seq < delete_ver.sequence); if (create_ver.sequence <= seq && ok) { return {RESOLVE_TO_NORMAL, buildV3Id(0, 0), PageVersionType(0)}; @@ -330,7 +330,7 @@ VersionedPageEntries::resolveToPageId(UInt64 seq, bool check_prev, PageEntryV3 * } else if (type == EditRecordType::VAR_REF) { - if (create_ver.sequence <= seq && (!is_deleted || (is_deleted && seq < delete_ver.sequence))) + if (create_ver.sequence <= seq && (!is_deleted || seq < delete_ver.sequence)) { return {RESOLVE_TO_REF, ori_page_id, create_ver}; } diff --git a/dbms/src/Storages/Page/V3/PageEntriesEdit.h b/dbms/src/Storages/Page/V3/PageEntriesEdit.h index 0db95850b8e..84fa18bd5d8 100644 --- a/dbms/src/Storages/Page/V3/PageEntriesEdit.h +++ b/dbms/src/Storages/Page/V3/PageEntriesEdit.h @@ -104,6 +104,8 @@ inline const char * typeToString(EditRecordType t) return "VAR_EXT"; case EditRecordType::VAR_DELETE: return "VAR_DEL"; + default: + return "INVALID"; } } @@ -220,6 +222,7 @@ class PageEntriesEdit EditRecord() : page_id(0) , ori_page_id(0) + , version(0, 0) , being_ref_count(1) {} }; diff --git a/dbms/src/Storages/Page/V3/PageEntry.h b/dbms/src/Storages/Page/V3/PageEntry.h index 0faf4a7dd29..d8ed1b6e9a5 100644 --- a/dbms/src/Storages/Page/V3/PageEntry.h +++ b/dbms/src/Storages/Page/V3/PageEntry.h @@ -40,6 +40,20 @@ struct PageEntryV3 PageFieldOffsetChecksums field_offsets{}; public: + size_t getFieldSize(size_t index) const + { + if (unlikely(index >= field_offsets.size())) + throw Exception(fmt::format("Try to getFieldData of PageEntry [blob_id={}] with invalid [index={}] [fields size={}]", + file_id, + index, + field_offsets.size()), + ErrorCodes::LOGICAL_ERROR); + else if (index == field_offsets.size() - 1) + return size - field_offsets.back().first; + else + return field_offsets[index + 1].first - field_offsets[index].first; + } + // Return field{index} offsets: [begin, end) of page data. 
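    // For example (matching the new test below): a 120-byte entry written with
    // field sizes {20, 40, 40, 20} stores field_offsets {0, 20, 60, 100}; field 1
    // spans [20, 60) so getFieldSize(1) == 40, and the last field spans
    // [100, 120) so getFieldSize(3) == size - field_offsets.back().first == 20.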
std::pair getFieldOffsets(size_t index) const { diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index 22c81cc76f3..a141e21ee2a 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -572,6 +572,74 @@ TEST_F(BlobStoreTest, testWriteRead) ASSERT_EQ(index, buff_nums); } +TEST_F(BlobStoreTest, testWriteReadWithFiled) +try +{ + const auto file_provider = DB::tests::TiFlashTestEnv::getContext().getFileProvider(); + + PageId page_id1 = 50; + PageId page_id2 = 51; + PageId page_id3 = 53; + + size_t buff_size = 120; + WriteBatch wb; + + auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config); + char c_buff[buff_size]; + + for (size_t j = 0; j < buff_size; ++j) + { + c_buff[j] = static_cast(j & 0xff); + } + + ReadBufferPtr buff1 = std::make_shared(const_cast(c_buff), buff_size); + ReadBufferPtr buff2 = std::make_shared(const_cast(c_buff), buff_size); + ReadBufferPtr buff3 = std::make_shared(const_cast(c_buff), buff_size); + wb.putPage(page_id1, /* tag */ 0, buff1, buff_size, {20, 40, 40, 20}); + wb.putPage(page_id2, /* tag */ 0, buff2, buff_size, {10, 50, 20, 20, 20}); + wb.putPage(page_id3, /* tag */ 0, buff3, buff_size, {10, 5, 20, 20, 15, 5, 15, 30}); + PageEntriesEdit edit = blob_store.write(wb, nullptr); + ASSERT_EQ(edit.size(), 3); + + BlobStore::FieldReadInfo read_info1(buildV3Id(TEST_NAMESPACE_ID, page_id1), edit.getRecords()[0].entry, {0, 1, 2, 3}); + BlobStore::FieldReadInfo read_info2(buildV3Id(TEST_NAMESPACE_ID, page_id2), edit.getRecords()[1].entry, {2, 4}); + BlobStore::FieldReadInfo read_info3(buildV3Id(TEST_NAMESPACE_ID, page_id3), edit.getRecords()[2].entry, {1, 3}); + + BlobStore::FieldReadInfos read_infos = {read_info1, read_info2, read_info3}; + + const auto & page_map = blob_store.read(read_infos, nullptr); + ASSERT_EQ(page_map.size(), 3); + + for (const auto & [pageid, page] : page_map) + { + if (pageid == page_id1) + { + ASSERT_EQ(page.page_id, page_id1); + ASSERT_EQ(page.data.size(), buff_size); + ASSERT_EQ(strncmp(page.data.begin(), c_buff, buff_size), 0); + } + else if (pageid == page_id2) + { + ASSERT_EQ(page.page_id, page_id2); + // the buffer size read is equal to the fields size we read + // field {2, 4} + ASSERT_EQ(page.data.size(), 40); + ASSERT_EQ(strncmp(page.data.begin(), &c_buff[60], 20), 0); + ASSERT_EQ(strncmp(&page.data.begin()[20], &c_buff[100], 20), 0); + } + else if (pageid == page_id3) + { + ASSERT_EQ(page.page_id, page_id3); + // the buffer size read is equal to the fields size we read + // field {1, 3} + ASSERT_EQ(page.data.size(), 25); + ASSERT_EQ(strncmp(page.data.begin(), &c_buff[10], 5), 0); + ASSERT_EQ(strncmp(&page.data.begin()[5], &c_buff[35], 20), 0); + } + } +} +CATCH + TEST_F(BlobStoreTest, testFeildOffsetWriteRead) { const auto file_provider = DB::tests::TiFlashTestEnv::getContext().getFileProvider(); @@ -651,8 +719,8 @@ try const size_t buff_size = 1024; WriteBatch wb; { - char c_buff1[buff_size]; - char c_buff2[buff_size]; + char c_buff1[buff_size] = {0}; + char c_buff2[buff_size] = {0}; for (size_t i = 0; i < buff_size; ++i) { @@ -777,7 +845,7 @@ TEST_F(BlobStoreTest, testWriteOutOfLimitSize) config.file_limit_size = buff_size; size_t buffer_sizes[] = {buff_size, buff_size - 1, buff_size / 2 + 1}; - for (auto & buf_size : buffer_sizes) + for (const auto & buf_size : buffer_sizes) { auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config); diff 
--git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 5b6ffe51779..ee23f244725 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -109,6 +109,19 @@ try } CATCH +TEST_F(PageStorageTest, ReadNULL) +try +{ + { + WriteBatch batch; + batch.putExternal(0, 0); + page_storage->write(std::move(batch)); + } + const auto & page = page_storage->read(0); + ASSERT_EQ(page.data.begin(), nullptr); +} +CATCH + TEST_F(PageStorageTest, WriteMultipleBatchRead1) try { diff --git a/dbms/src/Storages/Page/WriteBatch.h b/dbms/src/Storages/Page/WriteBatch.h index 69da8d96cef..bde03c4de57 100644 --- a/dbms/src/Storages/Page/WriteBatch.h +++ b/dbms/src/Storages/Page/WriteBatch.h @@ -203,21 +203,35 @@ class WriteBatch : private boost::noncopyable String toString() const { - String str; - for (const auto & w : writes) - { - if (w.type == WriteType::PUT) - str += DB::toString(w.page_id) + ","; - else if (w.type == WriteType::REF) - str += DB::toString(w.page_id) + ">" + DB::toString(w.ori_page_id) + ","; - else if (w.type == WriteType::DEL) - str += "X" + DB::toString(w.page_id) + ","; - else if (w.type == WriteType::UPSERT) - str += "U" + DB::toString(w.page_id) + ","; - } - if (!str.empty()) - str.erase(str.size() - 1); - return str; + FmtBuffer fmt_buffer; + fmt_buffer.joinStr( + writes.begin(), + writes.end(), + [this](const auto w, FmtBuffer & fb) { + switch (w.type) + { + case WriteType::PUT: + fb.fmtAppend("{}.{}", namespace_id, w.page_id); + break; + case WriteType::REF: + fb.fmtAppend("{}.{} > {}.{}", namespace_id, w.page_id, namespace_id, w.ori_page_id); + break; + case WriteType::DEL: + fb.fmtAppend("X{}.{}", namespace_id, w.page_id); + break; + case WriteType::UPSERT: + fb.fmtAppend("U{}.{}", namespace_id, w.page_id); + break; + case WriteType::PUT_EXTERNAL: + fb.fmtAppend("E{}.{}", namespace_id, w.page_id); + break; + default: + fb.fmtAppend("Unknow {}.{}", namespace_id, w.page_id); + break; + }; + }, + ","); + return fmt_buffer.toString(); } private: From abb313953973dd955daad63724a2cf22bb94e264 Mon Sep 17 00:00:00 2001 From: Meng Xin Date: Fri, 22 Apr 2022 11:58:04 +0800 Subject: [PATCH 37/79] support row_number, rank, dense_rank in tiflash (#4552) close pingcap/tiflash#4200 --- contrib/tipb | 2 +- dbms/CMakeLists.txt | 1 + dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Common/Exception.h | 9 + dbms/src/Common/TiFlashMetrics.h | 3 +- dbms/src/Core/Block.cpp | 11 + dbms/src/Core/Block.h | 1 + .../DataStreams/WindowBlockInputStream.cpp | 577 ++++++++++++++++++ dbms/src/DataStreams/WindowBlockInputStream.h | 252 ++++++++ dbms/src/Debug/astToExecutor.h | 2 + .../Coprocessor/DAGExpressionAnalyzer.cpp | 201 +++++- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 21 +- dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp | 17 +- dbms/src/Flash/Coprocessor/DAGQueryBlock.h | 1 + .../Coprocessor/DAGQueryBlockInterpreter.cpp | 73 ++- .../Coprocessor/DAGQueryBlockInterpreter.h | 9 + dbms/src/Flash/Coprocessor/DAGUtils.cpp | 65 +- dbms/src/Flash/Coprocessor/DAGUtils.h | 3 + dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 10 + .../Coprocessor/collectOutputFieldTypes.cpp | 5 + .../src/Flash/Statistics/CommonExecutorImpl.h | 26 + .../ExecutorStatisticsCollector.cpp | 4 +- .../Flash/Statistics/traverseExecutors.cpp | 4 + dbms/src/Interpreters/WindowDescription.cpp | 63 ++ dbms/src/Interpreters/WindowDescription.h | 117 ++++ dbms/src/Interpreters/convertFieldToType.cpp | 30 +- 
dbms/src/Interpreters/convertFieldToType.h | 4 + dbms/src/Server/Client.cpp | 426 +++++++------ dbms/src/Server/Server.cpp | 2 + dbms/src/TestUtils/FunctionTestUtils.cpp | 18 + dbms/src/TestUtils/FunctionTestUtils.h | 5 + .../MockTableScanBlockInputStream.cpp | 62 ++ .../TestUtils/MockTableScanBlockInputStream.h | 40 ++ dbms/src/WindowFunctions/IWindowFunction.cpp | 109 ++++ dbms/src/WindowFunctions/IWindowFunction.h | 58 ++ .../WindowFunctions/WindowFunctionFactory.cpp | 80 +++ .../WindowFunctions/WindowFunctionFactory.h | 70 +++ .../registerWindowFunctions.cpp | 27 + .../WindowFunctions/registerWindowFunctions.h | 21 + .../tests/gtest_window_functions.cpp | 345 +++++++++++ 40 files changed, 2556 insertions(+), 219 deletions(-) create mode 100644 dbms/src/DataStreams/WindowBlockInputStream.cpp create mode 100644 dbms/src/DataStreams/WindowBlockInputStream.h create mode 100644 dbms/src/Interpreters/WindowDescription.cpp create mode 100644 dbms/src/Interpreters/WindowDescription.h create mode 100644 dbms/src/TestUtils/MockTableScanBlockInputStream.cpp create mode 100644 dbms/src/TestUtils/MockTableScanBlockInputStream.h create mode 100644 dbms/src/WindowFunctions/IWindowFunction.cpp create mode 100644 dbms/src/WindowFunctions/IWindowFunction.h create mode 100644 dbms/src/WindowFunctions/WindowFunctionFactory.cpp create mode 100644 dbms/src/WindowFunctions/WindowFunctionFactory.h create mode 100644 dbms/src/WindowFunctions/registerWindowFunctions.cpp create mode 100644 dbms/src/WindowFunctions/registerWindowFunctions.h create mode 100644 dbms/src/WindowFunctions/tests/gtest_window_functions.cpp diff --git a/contrib/tipb b/contrib/tipb index d12dec7a760..bfb5c2c5518 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit d12dec7a76095ac7c1db102948e7bf9ebaa970c1 +Subproject commit bfb5c2c55188c254018d3cf77bfad73b4d4b77ec diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 91f9aeb93c6..718d18c4954 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -89,6 +89,7 @@ add_headers_and_sources(dbms src/Storages/Page/V2) add_headers_and_sources(dbms src/Storages/Page/V2/mvcc) add_headers_and_sources(dbms src/Storages/Page/V2/VersionSet) add_headers_and_sources(dbms src/Storages/Page/V2/gc) +add_headers_and_sources(dbms src/WindowFunctions) if (ENABLE_V3_PAGESTORAGE) add_headers_and_sources(dbms src/Storages/Page/V3) add_headers_and_sources(dbms src/Storages/Page/V3/LogFile) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index a59c5edc1d0..40c14539644 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -389,6 +389,7 @@ extern const int CANNOT_MPROTECT = 445; extern const int DECIMAL_OVERFLOW = 446; extern const int CANNOT_PARSE_BOOL = 447; extern const int CANNOT_FTRUNCATE = 448; +extern const int UNKNOWN_WINDOW_FUNCTION = 449; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h index ab2d0b15ae0..3322c99bce0 100644 --- a/dbms/src/Common/Exception.h +++ b/dbms/src/Common/Exception.h @@ -16,11 +16,13 @@ #include #include +#include #include #include #include + namespace Poco { class Logger; @@ -39,6 +41,13 @@ class Exception : public Poco::Exception explicit Exception(const std::string & msg, int code = 0) : Poco::Exception(msg, code) {} + + // Format message with fmt::format, like the logging functions. + template + Exception(int code, const std::string & fmt, Args &&... 
args) + : Exception(fmt::format(fmt, std::forward(args)...), code) + {} + Exception(const std::string & msg, const std::string & arg, int code = 0) : Poco::Exception(msg, arg, code) {} diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 11268bdf035..b67b263a617 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -55,7 +55,8 @@ namespace DB F(type_sel, {"type", "selection"}), F(type_agg, {"type", "aggregation"}), F(type_topn, {"type", "top_n"}), \ F(type_limit, {"type", "limit"}), F(type_join, {"type", "join"}), F(type_exchange_sender, {"type", "exchange_sender"}), \ F(type_exchange_receiver, {"type", "exchange_receiver"}), F(type_projection, {"type", "projection"}), \ - F(type_partition_ts, {"type", "partition_table_scan"})) \ + F(type_partition_ts, {"type", "partition_table_scan"}), \ + F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"})) \ M(tiflash_coprocessor_request_duration_seconds, "Bucketed histogram of request duration", Histogram, \ F(type_batch, {{"type", "batch"}}, ExpBuckets{0.0005, 2, 30}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.0005, 2, 30}), \ F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.0005, 2, 30}), \ diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 3ade8d9ffff..28db7af82e1 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -369,6 +369,17 @@ Block Block::cloneWithColumns(MutableColumns && columns) const return res; } +Block Block::cloneWithColumns(Columns && columns) const +{ + Block res; + + size_t num_columns = data.size(); + for (size_t i = 0; i < num_columns; ++i) + res.insert({std::move(columns[i]), data[i].type, data[i].name, data[i].column_id}); + + return res; +} + Block Block::sortColumns() const { diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index dcdae4d1baa..713ae85d082 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -132,6 +132,7 @@ class Block /** Replace columns in a block */ void setColumns(MutableColumns && columns); Block cloneWithColumns(MutableColumns && columns) const; + Block cloneWithColumns(Columns && columns) const; /** Get a block with columns that have been rearranged in the order of their names. */ Block sortColumns() const; diff --git a/dbms/src/DataStreams/WindowBlockInputStream.cpp b/dbms/src/DataStreams/WindowBlockInputStream.cpp new file mode 100644 index 00000000000..8d9fb13cbc5 --- /dev/null +++ b/dbms/src/DataStreams/WindowBlockInputStream.cpp @@ -0,0 +1,577 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
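// (Editor's overview of the implementation that follows: input blocks are
// buffered in a sliding deque; partition and frame boundaries advance
// incrementally as blocks arrive; a row's window-function results are written
// out as soon as its frame is final; and blocks no longer referenced by any
// boundary pointer are dropped to bound memory usage.)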
+ +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int NOT_IMPLEMENTED; +} // namespace ErrorCodes + +WindowBlockInputStream::WindowBlockInputStream(const BlockInputStreamPtr & input, const WindowDescription & window_description_, const String & req_id) + : log(Logger::get(NAME, req_id)) + , window_description(window_description_) +{ + children.push_back(input); + output_header = input->getHeader(); + for (const auto & add_column : window_description_.add_columns) + { + output_header.insert({add_column.type, add_column.name}); + } + + initialWorkspaces(); + + initialPartitionAndOrderColumnIndices(); +} + + +void WindowBlockInputStream::initialPartitionAndOrderColumnIndices() +{ + partition_column_indices.reserve(window_description.partition_by.size()); + for (const auto & column : window_description.partition_by) + { + partition_column_indices.push_back( + output_header.getPositionByName(column.column_name)); + } + + order_column_indices.reserve(window_description.order_by.size()); + for (const auto & column : window_description.order_by) + { + order_column_indices.push_back( + output_header.getPositionByName(column.column_name)); + } +} + +void WindowBlockInputStream::initialWorkspaces() +{ + // Initialize window function workspaces. + workspaces.reserve(window_description.window_functions_descriptions.size()); + + for (const auto & window_function_description : window_description.window_functions_descriptions) + { + WindowFunctionWorkspace workspace; + workspace.window_function = window_function_description.window_function; + workspaces.push_back(std::move(workspace)); + } + only_have_row_number = onlyHaveRowNumber(); + only_have_pure_window = onlyHaveRowNumberAndRank(); +} + +Block WindowBlockInputStream::readImpl() +{ + const auto & stream = children.back(); + while (!input_is_finished) + { + if (Block output_block = tryGetOutputBlock()) + { + return output_block; + } + + Block block = stream->read(); + if (!block) + input_is_finished = true; + else + appendBlock(block); + tryCalculate(); + } + + // return last partition block, if already return then return null + return tryGetOutputBlock(); +} + +// Judge whether current_partition_row is end row of partition in current block +bool WindowBlockInputStream::isDifferentFromPrevPartition(UInt64 current_partition_row) +{ + const auto reference_columns = inputAt(prev_frame_start); + const auto compared_columns = inputAt(partition_end); + + for (size_t i = 0; i < partition_column_indices.size(); ++i) + { + const auto reference_column = reference_columns[partition_column_indices[i]]; + const auto * compared_column = compared_columns[partition_column_indices[i]].get(); + if (window_description.partition_by[i].collator) + { + if (compared_column->compareAtWithCollation(current_partition_row, + prev_frame_start.row, + *reference_column, + 1 /* nan_direction_hint */, + *window_description.partition_by[i].collator) + != 0) + { + return true; + } + } + else + { + if (compared_column->compareAt(current_partition_row, + prev_frame_start.row, + *reference_column, + 1 /* nan_direction_hint */) + != 0) + { + return true; + } + } + } + return false; +} + +void WindowBlockInputStream::advancePartitionEnd() +{ + RUNTIME_ASSERT(!partition_ended, log, "partition_ended should be false here."); + const RowNumber end = blocksEnd(); + + // If we're at the total end of data, we must end the partition. 
This is one + // of the few places in calculations where we need special handling for end + // of data, other places will work as usual based on + // `partition_ended` = true, because end of data is logically the same as + // any other end of partition. + // We must check this first, because other calculations might not be valid + // when we're at the end of data. + if (input_is_finished) + { + partition_ended = true; + // We receive empty chunk at the end of data, so the partition_end must + // be already at the end of data. + assert(partition_end == end); + return; + } + + // If we got to the end of the block already, but we are going to get more + // input data, wait for it. + if (partition_end == end) + { + return; + } + + // We process one block at a time, but we can process each block many times, + // if it contains multiple partitions. The `partition_end` is a + // past-the-end pointer, so it must be already in the "next" block we haven't + // processed yet. This is also the last block we have. + // The exception to this rule is end of data, for which we checked above. + assert(end.block == partition_end.block + 1); + + // Try to advance the partition end pointer. + const size_t partition_by_columns = partition_column_indices.size(); + if (partition_by_columns == 0) + { + // No PARTITION BY. All input is one partition, which will end when the + // input ends. + partition_end = end; + return; + } + + // Check for partition end. + // The partition ends when the PARTITION BY columns change. We need + // some reference columns for comparison. We might have already + // dropped the blocks where the partition starts, but any other row in the + // partition will do. We can't use frame_start or frame_end or current_row (the next row + // for which we are calculating the window functions), because they all might be + // past the end of the partition. prev_frame_start is suitable, because it + // is a pointer to the first row of the previous frame that must have been + // valid, or to the first row of the partition, and we make sure not to drop + // its block. + assert(partition_start <= prev_frame_start); + // The frame start should be inside the prospective partition, except the + // case when it still has no rows. + assert(prev_frame_start < partition_end || partition_start == partition_end); + assert(first_block_number <= prev_frame_start.block); + const auto block_rows = blockRowsNumber(partition_end); + + // if the last partition row of block is same as prev, there should be no partition end in this block + if (isDifferentFromPrevPartition(block_rows - 1)) + { + partition_end.row = getPartitionEndRow(block_rows); + partition_ended = true; + return; + } + + // go to the next. + ++partition_end.block; + partition_end.row = 0; + + // Went until the end of data and didn't find the new partition. 
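    // (Illustration with made-up data for the in-block case above: if the
    // previous partition's key is 'a' and the current block's partition column
    // holds [a, a, b, b], isDifferentFromPrevPartition() is false for rows 0-1
    // and true for rows 2-3, so the binary search in getPartitionEndRow()
    // returns 2 and partition_end points at the first row of the new partition.)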
+ assert(!partition_ended && partition_end == blocksEnd()); +} +Int64 WindowBlockInputStream::getPartitionEndRow(size_t block_rows) +{ + Int64 left = partition_end.row; + Int64 right = block_rows - 1; + + while (left <= right) + { + Int64 middle = left + (right - left) / 2; + if (isDifferentFromPrevPartition(middle)) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + return left; +} + +void WindowBlockInputStream::advanceFrameStart() +{ + if (frame_started) + { + return; + } + + if (only_have_pure_window) + { + frame_start = current_row; + frame_started = true; + return; + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "window function only support pure window function now."); +} + +bool WindowBlockInputStream::arePeers(const RowNumber & x, const RowNumber & y) const +{ + if (x == y) + { + // For convenience, a row is always its own peer. + return true; + } + + switch (window_description.frame.type) + { + case WindowFrame::FrameType::Rows: + // For ROWS frame, row is only peers with itself (checked above); + return false; + case WindowFrame::FrameType::Ranges: + { + // For RANGE frames, rows that compare equal w/ORDER BY are peers. + const size_t n = order_column_indices.size(); + if (n == 0) + { + // No ORDER BY, so all rows are peers. + return true; + } + + for (size_t i = 0; i < n; ++i) + { + const auto * column_x = inputAt(x)[order_column_indices[i]].get(); + const auto * column_y = inputAt(y)[order_column_indices[i]].get(); + if (window_description.order_by[i].collator) + { + if (column_x->compareAtWithCollation(x.row, y.row, *column_y, 1 /* nan_direction_hint */, *window_description.order_by[i].collator) != 0) + { + return false; + } + } + else + { + if (column_x->compareAt(x.row, y.row, *column_y, 1 /* nan_direction_hint */) != 0) + { + return false; + } + } + } + return true; + } + case WindowFrame::FrameType::Groups: + default: + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "window function only support frame type row and range."); + } +} + +void WindowBlockInputStream::advanceFrameEndCurrentRow() +{ + assert(frame_end.block == partition_end.block + || frame_end.block + 1 == partition_end.block); + + // If window only have row_number or rank/dense_rank functions, set frame_end to the next row of current_row and frame_ended to true + if (only_have_pure_window) + { + frame_end = current_row; + advanceRowNumber(frame_end); + frame_ended = true; + return; + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "window function only support pure window function now."); +} + +void WindowBlockInputStream::advanceFrameEnd() +{ + // frame_end must be greater or equal than frame_start, so if the + // frame_start is already past the current frame_end, we can start + // from it to save us some work. + if (frame_end < frame_start) + { + frame_end = frame_start; + } + + // No reason for this function to be called again after it succeeded. 
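    // (For the pure-window path, advanceFrameEndCurrentRow() above yields the
    // frame [current_row, current_row + 1): row_number/rank/dense_rank never
    // read other rows, so a single-row frame is all they need.)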
+ assert(!frame_ended); + + // switch for another frame type + switch (window_description.frame.end_type) + { + case WindowFrame::BoundaryType::Current: + advanceFrameEndCurrentRow(); + break; + case WindowFrame::BoundaryType::Unbounded: + case WindowFrame::BoundaryType::Offset: + default: + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "The frame end type '{}' is not implemented", + window_description.frame.end_type); + } +} + +void WindowBlockInputStream::writeOutCurrentRow() +{ + assert(current_row < partition_end); + assert(current_row.block >= first_block_number); + + for (size_t wi = 0; wi < workspaces.size(); ++wi) + { + auto & ws = workspaces[wi]; + ws.window_function->windowInsertResultInto(this->shared_from_this(), wi); + } +} + +Block WindowBlockInputStream::tryGetOutputBlock() +{ + if (isCancelledOrThrowIfKilled()) + { + if (!window_blocks.empty()) + window_blocks.erase(window_blocks.begin(), window_blocks.end()); + input_is_finished = true; + return {}; + } + + assert(first_not_ready_row.block >= first_block_number); + // The first_not_ready_row might be past-the-end if we have already + // calculated the window functions for all input rows. That's why the + // equality is also valid here. + assert(first_not_ready_row.block <= first_block_number + window_blocks.size()); + assert(next_output_block_number >= first_block_number); + + if (next_output_block_number < first_not_ready_row.block) + { + const auto i = next_output_block_number - first_block_number; + auto & block = window_blocks[i]; + auto columns = block.input_columns; + for (auto & res : block.output_columns) + { + columns.push_back(ColumnPtr(std::move(res))); + } + ++next_output_block_number; + + auto output_block = output_header.cloneWithColumns(std::move(columns)); + releaseAlreadyOutputWindowBlock(); + return output_block; + } + return {}; +} + +bool WindowBlockInputStream::onlyHaveRowNumber() +{ + for (const auto & workspace : workspaces) + { + if (workspace.window_function->getName() != "row_number") + return false; + } + return true; +} + +bool WindowBlockInputStream::onlyHaveRowNumberAndRank() +{ + for (const auto & workspace : workspaces) + { + if (workspace.window_function->getName() != "row_number" && workspace.window_function->getName() != "rank" && workspace.window_function->getName() != "dense_rank") + return false; + } + return true; +} + +void WindowBlockInputStream::releaseAlreadyOutputWindowBlock() +{ + // We don't really have to keep the entire partition, and it can be big, so + // we want to drop the starting blocks to save memory. We can drop the old + // blocks if we already returned them as output, and the frame and the + // current row are already past them. We also need to keep the previous + // frame start because we use it as the partition standard. It is always less + // than the current frame start, so we don't have to check the latter. Note + // that the frame start can be further than current row for some frame specs + // (e.g. EXCLUDE CURRENT ROW), so we have to check both. 
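    // (Worked example: with next_output_block_number = 5, peer_group_last.block = 5,
    // prev_frame_start.block = 4 and current_row.block = 5, first_used_block
    // resolves to 4, so only blocks numbered below 4 are erased from the deque.)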
+ assert(prev_frame_start <= frame_start); + const auto first_used_block = std::min(std::min(next_output_block_number, peer_group_last.block), + std::min(prev_frame_start.block, current_row.block)); + + + if (first_block_number < first_used_block) + { + window_blocks.erase(window_blocks.begin(), + window_blocks.begin() + (first_used_block - first_block_number)); + first_block_number = first_used_block; + + assert(next_output_block_number >= first_block_number); + assert(frame_start.block >= first_block_number); + assert(prev_frame_start.block >= first_block_number); + assert(current_row.block >= first_block_number); + } +} + +void WindowBlockInputStream::appendBlock(Block & current_block) +{ + assert(!input_is_finished); + assert(current_block); + + if (current_block.rows() == 0) + { + return; + } + + window_blocks.push_back({}); + auto & window_block = window_blocks.back(); + window_block.rows = current_block.rows(); + + // Initialize output columns and add new columns to output block. + for (auto & ws : workspaces) + { + MutableColumnPtr res = ws.window_function->getReturnType()->createColumn(); + res->reserve(window_block.rows); + window_block.output_columns.push_back(std::move(res)); + } + + window_block.input_columns = current_block.getColumns(); +} + +void WindowBlockInputStream::tryCalculate() +{ + // Start the calculations. First, advance the partition end. + for (;;) + { + advancePartitionEnd(); + + // Either we ran out of data or we found the end of partition (maybe + // both, but this only happens at the total end of data). + assert(partition_ended || partition_end == blocksEnd()); + if (partition_ended && partition_end == blocksEnd()) + { + assert(input_is_finished); + } + + + while (current_row < partition_end) + { + // if window only have row_number function, we can ignore judging peers + if (!only_have_row_number) + { + // peer_group_last save the row before current_row + if (!arePeers(peer_group_last, current_row)) + { + peer_group_start_row_number = current_row_number; + ++peer_group_number; + } + } + peer_group_last = current_row; + + // Advance the frame start. + advanceFrameStart(); + + if (!frame_started) + { + // Wait for more input data to find the start of frame. + assert(!input_is_finished); + assert(!partition_ended); + return; + } + + // Advance the frame end. + advanceFrameEnd(); + + if (!frame_ended) + { + // Wait for more input data to find the end of frame. + assert(!input_is_finished); + assert(!partition_ended); + return; + } + + // The frame can be empty sometimes, e.g. the boundaries coincide + // or the start is after the partition end. But hopefully start is + // not after end. + assert(frame_started); + assert(frame_ended); + assert(frame_start <= frame_end); + + // Write out the results. + writeOutCurrentRow(); + + prev_frame_start = frame_start; + + // Move to the next row. The frame will have to be recalculated. + // The peer group start is updated at the beginning of the loop, + // because current_row might now be past-the-end. + advanceRowNumber(current_row); + ++current_row_number; + first_not_ready_row = current_row; + frame_ended = false; + frame_started = false; + } + + if (input_is_finished) + { + // We finalized the last partition in the above loop, and don't have + // to do anything else. + assert(current_row == blocksEnd()); + return; + } + + if (!partition_ended) + { + // Wait for more input data to find the end of partition. + // Assert that we processed all the data we currently have, and that + // we are going to receive more data. 
+ assert(partition_end == blocksEnd()); + assert(!input_is_finished); + break; + } + + // Start the next partition. + partition_start = partition_end; + advanceRowNumber(partition_end); + partition_ended = false; + // We have to reset the frame and other pointers when the new partition + // starts. + frame_start = partition_start; + frame_end = partition_start; + prev_frame_start = partition_start; + assert(current_row == partition_start); + current_row_number = 1; + peer_group_last = partition_start; + peer_group_start_row_number = 1; + peer_group_number = 1; + } +} +} // namespace DB diff --git a/dbms/src/DataStreams/WindowBlockInputStream.h b/dbms/src/DataStreams/WindowBlockInputStream.h new file mode 100644 index 00000000000..46b18dec1ee --- /dev/null +++ b/dbms/src/DataStreams/WindowBlockInputStream.h @@ -0,0 +1,252 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#include +#include + +namespace DB +{ +// Runtime data for computing one window function. +struct WindowFunctionWorkspace +{ + // TODO add aggregation function + WindowFunctionPtr window_function = nullptr; +}; + +struct WindowBlock +{ + Columns input_columns; + MutableColumns output_columns; + + size_t rows = 0; +}; + +struct RowNumber +{ + UInt64 block = 0; + UInt64 row = 0; + + bool operator<(const RowNumber & other) const + { + return block < other.block + || (block == other.block && row < other.row); + } + + bool operator==(const RowNumber & other) const + { + return block == other.block && row == other.row; + } + + bool operator<=(const RowNumber & other) const + { + return *this < other || *this == other; + } +}; + +class WindowBlockInputStream : public IProfilingBlockInputStream + , public std::enable_shared_from_this +{ + static constexpr auto NAME = "Window"; + +public: + WindowBlockInputStream(const BlockInputStreamPtr & input, const WindowDescription & window_description_, const String & req_id); + + Block getHeader() const override { return output_header; }; + + String getName() const override { return NAME; } + + /* Implementation details.*/ + void advancePartitionEnd(); + bool isDifferentFromPrevPartition(UInt64 current_partition_row); + + bool arePeers(const RowNumber & x, const RowNumber & y) const; + + void advanceFrameStart(); + void advanceFrameEndCurrentRow(); + void advanceFrameEnd(); + + void writeOutCurrentRow(); + + Block tryGetOutputBlock(); + void releaseAlreadyOutputWindowBlock(); + + void initialWorkspaces(); + void initialPartitionAndOrderColumnIndices(); + + Columns & inputAt(const RowNumber & x) + { + assert(x.block >= first_block_number); + assert(x.block - first_block_number < window_blocks.size()); + return window_blocks[x.block - first_block_number].input_columns; + } + + const Columns & inputAt(const RowNumber & x) const + { + return const_cast(this)->inputAt(x); + } + + auto & blockAt(const UInt64 block_number) + { + assert(block_number >= first_block_number); + assert(block_number - 
first_block_number < window_blocks.size()); + return window_blocks[block_number - first_block_number]; + } + + const auto & blockAt(const UInt64 block_number) const + { + return const_cast(this)->blockAt(block_number); + } + + auto & blockAt(const RowNumber & x) + { + return blockAt(x.block); + } + + const auto & blockAt(const RowNumber & x) const + { + return const_cast(this)->blockAt(x); + } + + size_t blockRowsNumber(const RowNumber & x) const + { + return blockAt(x).rows; + } + + MutableColumns & outputAt(const RowNumber & x) + { + assert(x.block >= first_block_number); + assert(x.block - first_block_number < window_blocks.size()); + return window_blocks[x.block - first_block_number].output_columns; + } + + void advanceRowNumber(RowNumber & x) const + { + assert(x.block >= first_block_number); + assert(x.block - first_block_number < window_blocks.size()); + + const auto block_rows = blockAt(x).rows; + assert(x.row < block_rows); + + ++x.row; + if (x.row < block_rows) + { + return; + } + + x.row = 0; + ++x.block; + } + + RowNumber blocksEnd() const + { + return RowNumber{first_block_number + window_blocks.size(), 0}; + } + + void appendBlock(Block & current_block); + + void tryCalculate(); + + bool onlyHaveRowNumber(); + + bool onlyHaveRowNumberAndRank(); + +protected: + Block readImpl() override; + + LoggerPtr log; + +public: + bool input_is_finished = false; + + Block output_header; + + WindowDescription window_description; + + // Indices of the PARTITION BY columns in block. + std::vector partition_column_indices; + // Indices of the ORDER BY columns in block; + std::vector order_column_indices; + + // Per-window-function scratch spaces. + std::vector workspaces; + + std::unique_ptr arena; + + // A sliding window of blocks we currently need. We add the input blocks as + // they arrive, and discard the blocks we don't need anymore. The blocks + // have an always-incrementing index. The index of the first block is in + // `first_block_number`. + std::deque window_blocks; + UInt64 first_block_number = 0; + // The next block we are going to pass to the consumer. + UInt64 next_output_block_number = 0; + // The first row for which we still haven't calculated the window functions. + // Used to determine which resulting blocks we can pass to the consumer. + RowNumber first_not_ready_row; + + // Boundaries of the current partition. + // partition_start doesn't point to a valid block, because we want to drop + // the blocks early to save memory. We still have to track it so that we can + // cut off a PRECEDING frame at the partition start. + // The `partition_end` is past-the-end, as usual. When + // partition_ended = false, it still haven't ended, and partition_end is the + // next row to check. + RowNumber partition_start; + RowNumber partition_end; + bool partition_ended = false; + + // The row for which we are now computing the window functions. + RowNumber current_row; + // The start of current peer group, needed for CURRENT ROW frame start. + // For ROWS frame, always equal to the current row, and for RANGE and GROUP + // frames may be earlier. + RowNumber peer_group_last; + + // Row and group numbers in partition for calculating rank() and friends. + UInt64 current_row_number = 1; + UInt64 peer_group_start_row_number = 1; + UInt64 peer_group_number = 1; + + // The frame is [frame_start, frame_end) if frame_ended && frame_started, + // and unknown otherwise. 
Note that when we move to the next row, both the + // frame_start and the frame_end may jump forward by an unknown amount of + // blocks, e.g. if we use a RANGE frame. This means that sometimes we don't + // know neither frame_end nor frame_start. + // We update the states of the window functions after we find the final frame + // boundaries. + // After we have found the final boundaries of the frame, we can immediately + // output the result for the current row, w/o waiting for more data. + RowNumber frame_start; + RowNumber frame_end; + bool frame_ended = false; + bool frame_started = false; + + // The previous frame boundaries that correspond to the current state of the + // aggregate function. We use them to determine how to update the aggregation + // state after we find the new frame. + RowNumber prev_frame_start; + + //TODO: used as template parameters + bool only_have_row_number = false; + bool only_have_pure_window = false; + Int64 getPartitionEndRow(size_t block_rows); +}; + +} // namespace DB diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index d4e50a0c32b..54839e60dc6 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -294,4 +294,6 @@ ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, ti ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema); +//TODO: add compileWindow + } // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 95e3655d9d7..cffae76cb81 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -34,12 +33,12 @@ #include #include #include +#include namespace DB { namespace ErrorCodes { -extern const int COP_BAD_DAG_REQUEST; extern const int UNSUPPORTED_METHOD; } // namespace ErrorCodes @@ -147,7 +146,7 @@ ExpressionActionsChain::Step & DAGExpressionAnalyzer::initAndGetLastStep(Express return chain.getLastStep(); } -void DAGExpressionAnalyzer::fillAggArgumentDetail( +void DAGExpressionAnalyzer::fillArgumentDetail( const ExpressionActionsPtr & actions, const tipb::Expr & arg, Names & arg_names, @@ -183,7 +182,7 @@ void DAGExpressionAnalyzer::buildGroupConcat( { /// only one arg Names arg_names; - fillAggArgumentDetail(actions, expr.children(0), arg_names, types, arg_collators); + fillArgumentDetail(actions, expr.children(0), arg_names, types, arg_collators); arg_name = arg_names.back(); all_columns_names_and_types.emplace_back(arg_name, types[0]); } @@ -284,7 +283,7 @@ void DAGExpressionAnalyzer::buildCommonAggFunc( TiDB::TiDBCollators arg_collators; for (Int32 i = 0; i < child_size; ++i) { - fillAggArgumentDetail(actions, expr.children(i), arg_names, arg_types, arg_collators); + fillArgumentDetail(actions, expr.children(i), arg_names, arg_types, arg_collators); } appendAggDescription(arg_names, arg_types, arg_collators, agg_func_name, aggregate_descriptions, aggregated_columns, empty_input_as_null); @@ -409,6 +408,137 @@ std::tuple & by_items) const +{ + NamesAndTypes by_item_columns; + by_item_columns.reserve(by_items.size()); + + for (const tipb::ByItem & by_item : by_items) + { + if (!isColumnExpr(by_item.expr())) + { + throw TiFlashException("must be column expr.", Errors::Coprocessor::BadRequest); + } + by_item_columns.emplace_back(getColumnNameAndTypeForColumnExpr(by_item.expr(), 
getCurrentInputColumns()));
+    }
+
+    return getSortDescription(by_item_columns, by_items);
+}
+
+void DAGExpressionAnalyzer::appendSourceColumnsToRequireOutput(ExpressionActionsChain::Step & step) const
+{
+    for (const auto & col : getCurrentInputColumns())
+    {
+        step.required_output.push_back(col.name);
+    }
+}
+
+// This function adds the new window function columns to source_columns.
+std::tuple<WindowDescription, NamesAndTypes> DAGExpressionAnalyzer::appendWindowColumns(const tipb::Window & window, ExpressionActionsChain::Step & step)
+{
+    WindowDescription window_description;
+    NamesAndTypes window_columns;
+
+    if (window.func_desc_size() == 0)
+    {
+        // should not reach here
+        throw TiFlashException("window executor without agg/window expression", Errors::Coprocessor::BadRequest);
+    }
+
+    if (!isWindowFunctionsValid(window))
+    {
+        throw TiFlashException("cannot have window and agg functions together in one window.", Errors::Coprocessor::BadRequest);
+    }
+
+    for (const tipb::Expr & expr : window.func_desc())
+    {
+        if (isAggFunctionExpr(expr))
+        {
+            throw TiFlashException("Unsupported agg function in window.", Errors::Coprocessor::BadRequest);
+        }
+        else if (isWindowFunctionExpr(expr))
+        {
+            WindowFunctionDescription window_function_description;
+            String window_func_name = getWindowFunctionName(expr);
+            auto child_size = expr.children_size();
+
+            Names arg_names;
+            DataTypes arg_types;
+            TiDB::TiDBCollators arg_collators;
+            for (Int32 i = 0; i < child_size; ++i)
+            {
+                fillArgumentDetail(step.actions, expr.children(i), arg_names, arg_types, arg_collators);
+            }
+
+            window_function_description.argument_names = arg_names;
+            step.required_output.insert(step.required_output.end(), arg_names.begin(), arg_names.end());
+
+            String func_string = genFuncString(window_func_name, window_function_description.argument_names, arg_collators);
+            window_function_description.column_name = func_string;
+            window_function_description.window_function = WindowFunctionFactory::instance().get(window_func_name, arg_types);
+            DataTypePtr result_type = window_function_description.window_function->getReturnType();
+            window_description.window_functions_descriptions.push_back(window_function_description);
+            window_columns.emplace_back(func_string, result_type);
+            source_columns.emplace_back(func_string, result_type);
+        }
+        else
+        {
+            throw TiFlashException("unknown function expr.", Errors::Coprocessor::BadRequest);
+        }
+    }
+
+    return {window_description, window_columns};
+}
+
+WindowDescription DAGExpressionAnalyzer::buildWindowDescription(const tipb::Window & window)
+{
+    ExpressionActionsChain chain;
+    ExpressionActionsChain::Step & step = initAndGetLastStep(chain);
+    appendSourceColumnsToRequireOutput(step);
+    size_t source_size = getCurrentInputColumns().size();
+
+    auto [window_description, window_columns] = appendWindowColumns(window, step);
+
+    window_description.add_columns = window_columns;
+
+    if (window.has_frame())
+    {
+        window_description.setWindowFrame(window.frame());
+    }
+
+    window_description.before_window = chain.getLastActions();
+    window_description.partition_by = getWindowSortDescription(window.partition_by());
+    window_description.order_by = getWindowSortDescription(window.order_by());
+    chain.finalize();
+    chain.clear();
+
+    auto & after_window_step = initAndGetLastStep(chain);
+    appendCastAfterWindow(after_window_step.actions, window, source_size);
+    window_description.after_window_columns = getCurrentInputColumns();
+    
appendSourceColumnsToRequireOutput(after_window_step); + window_description.after_window = chain.getLastActions(); + chain.finalize(); + chain.clear(); + + return window_description; +} + String DAGExpressionAnalyzer::applyFunction( const String & func_name, const Names & arg_names, @@ -517,6 +647,26 @@ String DAGExpressionAnalyzer::convertToUInt8(const ExpressionActionsPtr & action throw TiFlashException(fmt::format("Filter on {} is not supported.", org_type->getName()), Errors::Coprocessor::Unimplemented); } +NamesAndTypes DAGExpressionAnalyzer::buildWindowOrderColumns(const tipb::Sort & window_sort) const +{ + if (window_sort.byitems_size() == 0) + { + throw TiFlashException("window executor without order by exprs", Errors::Coprocessor::BadRequest); + } + NamesAndTypes order_columns; + order_columns.reserve(window_sort.byitems_size()); + + for (const tipb::ByItem & order_by : window_sort.byitems()) + { + if (!isColumnExpr(order_by.expr())) + { + throw TiFlashException("must be column expr.", Errors::Coprocessor::BadRequest); + } + order_columns.emplace_back(getColumnNameAndTypeForColumnExpr(order_by.expr(), getCurrentInputColumns())); + } + return order_columns; +} + NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns( const ExpressionActionsPtr & actions, const ::google::protobuf::RepeatedPtrField & order_by) @@ -769,6 +919,45 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( return ret; } +void DAGExpressionAnalyzer::appendCastAfterWindow( + const ExpressionActionsPtr & actions, + const tipb::Window & window, + size_t window_columns_start_index) +{ + bool need_update_source_columns = false; + NamesAndTypes updated_window_columns; + + auto update_cast_column = [&](const tipb::Expr & expr, const NameAndTypePair & origin_column) { + String updated_name = appendCastIfNeeded(expr, actions, origin_column.name); + if (origin_column.name != updated_name) + { + DataTypePtr type = actions->getSampleBlock().getByName(updated_name).type; + updated_window_columns.emplace_back(updated_name, type); + need_update_source_columns = true; + } + else + { + updated_window_columns.emplace_back(origin_column.name, origin_column.type); + } + }; + + for (size_t i = 0; i < window_columns_start_index; ++i) + { + updated_window_columns.emplace_back(source_columns[i]); + } + + assert(window.func_desc_size() + window_columns_start_index == source_columns.size()); + for (Int32 i = 0; i < window.func_desc_size(); ++i) + { + update_cast_column(window.func_desc(i), source_columns[window_columns_start_index + i]); + } + + if (need_update_source_columns) + { + std::swap(source_columns, updated_window_columns); + } +} + void DAGExpressionAnalyzer::appendCastAfterAgg( const ExpressionActionsPtr & actions, const tipb::Aggregation & aggregation) @@ -1134,7 +1323,7 @@ String DAGExpressionAnalyzer::buildTupleFunctionForGroupConcat( collators.push_back(nullptr); } - std::vector order_columns; + NamesAndTypes order_columns; for (auto i = 0; i < expr.order_by_size(); ++i) { String name = getActions(expr.order_by(i).expr(), actions); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index eec05d1427f..f565e7a6348 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace DB @@ -63,6 +64,8 @@ class DAGExpressionAnalyzer : private boost::noncopyable ExpressionActionsChain & chain, const std::vector & conditions); + 
NamesAndTypes buildWindowOrderColumns(const tipb::Sort & window_sort) const; + std::vector appendOrderBy( ExpressionActionsChain & chain, const tipb::TopN & topN); @@ -74,6 +77,13 @@ class DAGExpressionAnalyzer : private boost::noncopyable const tipb::Aggregation & agg, bool group_by_collation_sensitive); + std::tuple appendWindowColumns(const tipb::Window & window, ExpressionActionsChain::Step & step); + + WindowDescription buildWindowDescription(const tipb::Window & window); + + SortDescription getWindowSortDescription( + const ::google::protobuf::RepeatedPtrField & by_items) const; + void initChain( ExpressionActionsChain & chain, const std::vector & columns) const; @@ -136,7 +146,16 @@ class DAGExpressionAnalyzer : private boost::noncopyable const google::protobuf::RepeatedPtrField & filters, String & filter_column_name); + void appendSourceColumnsToRequireOutput(ExpressionActionsChain::Step & step) const; + + void appendCastAfterWindow( + const ExpressionActionsPtr & actions, + const tipb::Window & window, + const size_t window_columns_start_index); + +#ifndef DBMS_PUBLIC_GTEST private: +#endif NamesAndTypes buildOrderColumns( const ExpressionActionsPtr & actions, const ::google::protobuf::RepeatedPtrField & order_by); @@ -184,7 +203,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable bool group_by_collation_sensitive, TiDB::TiDBCollators & collators); - void fillAggArgumentDetail( + void fillArgumentDetail( const ExpressionActionsPtr & actions, const tipb::Expr & arg, Names & arg_names, diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp index 512d473686d..2b5e8ede60a 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp @@ -37,12 +37,16 @@ bool isSourceNode(const tipb::Executor * root) { return root->tp() == tipb::ExecType::TypeJoin || root->tp() == tipb::ExecType::TypeTableScan || root->tp() == tipb::ExecType::TypeExchangeReceiver || root->tp() == tipb::ExecType::TypeProjection - || root->tp() == tipb::ExecType::TypePartitionTableScan; + || root->tp() == tipb::ExecType::TypePartitionTableScan + || root->tp() == tipb::ExecType::TypeWindow + || (root->tp() == tipb::ExecType::TypeSort && root->sort().ispartialsort()); } const static String SOURCE_NAME("source"); const static String SEL_NAME("selection"); const static String AGG_NAME("aggregation"); +const static String WINDOW_NAME("window"); +const static String WINDOW_SORT_NAME("window_sort"); const static String HAVING_NAME("having"); const static String TOPN_NAME("topN"); const static String LIMIT_NAME("limit"); @@ -151,6 +155,16 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator { GET_METRIC(tiflash_coprocessor_executor_count, type_partition_ts).Increment(); } + else if (current->tp() == tipb::ExecType::TypeWindow) + { + children.push_back(std::make_shared(source->window().child(), id_generator)); + GET_METRIC(tiflash_coprocessor_executor_count, type_window).Increment(); + } + else if (current->tp() == tipb::ExecType::TypeSort && current->sort().ispartialsort()) + { + children.push_back(std::make_shared(source->sort().child(), id_generator)); + GET_METRIC(tiflash_coprocessor_executor_count, type_window_sort).Increment(); + } } /// construct DAGQueryBlock from a list struct based executors, which is the @@ -216,5 +230,4 @@ DAGQueryBlock::DAGQueryBlock(UInt32 id_, const ::google::protobuf::RepeatedPtrFi } } } - } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h 
b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h index 486345efa03..5a38282c02c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h @@ -67,6 +67,7 @@ class DAGQueryBlock const tipb::Executor * root; String qb_column_prefix; std::vector> children; + bool can_restore_pipeline_concurrency = true; bool isRootQueryBlock() const { return id == 1; }; bool isTableScanSource() const { return source->tp() == tipb::ExecType::TypeTableScan || source->tp() == tipb::ExecType::TypePartitionTableScan; } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index b4832ff4f17..cb609f6887e 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -43,6 +45,7 @@ #include #include #include +#include namespace DB { @@ -76,7 +79,7 @@ struct AnalysisResult String filter_column_name; String having_column_name; - std::vector order_columns; + NamesAndTypes order_columns; Names aggregation_keys; TiDB::TiDBCollators aggregation_collators; @@ -755,6 +758,18 @@ void DAGQueryBlockInterpreter::executeWhere(DAGPipeline & pipeline, const Expres pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr, filter_column, log->identifier()); }); } +void DAGQueryBlockInterpreter::executeWindow( + DAGPipeline & pipeline, + WindowDescription & window_description) +{ + executeExpression(pipeline, window_description.before_window); + + /// If there are several streams, we merge them into one + executeUnion(pipeline, max_streams, log); + assert(pipeline.streams.size() == 1); + pipeline.firstStream() = std::make_shared(pipeline.firstStream(), window_description, log->identifier()); +} + void DAGQueryBlockInterpreter::executeAggregation( DAGPipeline & pipeline, const ExpressionActionsPtr & expression_actions_ptr, @@ -852,11 +867,20 @@ void DAGQueryBlockInterpreter::executeExpression(DAGPipeline & pipeline, const E } } +void DAGQueryBlockInterpreter::executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc) +{ + orderStreams(pipeline, sort_desc, 0); +} + void DAGQueryBlockInterpreter::executeOrder(DAGPipeline & pipeline, const std::vector & order_columns) { - SortDescription order_descr = getSortDescription(order_columns, query_block.limit_or_topn->topn().order_by()); - const Settings & settings = context.getSettingsRef(); Int64 limit = query_block.limit_or_topn->topn().limit(); + orderStreams(pipeline, getSortDescription(order_columns, query_block.limit_or_topn->topn().order_by()), limit); +} + +void DAGQueryBlockInterpreter::orderStreams(DAGPipeline & pipeline, SortDescription order_descr, Int64 limit) +{ + const Settings & settings = context.getSettingsRef(); pipeline.transform([&](auto & stream) { auto sorting_stream = std::make_shared(stream, order_descr, log->identifier(), limit); @@ -1004,6 +1028,35 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti analyzer = std::make_unique(std::move(output_columns), context); } +void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb::Window & window) +{ + NamesAndTypes input_columns; + assert(input_streams_vec.size() == 1); + pipeline.streams = input_streams_vec.back(); + for (auto const & p : pipeline.firstStream()->getHeader()) + input_columns.emplace_back(p.name, p.type); + DAGExpressionAnalyzer 
dag_analyzer(input_columns, context); + WindowDescription window_description = dag_analyzer.buildWindowDescription(window); + executeWindow(pipeline, window_description); + executeExpression(pipeline, window_description.after_window); + + analyzer = std::make_unique(window_description.after_window_columns, context); +} + +void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort) +{ + NamesAndTypes input_columns; + assert(input_streams_vec.size() == 1); + pipeline.streams = input_streams_vec.back(); + for (auto const & p : pipeline.firstStream()->getHeader()) + input_columns.emplace_back(p.name, p.type); + DAGExpressionAnalyzer dag_analyzer(input_columns, context); + auto order_columns = dag_analyzer.buildWindowOrderColumns(window_sort); + executeWindowOrder(pipeline, getSortDescription(order_columns, window_sort.byitems())); + + analyzer = std::make_unique(std::move(input_columns), context); +} + // To execute a query block, you have to: // 1. generate the date stream and push it to pipeline. // 2. assign the analyzer @@ -1039,6 +1092,17 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) handleTableScan(table_scan, pipeline); dagContext().table_scan_executor_id = query_block.source_name; } + else if (query_block.source->tp() == tipb::ExecType::TypeWindow) + { + handleWindow(pipeline, query_block.source->window()); + recordProfileStreams(pipeline, query_block.source_name); + restorePipelineConcurrency(pipeline); + } + else if (query_block.source->tp() == tipb::ExecType::TypeSort) + { + handleWindowOrder(pipeline, query_block.source->sort()); + recordProfileStreams(pipeline, query_block.source_name); + } else { throw TiFlashException( @@ -1186,7 +1250,8 @@ void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::restorePipelineConcurrency(DAGPipeline & pipeline) { - restoreConcurrency(pipeline, dagContext().final_concurrency, log); + if (query_block.can_restore_pipeline_concurrency) + restoreConcurrency(pipeline, dagContext().final_concurrency, log); } BlockInputStreams DAGQueryBlockInterpreter::execute() diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index b681d22188c..1f19e5d5569 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -53,7 +53,9 @@ class DAGQueryBlockInterpreter BlockInputStreams execute(); +#ifndef DBMS_PUBLIC_GTEST private: +#endif void executeImpl(DAGPipeline & pipeline); void handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); void executeCastAfterTableScan( @@ -74,6 +76,8 @@ class DAGQueryBlockInterpreter String & filter_column_name); void handleExchangeReceiver(DAGPipeline & pipeline); void handleProjection(DAGPipeline & pipeline, const tipb::Projection & projection); + void handleWindow(DAGPipeline & pipeline, const tipb::Window & window); + void handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort); ExpressionActionsPtr genJoinOtherConditionAction( const tipb::Join & join, std::vector & source_columns, @@ -81,8 +85,13 @@ class DAGQueryBlockInterpreter String & filter_column_for_other_eq_condition); void executeWhere(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column); void executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr); + void executeWindowOrder(DAGPipeline & pipeline, 
SortDescription sort_desc); + void orderStreams(DAGPipeline & pipeline, SortDescription order_descr, Int64 limit); void executeOrder(DAGPipeline & pipeline, const std::vector & order_columns); void executeLimit(DAGPipeline & pipeline); + void executeWindow( + DAGPipeline & pipeline, + WindowDescription & window_description); void executeAggregation( DAGPipeline & pipeline, const ExpressionActionsPtr & expression_actions_ptr, diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 3dfb189f034..69e76e2a098 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -44,6 +44,12 @@ extern const String uniq_raw_res_name; namespace { +const std::unordered_map window_func_map({ + {tipb::ExprType::Rank, "rank"}, + {tipb::ExprType::DenseRank, "dense_rank"}, + {tipb::ExprType::RowNumber, "row_number"}, +}); + const std::unordered_map agg_func_map({ {tipb::ExprType::Count, "count"}, {tipb::ExprType::Sum, "sum"}, @@ -719,7 +725,7 @@ bool isScalarFunctionExpr(const tipb::Expr & expr) bool isFunctionExpr(const tipb::Expr & expr) { - return isScalarFunctionExpr(expr) || isAggFunctionExpr(expr); + return isScalarFunctionExpr(expr) || isAggFunctionExpr(expr) || isWindowFunctionExpr(expr); } const String & getAggFunctionName(const tipb::Expr & expr) @@ -744,6 +750,19 @@ const String & getAggFunctionName(const tipb::Expr & expr) throw TiFlashException(errmsg, Errors::Coprocessor::Unimplemented); } +const String & getWindowFunctionName(const tipb::Expr & expr) +{ + auto it = window_func_map.find(expr.tp()); + if (it != window_func_map.end()) + return it->second; + + const auto errmsg = fmt::format( + "{} is not supported.", + tipb::ExprType_Name(expr.tp())); + throw TiFlashException(errmsg, Errors::Coprocessor::Unimplemented); +} + + const String & getFunctionName(const tipb::Expr & expr) { if (isAggFunctionExpr(expr)) @@ -898,6 +917,39 @@ bool isAggFunctionExpr(const tipb::Expr & expr) } } +bool isWindowFunctionExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::RowNumber: + case tipb::ExprType::Rank: + case tipb::ExprType::DenseRank: + case tipb::ExprType::Lead: + case tipb::ExprType::Lag: + // case tipb::ExprType::CumeDist: + // case tipb::ExprType::PercentRank: + // case tipb::ExprType::Ntile: + // case tipb::ExprType::FirstValue: + // case tipb::ExprType::LastValue: + // case tipb::ExprType::NthValue: + return true; + default: + return false; + } +} + +bool isWindowLagOrLeadFunctionExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::Lead: + case tipb::ExprType::Lag: + return true; + default: + return false; + } +} + bool isLiteralExpr(const tipb::Expr & expr) { switch (expr.tp()) @@ -985,6 +1037,16 @@ String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col) +{ + auto column_index = decodeDAGInt64(expr.val()); + if (column_index < 0 || column_index >= static_cast(input_col.size())) + { + throw TiFlashException("Column index out of bound", Errors::Coprocessor::BadRequest); + } + return input_col[column_index]; +} + // For some historical or unknown reasons, TiDB might set an invalid // field type. This function checks if the expr has a valid field type. 
// So far the known invalid field types are: @@ -1181,7 +1243,6 @@ String genFuncString( const Names & argument_names, const TiDB::TiDBCollators & collators) { - assert(!collators.empty()); FmtBuffer buf; buf.fmtAppend("{}({})_collator", func_name, fmt::join(argument_names.begin(), argument_names.end(), ", ")); for (const auto & collator : collators) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 026e0958f89..aa2b90c1bd2 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -35,10 +35,13 @@ Field decodeLiteral(const tipb::Expr & expr); bool isFunctionExpr(const tipb::Expr & expr); bool isScalarFunctionExpr(const tipb::Expr & expr); bool isAggFunctionExpr(const tipb::Expr & expr); +bool isWindowFunctionExpr(const tipb::Expr & expr); const String & getFunctionName(const tipb::Expr & expr); const String & getAggFunctionName(const tipb::Expr & expr); +const String & getWindowFunctionName(const tipb::Expr & expr); bool isColumnExpr(const tipb::Expr & expr); String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col); +NameAndTypePair getColumnNameAndTypeForColumnExpr(const tipb::Expr & expr, const std::vector & input_col); const String & getTypeName(const tipb::Expr & expr); String exprToString(const tipb::Expr & expr, const std::vector & input_col); bool exprHasValidFieldType(const tipb::Expr & expr); diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 4c67d67e4f9..6b118f1dd40 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -36,6 +36,15 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) } } +void setRestorePipelineConcurrency(DAGQueryBlock & query_block) +{ + if (query_block.source->tp() == tipb::ExecType::TypeWindow) + { + assert(query_block.children.size() == 1); + query_block.children.back()->can_restore_pipeline_concurrency = false; + } +} + DAGContext & InterpreterDAG::dagContext() const { return *context.getDAGContext(); @@ -47,6 +56,7 @@ DAGContext & InterpreterDAG::dagContext() const BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) { std::vector input_streams_vec; + setRestorePipelineConcurrency(query_block); for (auto & child : query_block.children) { BlockInputStreams child_streams = executeQueryBlock(*child); diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 4b6c7530506..b68279faa13 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -144,6 +144,11 @@ bool collectForExecutor(std::vector & output_field_types, const case tipb::ExecType::TypeAggregation: case tipb::ExecType::TypeStreamAgg: return collectForAgg(output_field_types, executor.aggregation()); + case tipb::ExecType::TypeWindow: + // Window will only be pushed down in mpp mode. + // In mpp mode, ExchangeSender or Sender will return output_field_types directly. + // If not in mpp mode, window executor type is invalid. 
+ throw TiFlashException("Window executor type is invalid in non-mpp mode, should not reach here.", Errors::Coprocessor::Internal); case tipb::ExecType::TypeExchangeReceiver: return collectForReceiver(output_field_types, executor.exchange_receiver()); case tipb::ExecType::TypeTableScan: diff --git a/dbms/src/Flash/Statistics/CommonExecutorImpl.h b/dbms/src/Flash/Statistics/CommonExecutorImpl.h index d7e8f8b5ca9..404fd1acbd6 100644 --- a/dbms/src/Flash/Statistics/CommonExecutorImpl.h +++ b/dbms/src/Flash/Statistics/CommonExecutorImpl.h @@ -32,6 +32,32 @@ struct AggImpl }; using AggStatistics = ExecutorStatistics; +struct WindowImpl +{ + static constexpr bool has_extra_info = false; + + static constexpr auto type = "Window"; + + static bool isMatch(const tipb::Executor * executor) + { + return executor->has_window(); + } +}; +using WindowStatistics = ExecutorStatistics; + +struct SortImpl +{ + static constexpr bool has_extra_info = false; + + static constexpr auto type = "Sort"; + + static bool isMatch(const tipb::Executor * executor) + { + return executor->has_sort(); + } +}; +using SortStatistics = ExecutorStatistics; + struct FilterImpl { static constexpr bool has_extra_info = false; diff --git a/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp b/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp index 5f9498cc857..b442104c139 100644 --- a/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp +++ b/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp @@ -61,8 +61,10 @@ void ExecutorStatisticsCollector::initialize(DAGContext * dag_context_) JoinStatistics, LimitStatistics, ProjectStatistics, + SortStatistics, TableScanStatistics, - TopNStatistics>(executor_id, &executor)) + TopNStatistics, + WindowStatistics>(executor_id, &executor)) { throw TiFlashException( fmt::format("Unknown executor type, executor_id: {}", executor_id), diff --git a/dbms/src/Flash/Statistics/traverseExecutors.cpp b/dbms/src/Flash/Statistics/traverseExecutors.cpp index 0aea64a57c1..dd720920dcd 100644 --- a/dbms/src/Flash/Statistics/traverseExecutors.cpp +++ b/dbms/src/Flash/Statistics/traverseExecutors.cpp @@ -33,6 +33,10 @@ Children getChildren(const tipb::Executor & executor) case tipb::ExecType::TypeAggregation: case tipb::ExecType::TypeStreamAgg: return Children{&executor.aggregation().child()}; + case tipb::ExecType::TypeWindow: + return Children{&executor.window().child()}; + case tipb::ExecType::TypeSort: + return Children{&executor.sort().child()}; case tipb::ExecType::TypeTopN: return Children{&executor.topn().child()}; case tipb::ExecType::TypeLimit: diff --git a/dbms/src/Interpreters/WindowDescription.cpp b/dbms/src/Interpreters/WindowDescription.cpp new file mode 100644 index 00000000000..2ab407bb18e --- /dev/null +++ b/dbms/src/Interpreters/WindowDescription.cpp @@ -0,0 +1,63 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
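// Editor's note: a minimal usage sketch for the tipb -> WindowFrame mapping
// defined below; the input frame is assumed here, not taken from the patch.
// For a frame equivalent to "ROWS BETWEEN 1 PRECEDING AND CURRENT ROW":
//
//     WindowDescription desc;
//     desc.setWindowFrame(frame); // tipb::WindowFrame from the plan
//     // desc.frame.type         == WindowFrame::FrameType::Rows
//     // desc.frame.begin_type   == WindowFrame::BoundaryType::Offset
//     // desc.frame.begin_offset == 1, desc.frame.begin_preceding == true
//     // desc.frame.end_type     == WindowFrame::BoundaryType::Current
//     // desc.frame.is_default   == false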
+
+#include 
+#include 
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+}
+
+WindowFrame::BoundaryType getBoundaryTypeFromTipb(const tipb::WindowFrameBound & bound)
+{
+    if (bound.type() == tipb::WindowBoundType::CurrentRow)
+        return WindowFrame::BoundaryType::Current;
+    else if (bound.unbounded())
+        return WindowFrame::BoundaryType::Unbounded;
+    else
+        return WindowFrame::BoundaryType::Offset;
+}
+
+WindowFrame::FrameType getFrameTypeFromTipb(const tipb::WindowFrameType & type)
+{
+    switch (type)
+    {
+    case tipb::WindowFrameType::Ranges:
+        return WindowFrame::FrameType::Ranges;
+    case tipb::WindowFrameType::Rows:
+        return WindowFrame::FrameType::Rows;
+    case tipb::WindowFrameType::Groups:
+        return WindowFrame::FrameType::Groups;
+    default:
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Unknown frame type {}",
+                        type);
+    }
+}
+
+void WindowDescription::setWindowFrame(const tipb::WindowFrame & frame_)
+{
+    frame.type = getFrameTypeFromTipb(frame_.type());
+    frame.begin_offset = frame_.start().offset();
+    frame.begin_type = getBoundaryTypeFromTipb(frame_.start());
+    frame.begin_preceding = (frame_.start().type() == tipb::WindowBoundType::Preceding);
+    frame.end_offset = frame_.end().offset();
+    frame.end_type = getBoundaryTypeFromTipb(frame_.end());
+    frame.end_preceding = (frame_.end().type() == tipb::WindowBoundType::Preceding);
+    frame.is_default = false;
+}
+} // namespace DB
diff --git a/dbms/src/Interpreters/WindowDescription.h b/dbms/src/Interpreters/WindowDescription.h
new file mode 100644
index 00000000000..cdcade1b750
--- /dev/null
+++ b/dbms/src/Interpreters/WindowDescription.h
@@ -0,0 +1,117 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace DB
+{
+struct WindowFunctionDescription
+{
+    WindowFunctionPtr window_function;
+    Array parameters;
+    Names argument_names;
+    std::string column_name;
+};
+
+using WindowFunctionDescriptions = std::vector<WindowFunctionDescription>;
+
+struct WindowFrame
+{
+    enum class FrameType
+    {
+        Rows,
+        Groups,
+        Ranges
+    };
+    enum class BoundaryType
+    {
+        Unbounded,
+        Current,
+        Offset
+    };
+
+    // This flag signifies that the frame properties were not set explicitly by
+    // the user, but the fields of this structure still have to contain proper
+    // values for the default frame of RANGE BETWEEN UNBOUNDED PRECEDING AND
+    // CURRENT ROW.
+    bool is_default = true;
+
+    FrameType type = FrameType::Ranges;
+
+    // UNBOUNDED FOLLOWING for the frame end is forbidden by the standard, but for
+    // uniformity the begin_preceding still has to be set to true for an UNBOUNDED
+    // frame start.
+    // Offset might be both preceding and following, controlled by begin_preceding,
+    // but the offset value must be positive.
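    // Editor's note (an illustration, not part of the patch): e.g.
    // "ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING" maps to
    //     begin_type = Offset, begin_offset = 2, begin_preceding = true,
    //     end_type   = Offset, end_offset   = 3, end_preceding   = false,
    // while the member initializers below encode the default frame of
    // RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.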
+ BoundaryType begin_type = BoundaryType::Unbounded; + Field begin_offset = Field(UInt64(0)); + bool begin_preceding = true; + + // Here as well, Unbounded can only be UNBOUNDED FOLLOWING, and end_preceding + // must be false. + BoundaryType end_type = BoundaryType::Current; + Field end_offset = Field(UInt64(0)); + bool end_preceding = false; + + bool operator==(const WindowFrame & other) const + { + // We don't compare is_default because it's not a real property of the + // frame, and only influences how we display it. + return other.type == type + && other.begin_type == begin_type + && other.begin_offset == begin_offset + && other.begin_preceding == begin_preceding + && other.end_type == end_type + && other.end_offset == end_offset + && other.end_preceding == end_preceding; + } +}; +class ExpressionActions; +using ExpressionActionsPtr = std::shared_ptr; +struct WindowDescription +{ + ExpressionActionsPtr before_window; + + ExpressionActionsPtr after_window; + + NamesAndTypes add_columns; + + NamesAndTypes after_window_columns; + + // We don't care about the particular order of keys for PARTITION BY, only + // that they are sorted. For now we always require ASC, but we could be more + // flexible and match any direction, or even different order of columns. + SortDescription partition_by; + + SortDescription order_by; + + WindowFrame frame; + + // The window functions that are calculated for this window. + WindowFunctionDescriptions window_functions_descriptions; + + void setWindowFrame(const tipb::WindowFrame & frame_); +}; + +} // namespace DB diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 928d0fd7c5f..a5375a65099 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -73,7 +73,7 @@ static Field convertNumericTypeImpl(const Field & from) template static Field convertDecimalTypeImpl(const Field & from) { - auto decimal_field = from.safeGet>(); + const auto & decimal_field = from.safeGet>(); // FIXME:: There is some bugs when `to` is int; return Field(typename NearestFieldType::Type(static_cast(decimal_field))); } @@ -128,7 +128,7 @@ static Field convertDecimalToDecimalType(const Field & from, const DataTypeDecim // TODO:: Refine this, Consider overflow!! 
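    // Editor's note (an illustration, not part of the patch): scaling up is a
    // multiplication by a power of ten. E.g. converting the Decimal(5, 2)
    // value 1.23 (stored as the integer 123) to Decimal(10, 4) multiplies by
    // 10^(4 - 2) = 100, giving 12300, which reads back as 1.2300.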
if constexpr (sizeof(From) <= sizeof(To)) { - auto field = from.get>(); + const auto & field = from.get>(); if (field.getScale() <= type.getScale()) { ScaleType scale = type.getScale() - field.getScale(); @@ -227,13 +227,13 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type) return convertNumericType(src, type); if (typeid_cast(&type)) return convertNumericType(src, type); - if (auto * ptype = typeid_cast(&type)) + if (const auto * ptype = typeid_cast(&type)) return convertDecimalType(src, *ptype); - if (auto * ptype = typeid_cast(&type)) + if (const auto * ptype = typeid_cast(&type)) return convertDecimalType(src, *ptype); - if (auto * ptype = typeid_cast(&type)) + if (const auto * ptype = typeid_cast(&type)) return convertDecimalType(src, *ptype); - if (auto * ptype = typeid_cast(&type)) + if (const auto * ptype = typeid_cast(&type)) return convertDecimalType(src, *ptype); const bool is_date = typeid_cast(&type); @@ -352,5 +352,23 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co return convertFieldToTypeImpl(from_value, to_type); } +Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint) +{ + bool is_null = from_value.isNull(); + if (is_null && !to_type.isNullable()) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert NULL to {}", to_type.getName()); + + Field converted = convertFieldToType(from_value, to_type, from_type_hint); + + if (!is_null && converted.isNull()) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Cannot convert value '{}'{}: it cannot be represented as {}", + from_value.toString(), + from_type_hint ? " from " + from_type_hint->getName() : "", + to_type.getName()); + + return converted; +} + } // namespace DB diff --git a/dbms/src/Interpreters/convertFieldToType.h b/dbms/src/Interpreters/convertFieldToType.h index ec5cf0cf9ca..ac5a519fdaf 100644 --- a/dbms/src/Interpreters/convertFieldToType.h +++ b/dbms/src/Interpreters/convertFieldToType.h @@ -30,4 +30,8 @@ class IDataType; */ Field convertFieldToType(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint = nullptr); +/// Does the same, but throws ARGUMENT_OUT_OF_BOUND if value does not fall into the range. +Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint = nullptr); + + } // namespace DB diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 2a1bb68584c..91c1cf6680b 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -12,60 +12,63 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include -#include +#include #include -#include #include -#include -#include #include -#include +#include +#include #include +#include +#include +#include #include -#include -#include #include +#include +#include +#include #include -#include -#include #include #include #include +#include +#include #include -#include -#include -#include -#include +#include +#include #include -#include -#include #include -#include +#include +#include +#include +#include +#include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "InterruptListener.h" -#include -#include /// http://en.wikipedia.org/wiki/ANSI_escape_code @@ -83,54 +86,66 @@ namespace DB { - namespace ErrorCodes { - extern const int POCO_EXCEPTION; - extern const int STD_EXCEPTION; - extern const int UNKNOWN_EXCEPTION; - extern const int NETWORK_ERROR; - extern const int NO_DATA_TO_INSERT; - extern const int BAD_ARGUMENTS; - extern const int CANNOT_READ_HISTORY; - extern const int CANNOT_APPEND_HISTORY; - extern const int UNKNOWN_PACKET_FROM_SERVER; - extern const int UNEXPECTED_PACKET_FROM_SERVER; - extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED; -} +extern const int POCO_EXCEPTION; +extern const int STD_EXCEPTION; +extern const int UNKNOWN_EXCEPTION; +extern const int NETWORK_ERROR; +extern const int NO_DATA_TO_INSERT; +extern const int BAD_ARGUMENTS; +extern const int CANNOT_READ_HISTORY; +extern const int CANNOT_APPEND_HISTORY; +extern const int UNKNOWN_PACKET_FROM_SERVER; +extern const int UNEXPECTED_PACKET_FROM_SERVER; +extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED; +} // namespace ErrorCodes class Client : public Poco::Util::Application { public: - Client() {} + Client() = default; private: using StringSet = std::unordered_set; - StringSet exit_strings - { - "exit", "quit", "logout", - "учше", "йгше", "дщпщге", - "exit;", "quit;", "logout;", - "учшеж", "йгшеж", "дщпщгеж", - "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй" - }; - bool is_interactive = true; /// Use either readline interface or batch mode. - bool need_render_progress = true; /// Render query execution progress. - bool echo_queries = false; /// Print queries before execution in batch mode. - bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode. - bool stdin_is_not_tty = false; /// stdin is not a terminal. - - winsize terminal_size {}; /// Terminal size is needed to render progress bar. - - std::unique_ptr connection; /// Connection to DB. - String query_id; /// Current query_id. - String query; /// Current query. - - String format; /// Query results output format. - bool is_default_format = true; /// false, if format is set in the config or command line. - size_t format_max_block_size = 0; /// Max block size for console output. - String insert_format; /// Format of INSERT data that is read from stdin in batch mode. + StringSet exit_strings{ + "exit", + "quit", + "logout", + "учше", + "йгше", + "дщпщге", + "exit;", + "quit;", + "logout;", + "учшеж", + "йгшеж", + "дщпщгеж", + "q", + "й", + "\\q", + "\\Q", + "\\й", + "\\Й", + ":q", + "Жй"}; + bool is_interactive = true; /// Use either readline interface or batch mode. 
+ bool need_render_progress = true; /// Render query execution progress. + bool echo_queries = false; /// Print queries before execution in batch mode. + bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode. + bool stdin_is_not_tty = false; /// stdin is not a terminal. + + winsize terminal_size{}; /// Terminal size is needed to render progress bar. + + std::unique_ptr connection; /// Connection to DB. + String query_id; /// Current query_id. + String query; /// Current query. + + String format; /// Query results output format. + bool is_default_format = true; /// false, if format is set in the config or command line. + size_t format_max_block_size = 0; /// Max block size for console output. + String insert_format; /// Format of INSERT data that is read from stdin in batch mode. size_t insert_format_max_block_size = 0; /// Max block size when reading INSERT data. size_t max_client_network_bandwidth = 0; /// The maximum speed of data exchange over the network for the client in bytes per second. @@ -139,10 +154,10 @@ class Client : public Poco::Util::Application Context context = Context::createGlobal(); /// Buffer that reads from stdin in batch mode. - ReadBufferFromFileDescriptor std_in {STDIN_FILENO}; + ReadBufferFromFileDescriptor std_in{STDIN_FILENO}; /// Console output. - WriteBufferFromFileDescriptor std_out {STDOUT_FILENO}; + WriteBufferFromFileDescriptor std_out{STDOUT_FILENO}; std::unique_ptr pager_cmd; /// The user can specify to redirect query output to a file. std::optional out_file_buf; @@ -195,9 +210,9 @@ class Client : public Poco::Util::Application Protocol::Compression compression; ConnectionTimeouts timeouts; - ConnectionParameters() {} + ConnectionParameters() = default; - ConnectionParameters(const Poco::Util::AbstractConfiguration & config) + explicit ConnectionParameters(const Poco::Util::AbstractConfiguration & config) { bool is_secure = config.getBool("secure", false); security = is_secure @@ -206,8 +221,8 @@ class Client : public Poco::Util::Application host = config.getString("host", "localhost"); port = config.getInt("port", - config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", - is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); + config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", + is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); default_database = config.getString("database", ""); user = config.getString("user", ""); @@ -249,12 +264,11 @@ class Client : public Poco::Util::Application context.setApplicationType(Context::ApplicationType::CLIENT); /// settings and limits could be specified in config file, but passed settings has higher priority -#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - if (config().has(#NAME) && !context.getSettingsRef().NAME.changed) \ - context.setSetting(#NAME, config().getString(#NAME)); +#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ + if (config().has(#NAME) && !context.getSettingsRef().NAME.changed) \ + context.setSetting(#NAME, config().getString(#NAME)); APPLY_FOR_SETTINGS(EXTRACT_SETTING) #undef EXTRACT_SETTING - } @@ -278,7 +292,8 @@ class Client : public Poco::Util::Application if (std::string::npos != embedded_stack_trace_pos && !print_stack_trace) text.resize(embedded_stack_trace_pos); - std::cerr << "Code: " << e.code() << ". " << text << std::endl << std::endl; + std::cerr << "Code: " << e.code() << ". " << text << std::endl + << std::endl; /// Don't print the stack trace on the client if it was logged on the server. 
/// Also don't print the stack trace in case of network errors. @@ -287,7 +302,7 @@ class Client : public Poco::Util::Application && std::string::npos == embedded_stack_trace_pos) { std::cerr << "Stack trace:" << std::endl - << e.getStackTrace().toString(); + << e.getStackTrace().toString(); } /// If exception code isn't zero, we should return non-zero return code anyway. @@ -311,7 +326,7 @@ class Client : public Poco::Util::Application } /// Should we celebrate a bit? - bool isNewYearMode() + static bool isNewYearMode() { time_t current_time = time(nullptr); @@ -329,6 +344,7 @@ class Client : public Poco::Util::Application { registerFunctions(); registerAggregateFunctions(); + registerWindowFunctions(); /// Batch mode is enabled if one of the following is true: /// - -e (--query) command line option is present. @@ -379,16 +395,18 @@ class Client : public Poco::Util::Application catch (...) { std::cerr << "Warning: could not switch to server time zone: " << time_zone - << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl - << "Proceeding with local time zone." - << std::endl << std::endl; + << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl + << "Proceeding with local time zone." + << std::endl + << std::endl; } } else { std::cerr << "Warning: could not determine server time zone. " - << "Proceeding with local time zone." - << std::endl << std::endl; + << "Proceeding with local time zone." + << std::endl + << std::endl; } } @@ -416,8 +434,7 @@ class Client : public Poco::Util::Application } /// Prompt may contain the following substitutions in a form of {name}. - std::map prompt_substitutions - { + std::map prompt_substitutions{ {"host", connection_parameters.host}, {"port", toString(connection_parameters.port)}, {"user", connection_parameters.user}, @@ -425,7 +442,7 @@ class Client : public Poco::Util::Application }; /// Quite suboptimal. - for (const auto & [key, value]: prompt_substitutions) + for (const auto & [key, value] : prompt_substitutions) boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value); if (is_interactive) @@ -454,7 +471,7 @@ class Client : public Poco::Util::Application throwFromErrno("Cannot read history from file " + history_file, ErrorCodes::CANNOT_READ_HISTORY); #endif } - else /// Create history file. + else /// Create history file. Poco::File(history_file).createFile(); } @@ -481,10 +498,10 @@ class Client : public Poco::Util::Application { if (is_interactive) std::cout << "Connecting to " - << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " : "") - << connection_parameters.host << ":" << connection_parameters.port - << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") - << "." << std::endl; + << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " : "") + << connection_parameters.host << ":" << connection_parameters.port + << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") + << "." 
<< std::endl; connection = std::make_unique( connection_parameters.host, @@ -504,7 +521,7 @@ class Client : public Poco::Util::Application if (max_client_network_bandwidth) { - ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); + ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); connection->setThrottler(throttler); } @@ -521,7 +538,8 @@ class Client : public Poco::Util::Application { std::cout << "Connected to " << server_name << " server version " << server_version - << "." << std::endl << std::endl; + << "." << std::endl + << std::endl; } } @@ -530,14 +548,14 @@ class Client : public Poco::Util::Application /// Allows delaying the start of query execution until the entirety of query is inserted. static bool hasDataInSTDIN() { - timeval timeout = { 0, 0 }; - fd_set fds; + timeval timeout = {0, 0}; + fd_set fds; // NOLINT FD_ZERO(&fds); - FD_SET(STDIN_FILENO, &fds); - return select(1, &fds, 0, 0, &timeout) == 1; + FD_SET(STDIN_FILENO, &fds); // NOLINT + return select(1, &fds, nullptr, nullptr, &timeout) == 1; } - inline const String prompt() const + inline String prompt() const { return boost::replace_all_copy(prompt_by_server_display_name, "{database}", config().getString("database", "default")); } @@ -547,10 +565,10 @@ class Client : public Poco::Util::Application String query; String prev_query; - while (char * line_ = readline(query.empty() ? prompt().c_str() : ":-] ")) + while (char * line_read = readline(query.empty() ? prompt().c_str() : ":-] ")) { - String line = line_; - free(line_); + String line = line_read; + free(line_read); size_t ws = line.size(); while (ws > 0 && isWhitespaceASCII(line[ws - 1])) @@ -603,9 +621,9 @@ class Client : public Poco::Util::Application catch (const Exception & e) { std::cerr << std::endl - << "Exception on client:" << std::endl - << "Code: " << e.code() << ". " << e.displayText() << std::endl - << std::endl; + << "Exception on client:" << std::endl + << "Code: " << e.code() << ". " << e.displayText() << std::endl + << std::endl; /// Client-side exception during query execution can result in the loss of /// sync in the connection protocol. @@ -694,7 +712,8 @@ class Client : public Poco::Util::Application } catch (...) { - std::cerr << "Error on processing query: " << query << std::endl << getCurrentExceptionMessage(true); + std::cerr << "Error on processing query: " << query << std::endl + << getCurrentExceptionMessage(true); got_exception = true; } @@ -794,12 +813,13 @@ class Client : public Poco::Util::Application if (is_interactive) { std::cout << std::endl - << processed_rows << " rows in set. Elapsed: " << watch.elapsedSeconds() << " sec. "; + << processed_rows << " rows in set. Elapsed: " << watch.elapsedSeconds() << " sec. "; if (progress.rows >= 1000) writeFinalProgress(); - std::cout << std::endl << std::endl; + std::cout << std::endl + << std::endl; } else if (print_time_to_stderr) { @@ -813,7 +833,7 @@ class Client : public Poco::Util::Application /// Convert external tables to ExternalTableData and send them using the connection. 
void sendExternalTables() { - auto * select = typeid_cast(&*parsed_query); + const auto * select = typeid_cast(&*parsed_query); if (!select && !external_tables.empty()) throw Exception("External tables could be sent only with select query", ErrorCodes::BAD_ARGUMENTS); @@ -861,7 +881,7 @@ class Client : public Poco::Util::Application } - ASTPtr parseQuery(const char * & pos, const char * end, bool allow_multi_statements) + ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) { ParserQuery parser(end); ASTPtr res; @@ -875,7 +895,9 @@ class Client : public Poco::Util::Application if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; + std::cerr << std::endl + << message << std::endl + << std::endl; return nullptr; } } @@ -886,7 +908,8 @@ class Client : public Poco::Util::Application { std::cout << std::endl; formatAST(*res, std::cout); - std::cout << std::endl << std::endl; + std::cout << std::endl + << std::endl; } return res; @@ -926,7 +949,10 @@ class Client : public Poco::Util::Application current_format = insert->format; BlockInputStreamPtr block_input = context.getInputFormat( - current_format, buf, sample, insert_format_max_block_size); + current_format, + buf, + sample, + insert_format_max_block_size); BlockInputStreamPtr async_block_input = std::make_shared(block_input); @@ -990,7 +1016,7 @@ class Client : public Poco::Util::Application interrupt_listener.unblock(); } else if (!connection->poll(1000000)) - continue; /// If there is no new data, continue checking whether the query was cancelled after a timeout. + continue; /// If there is no new data, continue checking whether the query was cancelled after a timeout. } if (!receivePacket()) @@ -1010,37 +1036,37 @@ class Client : public Poco::Util::Application switch (packet.type) { - case Protocol::Server::Data: - onData(packet.block); - return true; + case Protocol::Server::Data: + onData(packet.block); + return true; - case Protocol::Server::Progress: - onProgress(packet.progress); - return true; + case Protocol::Server::Progress: + onProgress(packet.progress); + return true; - case Protocol::Server::ProfileInfo: - onProfileInfo(packet.profile_info); - return true; + case Protocol::Server::ProfileInfo: + onProfileInfo(packet.profile_info); + return true; - case Protocol::Server::Totals: - onTotals(packet.block); - return true; + case Protocol::Server::Totals: + onTotals(packet.block); + return true; - case Protocol::Server::Extremes: - onExtremes(packet.block); - return true; + case Protocol::Server::Extremes: + onExtremes(packet.block); + return true; - case Protocol::Server::Exception: - onException(*packet.exception); - last_exception = std::move(packet.exception); - return false; + case Protocol::Server::Exception: + onException(*packet.exception); + last_exception = std::move(packet.exception); + return false; - case Protocol::Server::EndOfStream: - onEndOfStream(); - return false; + case Protocol::Server::EndOfStream: + onEndOfStream(); + return false; - default: - throw Exception("Unknown packet from server", ErrorCodes::UNKNOWN_PACKET_FROM_SERVER); + default: + throw Exception("Unknown packet from server", ErrorCodes::UNKNOWN_PACKET_FROM_SERVER); } } @@ -1052,18 +1078,19 @@ class Client : public Poco::Util::Application switch (packet.type) { - case Protocol::Server::Data: - out = packet.block; - return true; - - case Protocol::Server::Exception: - onException(*packet.exception); - last_exception = std::move(packet.exception); - return false; - - default: - throw 
NetException("Unexpected packet from server (expected Data, got " - + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + case Protocol::Server::Data: + out = packet.block; + return true; + + case Protocol::Server::Exception: + onException(*packet.exception); + last_exception = std::move(packet.exception); + return false; + + default: + throw NetException("Unexpected packet from server (expected Data, got " + + String(Protocol::Server::toString(packet.type)) + ")", + ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); } } @@ -1177,8 +1204,7 @@ class Client : public Poco::Util::Application return; static size_t increment = 0; - static const char * indicators[8] = - { + static const char * indicators[8] = { "\033[1;30m→\033[0m", "\033[1;31m↘\033[0m", "\033[1;32m↓\033[0m", @@ -1196,8 +1222,8 @@ class Client : public Poco::Util::Application std::stringstream message; message << indicators[increment % 8] - << std::fixed << std::setprecision(3) - << " Progress: "; + << std::fixed << std::setprecision(3) + << " Progress: "; message << formatReadableQuantity(progress.rows) << " rows, " @@ -1206,8 +1232,8 @@ class Client : public Poco::Util::Application size_t elapsed_ns = watch.elapsed(); if (elapsed_ns) message << " (" - << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " - << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; + << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; else message << ". "; @@ -1236,7 +1262,7 @@ class Client : public Poco::Util::Application std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, total_rows_corrected, width_of_progress_bar)); std::cerr << "\033[0;32m" << bar << "\033[0m"; if (width_of_progress_bar > static_cast(bar.size() / UNICODE_BAR_CHAR_SIZE)) - std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' '); + std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' '); } } } @@ -1253,14 +1279,14 @@ class Client : public Poco::Util::Application void writeFinalProgress() { std::cout << "Processed " - << formatReadableQuantity(progress.rows) << " rows, " - << formatReadableSizeWithDecimalSuffix(progress.bytes); + << formatReadableQuantity(progress.rows) << " rows, " + << formatReadableSizeWithDecimalSuffix(progress.bytes); size_t elapsed_ns = watch.elapsed(); if (elapsed_ns) std::cout << " (" - << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " - << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; + << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; else std::cout << ". "; } @@ -1278,7 +1304,7 @@ class Client : public Poco::Util::Application text.resize(embedded_stack_trace_pos); std::cerr << "Received exception from server (version " << server_version << "):" << std::endl - << "Code: " << e.code() << ". " << text << std::endl; + << "Code: " << e.code() << ". " << text << std::endl; } @@ -1300,12 +1326,12 @@ class Client : public Poco::Util::Application std::cout << "Ok." << std::endl; } - void showClientVersion() + static void showClientVersion() { std::cout << "ClickHouse client version " << DBMS_VERSION_MAJOR - << "." 
<< DBMS_VERSION_MINOR - << "." << ClickHouseRevision::get() - << "." << std::endl; + << "." << DBMS_VERSION_MINOR + << "." << ClickHouseRevision::get() + << "." << std::endl; } public: @@ -1322,7 +1348,7 @@ class Client : public Poco::Util::Application */ using Arguments = std::vector; - Arguments common_arguments{""}; /// 0th argument is ignored. + Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; bool in_external_group = false; @@ -1337,21 +1363,21 @@ class Client : public Poco::Util::Application } /// Options with value after equal sign. else if (in_external_group - && (0 == strncmp(arg, "--file=", strlen("--file=")) - || 0 == strncmp(arg, "--name=", strlen("--name=")) - || 0 == strncmp(arg, "--format=", strlen("--format=")) - || 0 == strncmp(arg, "--structure=", strlen("--structure=")) - || 0 == strncmp(arg, "--types=", strlen("--types=")))) + && (0 == strncmp(arg, "--file=", strlen("--file=")) + || 0 == strncmp(arg, "--name=", strlen("--name=")) + || 0 == strncmp(arg, "--format=", strlen("--format=")) + || 0 == strncmp(arg, "--structure=", strlen("--structure=")) + || 0 == strncmp(arg, "--types=", strlen("--types=")))) { external_tables_arguments.back().emplace_back(arg); } /// Options with value after whitespace. else if (in_external_group - && (0 == strcmp(arg, "--file") - || 0 == strcmp(arg, "--name") - || 0 == strcmp(arg, "--format") - || 0 == strcmp(arg, "--structure") - || 0 == strcmp(arg, "--types"))) + && (0 == strcmp(arg, "--file") + || 0 == strcmp(arg, "--name") + || 0 == strcmp(arg, "--format") + || 0 == strcmp(arg, "--structure") + || 0 == strcmp(arg, "--types"))) { if (arg_num + 1 < argc) { @@ -1370,10 +1396,11 @@ class Client : public Poco::Util::Application } } -#define DECLARE_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) (#NAME, boost::program_options::value (), DESCRIPTION) +#define DECLARE_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) (#NAME, boost::program_options::value(), DESCRIPTION) /// Main commandline options related to client functionality and all parameters from Settings. boost::program_options::options_description main_description("Main options"); + // clang-format off main_description.add_options() ("help", "produce help message") ("config-file,c", boost::program_options::value(), "config-file path") @@ -1400,10 +1427,12 @@ class Client : public Poco::Util::Application ("compression", boost::program_options::value(), "enable or disable compression") APPLY_FOR_SETTINGS(DECLARE_SETTING) ; + // clang-format on #undef DECLARE_SETTING /// Commandline options related to external tables. boost::program_options::options_description external_description("External tables options"); + // clang-format off external_description.add_options() ("file", boost::program_options::value(), "data file or - for stdin") ("name", boost::program_options::value()->default_value("_data"), "name of the table") @@ -1411,10 +1440,14 @@ class Client : public Poco::Util::Application ("structure", boost::program_options::value(), "structure") ("types", boost::program_options::value(), "types") ; + // clang-format on /// Parse main commandline options. 
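    /// common_arguments was pre-split above: the external-table argument groups
    /// went into external_tables_arguments and are parsed against
    /// external_description in the loop further down, so only the main options
    /// are handled here.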
boost::program_options::parsed_options parsed = boost::program_options::command_line_parser( - common_arguments.size(), common_arguments.data()).options(main_description).run(); + common_arguments.size(), + common_arguments.data()) + .options(main_description) + .run(); boost::program_options::variables_map options; boost::program_options::store(parsed, options); @@ -1426,7 +1459,7 @@ class Client : public Poco::Util::Application /// Output of help message. if (options.count("help") - || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. + || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. { std::cout << main_description << "\n"; std::cout << external_description << "\n"; @@ -1438,7 +1471,10 @@ class Client : public Poco::Util::Application { /// Parse commandline options related to external tables. boost::program_options::parsed_options parsed = boost::program_options::command_line_parser( - external_tables_arguments[i].size(), external_tables_arguments[i].data()).options(external_description).run(); + external_tables_arguments[i].size(), + external_tables_arguments[i].data()) + .options(external_description) + .run(); boost::program_options::variables_map external_options; boost::program_options::store(parsed, external_options); @@ -1454,15 +1490,16 @@ class Client : public Poco::Util::Application { std::string text = e.displayText(); std::cerr << "Code: " << e.code() << ". " << text << std::endl; - std::cerr << "Table №" << i << std::endl << std::endl; + std::cerr << "Table №" << i << std::endl + << std::endl; exit(e.code()); } } /// Extract settings and limits from the options. #define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - if (options.count(#NAME)) \ - context.setSetting(#NAME, options[#NAME].as()); + if (options.count(#NAME)) \ + context.setSetting(#NAME, options[#NAME].as()); APPLY_FOR_SETTINGS(EXTRACT_SETTING) #undef EXTRACT_SETTING @@ -1511,11 +1548,10 @@ class Client : public Poco::Util::Application max_client_network_bandwidth = options["max_client_network_bandwidth"].as(); if (options.count("compression")) config().setBool("compression", options["compression"].as()); - } }; -} +} // namespace DB int mainEntryClickHouseClient(int argc, char ** argv) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 44e8ea29c29..a6681be92b1 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -989,6 +990,7 @@ int Server::main(const std::vector & /*args*/) registerFunctions(); registerAggregateFunctions(); + registerWindowFunctions(); registerTableFunctions(); registerStorages(); diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index b7a061e68fb..6aa7541ee59 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -98,6 +98,24 @@ ::testing::AssertionResult columnEqual( return columnEqual(expected.column, actual.column); } +void blockEqual( + const Block & expected, + const Block & actual) +{ + size_t columns = actual.columns(); + + ASSERT_TRUE(expected.columns() == columns); + + for (size_t i = 0; i < columns; ++i) + { + const auto & expected_col = expected.getByPosition(i); + const auto & actual_col = actual.getByPosition(i); + ASSERT_TRUE(actual_col.type->getName() == expected_col.type->getName()); + ASSERT_COLUMN_EQ(expected_col.column, actual_col.column); 
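+        // (ASSERT_* macros are fatal on failure, which is why blockEqual returns
+        // void rather than ::testing::AssertionResult like columnEqual above.)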
+    }
+}
+
+
 ColumnWithTypeAndName executeFunction(Context & context, const String & func_name, const ColumnsWithTypeAndName & columns, const TiDB::TiDBCollatorPtr & collator)
 {
     auto & factory = FunctionFactory::instance();
diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h
index 41937adb9b6..e88f33a5ca7 100644
--- a/dbms/src/TestUtils/FunctionTestUtils.h
+++ b/dbms/src/TestUtils/FunctionTestUtils.h
@@ -527,6 +527,10 @@ ::testing::AssertionResult columnEqual(
     const ColumnWithTypeAndName & expected,
     const ColumnWithTypeAndName & actual);
 
+void blockEqual(
+    const Block & expected,
+    const Block & actual);
+
 ColumnWithTypeAndName executeFunction(
     Context & context,
     const String & func_name,
@@ -711,5 +715,6 @@ class FunctionTest : public ::testing::Test
 };
 
 #define ASSERT_COLUMN_EQ(expected, actual) ASSERT_TRUE(DB::tests::columnEqual((expected), (actual)))
+#define ASSERT_BLOCK_EQ(expected, actual) DB::tests::blockEqual((expected), (actual))
 } // namespace tests
 } // namespace DB
diff --git a/dbms/src/TestUtils/MockTableScanBlockInputStream.cpp b/dbms/src/TestUtils/MockTableScanBlockInputStream.cpp
new file mode 100644
index 00000000000..316c7487a63
--- /dev/null
+++ b/dbms/src/TestUtils/MockTableScanBlockInputStream.cpp
@@ -0,0 +1,62 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <TestUtils/MockTableScanBlockInputStream.h>
+
+namespace DB
+{
+MockTableScanBlockInputStream::MockTableScanBlockInputStream(ColumnsWithTypeAndName columns, size_t max_block_size)
+    : columns(columns)
+    , output_index(0)
+    , max_block_size(max_block_size)
+{
+    rows = 0;
+    for (const auto & elem : columns)
+    {
+        if (elem.column)
+        {
+            assert(rows == 0 || rows == elem.column->size());
+            rows = elem.column->size();
+        }
+    }
+}
+
+ColumnPtr MockTableScanBlockInputStream::makeColumn(ColumnWithTypeAndName elem)
+{
+    auto column = elem.type->createColumn();
+    size_t row_count = 0;
+    for (size_t i = output_index; i < rows && row_count < max_block_size; ++i)
+    {
+        column->insert((*elem.column)[i]);
+        row_count++;
+    }
+
+    return column;
+}
+
+Block MockTableScanBlockInputStream::readImpl()
+{
+    if (output_index >= rows)
+        return {};
+    ColumnsWithTypeAndName output_columns;
+    for (const auto & elem : columns)
+    {
+        output_columns.push_back({makeColumn(elem), elem.type, elem.name, elem.column_id});
+    }
+    output_index += max_block_size;
+
+    return Block(output_columns);
+}
+
+} // namespace DB
diff --git a/dbms/src/TestUtils/MockTableScanBlockInputStream.h b/dbms/src/TestUtils/MockTableScanBlockInputStream.h
new file mode 100644
index 00000000000..d148d7f3ac1
--- /dev/null
+++ b/dbms/src/TestUtils/MockTableScanBlockInputStream.h
@@ -0,0 +1,40 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <DataStreams/IProfilingBlockInputStream.h>
+
+namespace DB
+{
+class MockTableScanBlockInputStream : public IProfilingBlockInputStream
+{
+public:
+    MockTableScanBlockInputStream(ColumnsWithTypeAndName columns, size_t max_block_size);
+    Block getHeader() const override
+    {
+        return Block(columns);
+    }
+    String getName() const override { return "MockTableScan"; }
+    ColumnsWithTypeAndName columns;
+    size_t output_index;
+    size_t max_block_size;
+    size_t rows;
+
+protected:
+    Block readImpl() override;
+    ColumnPtr makeColumn(ColumnWithTypeAndName elem);
+};
+
+} // namespace DB
diff --git a/dbms/src/WindowFunctions/IWindowFunction.cpp b/dbms/src/WindowFunctions/IWindowFunction.cpp
new file mode 100644
index 00000000000..84a329e54aa
--- /dev/null
+++ b/dbms/src/WindowFunctions/IWindowFunction.cpp
@@ -0,0 +1,109 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
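+
+// Built-in window function implementations. Each one materializes its value
+// for the stream's current row by appending to output_columns[function_index]:
+// rank() uses peer_group_start_row_number, dense_rank() uses peer_group_number,
+// and row_number() uses current_row_number.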
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+extern const int NOT_IMPLEMENTED;
+} // namespace ErrorCodes
+
+struct WindowFunctionRank final : public IWindowFunction
+{
+    WindowFunctionRank(const std::string & name_,
+                       const DataTypes & argument_types_)
+        : IWindowFunction(name_, argument_types_)
+    {}
+
+    DataTypePtr getReturnType() const override
+    {
+        return std::make_shared<DataTypeInt64>();
+    }
+
+    void windowInsertResultInto(WindowBlockInputStreamPtr stream,
+                                size_t function_index) override
+    {
+        IColumn & to = *stream->blockAt(stream->current_row)
+                            .output_columns[function_index];
+        assert_cast<ColumnInt64 &>(to).getData().push_back(
+            stream->peer_group_start_row_number);
+    }
+};
+
+struct WindowFunctionDenseRank final : public IWindowFunction
+{
+    WindowFunctionDenseRank(const std::string & name_,
+                            const DataTypes & argument_types_)
+        : IWindowFunction(name_, argument_types_)
+    {}
+
+    DataTypePtr getReturnType() const override
+    {
+        return std::make_shared<DataTypeInt64>();
+    }
+
+
+    void windowInsertResultInto(WindowBlockInputStreamPtr stream,
+                                size_t function_index) override
+    {
+        IColumn & to = *stream->blockAt(stream->current_row)
+                            .output_columns[function_index];
+        assert_cast<ColumnInt64 &>(to).getData().push_back(
+            stream->peer_group_number);
+    }
+};
+
+struct WindowFunctionRowNumber final : public IWindowFunction
+{
+    WindowFunctionRowNumber(const std::string & name_,
+                            const DataTypes & argument_types_)
+        : IWindowFunction(name_, argument_types_)
+    {}
+
+    DataTypePtr getReturnType() const override
+    {
+        return std::make_shared<DataTypeInt64>();
+    }
+
+
+    void windowInsertResultInto(WindowBlockInputStreamPtr stream,
+                                size_t function_index) override
+    {
+        IColumn & to = *stream->blockAt(stream->current_row)
+                            .output_columns[function_index];
+        assert_cast<ColumnInt64 &>(to).getData().push_back(
+            stream->current_row_number);
+    }
+};
+
+void registerWindowFunctions(WindowFunctionFactory & factory)
+{
+    factory.registerFunction(
+        "rank",
+        [](const std::string & name, const DataTypes & argument_types) { return std::make_shared<WindowFunctionRank>(name, argument_types); });
+    factory.registerFunction(
+        "dense_rank",
+        [](const std::string & name, const DataTypes & argument_types) { return std::make_shared<WindowFunctionDenseRank>(name, argument_types); });
+    factory.registerFunction(
+        "row_number",
+        [](const std::string & name, const DataTypes & argument_types) { return std::make_shared<WindowFunctionRowNumber>(name, argument_types); });
+}
+} // namespace DB
diff --git a/dbms/src/WindowFunctions/IWindowFunction.h b/dbms/src/WindowFunctions/IWindowFunction.h
new file mode 100644
index 00000000000..99d121d349d
--- /dev/null
+++ b/dbms/src/WindowFunctions/IWindowFunction.h
@@ -0,0 +1,58 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
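+
+// IWindowFunction: the minimal interface a window function implements -- a
+// result type (getReturnType) plus windowInsertResultInto(), which must append
+// exactly one value for the stream's current row.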
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+
+
+namespace DB
+{
+class WindowBlockInputStream;
+using WindowBlockInputStreamPtr = std::shared_ptr<WindowBlockInputStream>;
+
+class IWindowFunction
+{
+public:
+    IWindowFunction(const std::string & name_,
+                    const DataTypes & argument_types_)
+        : name(name_)
+        , argument_types(argument_types_)
+    {}
+
+    String getName()
+    {
+        return name;
+    }
+
+    virtual ~IWindowFunction() = default;
+
+    virtual DataTypePtr getReturnType() const = 0;
+    // Must insert the result for current_row.
+    virtual void windowInsertResultInto(WindowBlockInputStreamPtr streamPtr,
+                                        size_t function_index)
+        = 0;
+
+protected:
+    std::string name;
+    DataTypes argument_types;
+};
+
+using WindowFunctionPtr = std::shared_ptr<IWindowFunction>;
+
+} // namespace DB
diff --git a/dbms/src/WindowFunctions/WindowFunctionFactory.cpp b/dbms/src/WindowFunctions/WindowFunctionFactory.cpp
new file mode 100644
index 00000000000..91f0da84a03
--- /dev/null
+++ b/dbms/src/WindowFunctions/WindowFunctionFactory.cpp
@@ -0,0 +1,80 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int UNKNOWN_WINDOW_FUNCTION;
+extern const int LOGICAL_ERROR;
+} // namespace ErrorCodes
+
+void WindowFunctionFactory::registerFunction(const String & name, Creator creator)
+{
+    if (creator == nullptr)
+        throw Exception(
+            fmt::format("WindowFunctionFactory: the window function {} has been provided a null constructor", name),
+            ErrorCodes::LOGICAL_ERROR);
+
+    if (!window_functions.emplace(name, creator).second)
+        throw Exception(
+            fmt::format("WindowFunctionFactory: the window function {} is not unique", name),
+            ErrorCodes::LOGICAL_ERROR);
+}
+
+WindowFunctionPtr WindowFunctionFactory::get(
+    const String & name,
+    const DataTypes & argument_types) const
+{
+    auto res = getImpl(name, argument_types);
+    if (!res)
+        throw Exception("Logical error: WindowFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR);
+    return res;
+}
+
+
+WindowFunctionPtr WindowFunctionFactory::getImpl(
+    const String & name,
+    const DataTypes & argument_types) const
+{
+    /// Find by exact match.
+    auto it = window_functions.find(name);
+    if (it != window_functions.end())
+        return it->second(name, argument_types);
+
+    throw Exception("Unknown window function " + name, ErrorCodes::UNKNOWN_WINDOW_FUNCTION);
+}
+
+
+WindowFunctionPtr WindowFunctionFactory::tryGet(const String & name, const DataTypes & argument_types) const
+{
+    return isWindowFunctionName(name)
+        ? get(name, argument_types)
+        : nullptr;
+}
+
+
+bool WindowFunctionFactory::isWindowFunctionName(const String & name) const
+{
+    return window_functions.count(name);
+}
+
+} // namespace DB
diff --git a/dbms/src/WindowFunctions/WindowFunctionFactory.h b/dbms/src/WindowFunctions/WindowFunctionFactory.h
new file mode 100644
index 00000000000..ae2091c16a1
--- /dev/null
+++ b/dbms/src/WindowFunctions/WindowFunctionFactory.h
@@ -0,0 +1,70 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+class Context;
+class IDataType;
+
+using DataTypePtr = std::shared_ptr<const IDataType>;
+using DataTypes = std::vector<DataTypePtr>;
+
+/** Creates a window function by name.
+  */
+class WindowFunctionFactory final : public ext::Singleton<WindowFunctionFactory>
+{
+public:
+    using Creator = std::function<WindowFunctionPtr(const String & name, const DataTypes & argument_types)>;
+
+    /// Register a function by its name.
+    /// No locking, you must register all functions before usage of get.
+    void registerFunction(
+        const String & name,
+        Creator creator);
+
+    /// Throws an exception if not found.
+    WindowFunctionPtr get(
+        const String & name,
+        const DataTypes & argument_types) const;
+
+    /// Returns nullptr if not found.
+    WindowFunctionPtr tryGet(const String & name, const DataTypes & argument_types) const;
+
+    bool isWindowFunctionName(const String & name) const;
+
+private:
+    WindowFunctionPtr getImpl(
+        const String & name,
+        const DataTypes & argument_types) const;
+
+private:
+    using WindowFunctions = std::unordered_map<String, Creator>;
+
+    WindowFunctions window_functions;
+};
+
+} // namespace DB
diff --git a/dbms/src/WindowFunctions/registerWindowFunctions.cpp b/dbms/src/WindowFunctions/registerWindowFunctions.cpp
new file mode 100644
index 00000000000..54726399cf4
--- /dev/null
+++ b/dbms/src/WindowFunctions/registerWindowFunctions.cpp
@@ -0,0 +1,27 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
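+
+// Registration entry point called from server startup (see Server.cpp above):
+// fetches the WindowFunctionFactory singleton and installs the built-in
+// window functions.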
+ +#include + +namespace DB +{ +void registerWindowFunctions(WindowFunctionFactory & factory); + +void registerWindowFunctions() +{ + auto & window_factory = WindowFunctionFactory::instance(); + registerWindowFunctions(window_factory); +} + +} // namespace DB diff --git a/dbms/src/WindowFunctions/registerWindowFunctions.h b/dbms/src/WindowFunctions/registerWindowFunctions.h new file mode 100644 index 00000000000..96f1e0db234 --- /dev/null +++ b/dbms/src/WindowFunctions/registerWindowFunctions.h @@ -0,0 +1,21 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace DB +{ +void registerWindowFunctions(); + +} diff --git a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp new file mode 100644 index 00000000000..f94a20c1a65 --- /dev/null +++ b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp @@ -0,0 +1,345 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
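+
+// End-to-end tests for the window executor: a MockTableScanBlockInputStream
+// feeds the source columns, handleWindowOrder/handleWindow are driven with
+// tipb::Sort/tipb::Window messages decoded from the JSON literals below, and
+// the merged output is checked against the expected columns via ASSERT_BLOCK_EQ.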
+ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB::tests +{ +class WindowFunction : public DB::tests::FunctionTest +{ +protected: + std::shared_ptr mock_interpreter; + + void SetUp() override + { + DB::tests::FunctionTest::SetUp(); + DB::registerWindowFunctions(); + } + + template + ColumnWithTypeAndName toNullableVec(String name, const std::vector::FieldType>> & v) + { + return createColumn>(v, name); + } + + template + ColumnWithTypeAndName toVec(String name, const std::vector::FieldType> & v) + { + return createColumn(v, name); + } + + template + static ColumnWithTypeAndName toConst(const T s) + { + return createConstColumn(1, s); + } + + static ColumnWithTypeAndName toDatetimeVec(String name, const std::vector & v, int fsp) + { + std::vector::FieldType> vec; + for (const auto & value_str : v) + { + Field value = parseMyDateTime(value_str, fsp); + vec.push_back(value.template safeGet()); + } + DataTypePtr data_type = std::make_shared(fsp); + return {makeColumn(data_type, vec), data_type, name, 0}; + } + + static ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector & v, int fsp) + { + std::vector::FieldType>> vec; + for (const auto & value_str : v) + { + if (!value_str.empty()) + { + Field value = parseMyDateTime(value_str, fsp); + vec.push_back(value.template safeGet()); + } + else + { + vec.push_back({}); + } + } + DataTypePtr data_type = makeNullable(std::make_shared(fsp)); + return {makeColumn>(data_type, vec), data_type, name, 0}; + } + + void setMaxBlockSize(int size) + { + context.getSettingsRef().max_block_size.set(size); + } + + void mockInterpreter(std::vector source_columns, Context context) + { + std::vector mock_input_streams_vec = {}; + DAGQueryBlock mock_query_block(0, static_cast>(nullptr)); + std::vector mock_subqueries_for_sets = {}; + mock_interpreter = std::make_shared(context, + mock_input_streams_vec, + mock_query_block, + 1); + + mock_interpreter->analyzer = std::make_unique(std::move(source_columns), context); + } + + void mockExecuteTableScan(DAGPipeline & pipeline, ColumnsWithTypeAndName columns) + { + pipeline.streams.push_back(std::make_shared(columns, context.getSettingsRef().max_block_size)); + mock_interpreter->input_streams_vec.push_back(pipeline.streams); + } + + void mockExecuteWindowOrder(DAGPipeline & pipeline, std::string sort_json_str) + { + tipb::Sort sort; + google::protobuf::util::JsonStringToMessage(sort_json_str, &sort); + mock_interpreter->handleWindowOrder(pipeline, sort); + mock_interpreter->input_streams_vec[0] = pipeline.streams; + NamesWithAliases final_project; + for (const auto & column : (*mock_interpreter->analyzer).source_columns) + { + final_project.push_back({column.name, ""}); + } + mockExecuteProject(pipeline, final_project); + } + + void mockExecuteWindow(DAGPipeline & pipeline, std::string window_json_str) + { + tipb::Window window; + google::protobuf::util::JsonStringToMessage(window_json_str, &window); + mock_interpreter->handleWindow(pipeline, window); + mock_interpreter->input_streams_vec[0] = pipeline.streams; + NamesWithAliases final_project; + for (const auto & column : (*mock_interpreter->analyzer).source_columns) + { + final_project.push_back({column.name, ""}); + } + mockExecuteProject(pipeline, final_project); + } + + void mockExecuteProject(DAGPipeline & pipeline, NamesWithAliases & final_project) + { + mock_interpreter->executeProject(pipeline, final_project); + } + + static Block mergeBlocks(Blocks blocks) + { + if (blocks.empty()) + { + return 
{};
+        }
+        Block sample_block;
+        std::vector<MutableColumnPtr> actual_cols;
+
+        for (const auto & block : blocks)
+        {
+            if (!sample_block)
+            {
+                sample_block = block;
+                for (const auto & column : block.getColumnsWithTypeAndName())
+                {
+                    actual_cols.push_back(column.type->createColumn());
+                }
+            }
+
+            for (size_t i = 0; i < block.columns(); ++i)
+            {
+                for (size_t j = 0; j < block.rows(); ++j)
+                {
+                    actual_cols[i]->insert((*(block.getColumnsWithTypeAndName())[i].column)[j]);
+                }
+            }
+        }
+
+        ColumnsWithTypeAndName actual_columns;
+
+        for (size_t i = 0; i < actual_cols.size(); ++i)
+        {
+            actual_columns.push_back({std::move(actual_cols[i]), sample_block.getColumnsWithTypeAndName()[i].type, sample_block.getColumnsWithTypeAndName()[i].name, sample_block.getColumnsWithTypeAndName()[i].column_id});
+        }
+        return Block(actual_columns);
+    }
+
+    void testOneWindowFunction(const std::vector<NameAndTypePair> & source_column_types, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns, const std::string window_json_str, const std::string sort_json_str)
+    {
+        mockInterpreter(source_column_types, context);
+        DAGPipeline pipeline;
+        ExpressionActionsChain chain;
+        Block expect_block(expect_columns);
+
+        mockExecuteTableScan(pipeline, source_columns);
+
+        mockExecuteWindowOrder(pipeline, sort_json_str);
+
+        mockExecuteWindow(pipeline, window_json_str);
+
+        auto stream = pipeline.firstStream();
+
+        Blocks actual_blocks;
+        while (Block block = stream->read())
+        {
+            actual_blocks.push_back(block);
+        }
+
+        Block actual_block = mergeBlocks(actual_blocks);
+
+        if (actual_block)
+        {
+            // Check that the input columns are properly split into many blocks
+            ASSERT_EQ(actual_blocks.size(), (actual_block.rows() - 1) / context.getSettingsRef().max_block_size + 1);
+        }
+        ASSERT_BLOCK_EQ(expect_block, actual_block);
+    }
+};
+
+TEST_F(WindowFunction, testWindowFunctionByPartitionAndOrder)
+try
+{
+    setMaxBlockSize(3);
+
+    std::string window_json;
+    std::string sort_json;
+
+    /***** row_number with different types of input *****/
+    // int - sql : select *, row_number() over w1 from test1 window w1 as (partition by partition_int order by order_int)
+    window_json = 
R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAkMCV6NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; + sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAkMCV6NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; + testOneWindowFunction( + {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // null input + testOneWindowFunction( + {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + 
{toNullableVec("partition", {}), toNullableVec("order", {})}, + {}, + window_json, + sort_json); + + // nullable + testOneWindowFunction( + {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // string - sql : select *, row_number() over w1 from test2 window w1 as (partition by partition_string order by order_string) + window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGA8Nz57tP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"}]},"executorId":"ExchangeReceiver_11"}}}})"; + sort_json = 
R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGA8Nz57tP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"}]},"executorId":"ExchangeReceiver_11"}})"; + testOneWindowFunction( + {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, + {toVec("partition", {"banana", "banana", "banana", "banana", "apple", "apple", "apple", "apple"}), toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"})}, + {toVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // nullable + testOneWindowFunction( + {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}, + {toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // decimal - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_decimal) + window_json = 
R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAoN3M99P+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; + sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAoN3M99P+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; + testOneWindowFunction( + {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, + {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, + {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // nullable + testOneWindowFunction( + {NameAndTypePair("partition", 
makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, + {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // datetime - select *, row_number() over w1 from test4 window w1 as (partition by partition_datetime order by order_datetime); + window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsNmBhdT+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; + sort_json = 
R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsNmBhdT+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; + testOneWindowFunction( + {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, + {toDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, + {toDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // nullable + testOneWindowFunction( + {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, + {toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, + window_json, + sort_json); + + // 2 partiton key and 2 order key + // sql : select *, row_number() over w1 from test6 window w1 as (partition by partition_int1, partition_int2 order by order_int1,order_int2) + window_json = 
R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIKA0Img1If/BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; + sort_json = 
R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIKA0Img1If/BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; + testOneWindowFunction( + {NameAndTypePair("partition1", std::make_shared()), NameAndTypePair("partition2", std::make_shared()), NameAndTypePair("order1", std::make_shared()), NameAndTypePair("order2", std::make_shared())}, + {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), toVec("order1", {2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}), toVec("order2", {2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1})}, + {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), toVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), toVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})}, + window_json, + sort_json); + + /***** rank, dense_rank *****/ + window_json = 
R"({"funcDesc":[{"tp":"Rank","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false},{"tp":"DenseRank","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsOnl3NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; + sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsOnl3NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; + testOneWindowFunction( + {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})}, + window_json, + sort_json); + + // nullable + testOneWindowFunction( + {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", 
makeNullable(std::make_shared()))}, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})}, + window_json, + sort_json); + + testOneWindowFunction( + {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}, + {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})}, + window_json, + sort_json); +} +CATCH +} // namespace DB::tests From 2d234262eb551b04b0ce304c7c6bacac847ca264 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Sun, 24 Apr 2022 11:08:49 +0800 Subject: [PATCH 38/79] fix tiflash crash when setting join_concurrent_build = 0 (#4735) close pingcap/tiflash#4734 --- dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp | 3 +-- dbms/src/DataStreams/HashJoinBuildBlockInputStream.h | 6 +++--- .../src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp | 10 +++++++--- dbms/src/Interpreters/Join.cpp | 2 ++ 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp index e1a54c7a2c3..91fd34bfff4 100644 --- a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp +++ b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp @@ -16,13 +16,12 @@ #include namespace DB { - Block HashJoinBuildBlockInputStream::readImpl() { Block block = children.back()->read(); if (!block) return block; - join->insertFromBlock(block, stream_index); + join->insertFromBlock(block, concurrency_build_index); return block; } diff --git a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h index 41576e6e061..57b505f5237 100644 --- a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h +++ b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h @@ -28,9 +28,9 @@ class HashJoinBuildBlockInputStream : public IProfilingBlockInputStream HashJoinBuildBlockInputStream( const BlockInputStreamPtr & input, JoinPtr join_, - size_t stream_index_, + size_t concurrency_build_index_, const String & req_id) - : stream_index(stream_index_) + : concurrency_build_index(concurrency_build_index_) , log(Logger::get(NAME, req_id)) { children.push_back(input); @@ -44,7 +44,7 @@ class HashJoinBuildBlockInputStream : public IProfilingBlockInputStream private: JoinPtr join; - size_t stream_index; + size_t concurrency_build_index; const LoggerPtr log; }; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index cb609f6887e..424f6304f6c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -697,9 +697,13 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & recordJoinExecuteInfo(swap_join_side ? 
0 : 1, join_ptr);
 
     // add a HashJoinBuildBlockInputStream to build a shared hash table
-    size_t stream_index = 0;
-    right_pipeline.transform(
-        [&](auto & stream) { stream = std::make_shared(stream, join_ptr, stream_index++, log->identifier()); });
+    size_t concurrency_build_index = 0;
+    auto get_concurrency_build_index = [&concurrency_build_index, &join_build_concurrency]() {
+        return (concurrency_build_index++) % join_build_concurrency;
+    };
+    right_pipeline.transform([&](auto & stream) {
+        stream = std::make_shared(stream, join_ptr, get_concurrency_build_index(), log->identifier());
+    });
     executeUnion(right_pipeline, max_streams, log, /*ignore_block=*/true);
 
     right_query.source = right_pipeline.firstStream();
diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp
index 318038fa5b0..f1275d8e88e 100644
--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@@ -772,6 +772,8 @@ bool Join::insertFromBlock(const Block & block)
 /// the block should be valid.
 void Join::insertFromBlock(const Block & block, size_t stream_index)
 {
+    assert(stream_index < build_concurrency);
+
     if (empty())
         throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR);
     std::shared_lock lock(rwlock);

From d752d0e3d3c4e62f9ac02d5c55905f504c97979f Mon Sep 17 00:00:00 2001
From: Flowyi
Date: Mon, 25 Apr 2022 11:34:49 +0800
Subject: [PATCH 39/79] Fixed some compile errors on mac in PS V3 (#4741)

close pingcap/tiflash#4744
---
 dbms/src/Storages/Page/V3/PageDirectory.cpp         | 1 -
 dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp
index 4c7214da466..4dfe2c6da09 100644
--- a/dbms/src/Storages/Page/V3/PageDirectory.cpp
+++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp
@@ -26,7 +26,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h
index 6e57abd43f4..b691d0b1d81 100644
--- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h
+++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h
@@ -40,7 +40,7 @@ class STDMapSpaceMap
     bool check(std::function checker, size_t size) override
     {
         size_t idx = 0;
-        for (const auto [offset, length] : free_map)
+        for (const auto & [offset, length] : free_map)
         {
             if (!checker(idx, offset, offset + length))
                 return false;

From c7510734bf6b57a072e3c67ee602c183440aa990 Mon Sep 17 00:00:00 2001
From: SeaRise
Date: Mon, 25 Apr 2022 13:30:50 +0800
Subject: [PATCH 40/79] Refine handle agg and exchange sender (#4721)

ref pingcap/tiflash#4118
---
 .../AggregationInterpreterHelper.cpp          | 117 +++++++++++++++++
 .../AggregationInterpreterHelper.h            |  44 +++++++
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  | 119 ++++--------------
 .../Coprocessor/DAGQueryBlockInterpreter.h    |   4 +-
 .../ExchangeSenderInterpreterHelper.cpp       |  68 ++++++++++
 .../ExchangeSenderInterpreterHelper.h         |  26 ++++
 6 files changed, 278 insertions(+), 100 deletions(-)
 create mode 100644 dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.cpp
 create mode 100644 dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.h
 create mode 100644 dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.cpp
 create mode 100644 dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.h

diff --git a/dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.cpp
new file mode 100644 index 00000000000..404a1932b87 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.cpp @@ -0,0 +1,117 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +namespace DB::AggregationInterpreterHelper +{ +namespace +{ +bool isFinalAggMode(const tipb::Expr & expr) +{ + if (!expr.has_aggfuncmode()) + /// set default value to true to make it compatible with old version of TiDB since before this + /// change, all the aggregation in TiFlash is treated as final aggregation + return true; + return expr.aggfuncmode() == tipb::AggFunctionMode::FinalMode || expr.aggfuncmode() == tipb::AggFunctionMode::CompleteMode; +} + +bool isAllowToUseTwoLevelGroupBy(size_t before_agg_streams_size, const Settings & settings) +{ + /** Two-level aggregation is useful in two cases: + * 1. Parallel aggregation is done, and the results should be merged in parallel. + * 2. An aggregation is done with store of temporary data on the disk, and they need to be merged in a memory efficient way. + */ + return before_agg_streams_size > 1 || settings.max_bytes_before_external_group_by != 0; +} +} // namespace + +bool isFinalAgg(const tipb::Aggregation & aggregation) +{ + /// set default value to true to make it compatible with old version of TiDB since before this + /// change, all the aggregation in TiFlash is treated as final aggregation + bool is_final_agg = true; + if (aggregation.agg_func_size() > 0 && !isFinalAggMode(aggregation.agg_func(0))) + is_final_agg = false; + for (int i = 1; i < aggregation.agg_func_size(); ++i) + { + if (unlikely(is_final_agg != isFinalAggMode(aggregation.agg_func(i)))) + throw TiFlashException("Different aggregation mode detected", Errors::Coprocessor::BadRequest); + } + return is_final_agg; +} + +bool isGroupByCollationSensitive(const Context & context) +{ + // todo now we can tell if the aggregation is final stage or partial stage, + // maybe we can do collation insensitive aggregation if the stage is partial + + /// collation sensitive group by is slower than normal group by, use normal group by by default + return context.getSettingsRef().group_by_collation_sensitive || context.getDAGContext()->isMPPTask(); +} + +Aggregator::Params buildParams( + const Context & context, + const Block & before_agg_header, + size_t before_agg_streams_size, + const Names & key_names, + const TiDB::TiDBCollators & collators, + const AggregateDescriptions & aggregate_descriptions, + bool is_final_agg) +{ + ColumnNumbers keys; + for (const auto & name : key_names) + { + keys.push_back(before_agg_header.getPositionByName(name)); + } + + const Settings & settings = context.getSettingsRef(); + + bool allow_to_use_two_level_group_by = isAllowToUseTwoLevelGroupBy(before_agg_streams_size, settings); + + bool has_collator = std::any_of(begin(collators), end(collators), [](const auto & p) { return p != nullptr; }); + + return Aggregator::Params( + before_agg_header, + keys, + 
aggregate_descriptions, + false, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), + allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), + settings.max_bytes_before_external_group_by, + !is_final_agg, + context.getTemporaryPath(), + has_collator ? collators : TiDB::dummy_collators); +} + +void fillArgColumnNumbers(AggregateDescriptions & aggregate_descriptions, const Block & before_agg_header) +{ + for (auto & descr : aggregate_descriptions) + { + if (descr.arguments.empty()) + { + for (const auto & name : descr.argument_names) + { + descr.arguments.push_back(before_agg_header.getPositionByName(name)); + } + } + } +} +} // namespace DB::AggregationInterpreterHelper \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.h b/dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.h new file mode 100644 index 00000000000..4b9b54a76e9 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/AggregationInterpreterHelper.h @@ -0,0 +1,44 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ +class Context; + +namespace AggregationInterpreterHelper +{ +bool isFinalAgg(const tipb::Aggregation & aggregation); + +bool isGroupByCollationSensitive(const Context & context); + +Aggregator::Params buildParams( + const Context & context, + const Block & before_agg_header, + size_t before_agg_streams_size, + const Names & key_names, + const TiDB::TiDBCollators & collators, + const AggregateDescriptions & aggregate_descriptions, + bool is_final_agg); + +void fillArgColumnNumbers(AggregateDescriptions & aggregate_descriptions, const Block & before_agg_header); +} // namespace AggregationInterpreterHelper +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 424f6304f6c..561011e2d95 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -33,10 +33,11 @@ #include #include #include -#include +#include #include #include #include +#include #include #include #include @@ -104,15 +105,6 @@ bool addExtraCastsAfterTs( return analyzer.appendExtraCastsAfterTS(chain, need_cast_column, table_scan); } -bool isFinalAgg(const tipb::Expr & expr) -{ - if (!expr.has_aggfuncmode()) - /// set default value to true to make it compatible with old version of TiDB since before this - /// change, all the aggregation in TiFlash is treated as final aggregation - return true; - return expr.aggfuncmode() == tipb::AggFunctionMode::FinalMode || expr.aggfuncmode() == tipb::AggFunctionMode::CompleteMode; -} - AnalysisResult analyzeExpressions( Context & context, DAGExpressionAnalyzer & analyzer, @@ -134,27 +126,12 @@ AnalysisResult 
analyzeExpressions( // There will be either Agg... if (query_block.aggregation) { - /// set default value to true to make it compatible with old version of TiDB since before this - /// change, all the aggregation in TiFlash is treated as final aggregation - res.is_final_agg = true; - const auto & aggregation = query_block.aggregation->aggregation(); - if (aggregation.agg_func_size() > 0 && !isFinalAgg(aggregation.agg_func(0))) - res.is_final_agg = false; - for (int i = 1; i < aggregation.agg_func_size(); i++) - { - if (res.is_final_agg != isFinalAgg(aggregation.agg_func(i))) - throw TiFlashException("Different aggregation mode detected", Errors::Coprocessor::BadRequest); - } - // todo now we can tell if the aggregation is final stage or partial stage, maybe we can do collation insensitive - // aggregation if the stage is partial - bool group_by_collation_sensitive = - /// collation sensitive group by is slower than normal group by, use normal group by by default - context.getSettingsRef().group_by_collation_sensitive || context.getDAGContext()->isMPPTask(); + res.is_final_agg = AggregationInterpreterHelper::isFinalAgg(query_block.aggregation->aggregation()); std::tie(res.aggregation_keys, res.aggregation_collators, res.aggregate_descriptions, res.before_aggregation) = analyzer.appendAggregation( chain, query_block.aggregation->aggregation(), - group_by_collation_sensitive); + AggregationInterpreterHelper::isGroupByCollationSensitive(context)); if (query_block.having != nullptr) { @@ -777,56 +754,29 @@ void DAGQueryBlockInterpreter::executeWindow( void DAGQueryBlockInterpreter::executeAggregation( DAGPipeline & pipeline, const ExpressionActionsPtr & expression_actions_ptr, - Names & key_names, - TiDB::TiDBCollators & collators, + const Names & key_names, + const TiDB::TiDBCollators & collators, AggregateDescriptions & aggregate_descriptions, bool is_final_agg) { pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expression_actions_ptr, log->identifier()); }); - Block header = pipeline.firstStream()->getHeader(); - ColumnNumbers keys; - for (const auto & name : key_names) - { - keys.push_back(header.getPositionByName(name)); - } - for (auto & descr : aggregate_descriptions) - { - if (descr.arguments.empty()) - { - for (const auto & name : descr.argument_names) - { - descr.arguments.push_back(header.getPositionByName(name)); - } - } - } - - const Settings & settings = context.getSettingsRef(); - - /** Two-level aggregation is useful in two cases: - * 1. Parallel aggregation is done, and the results should be merged in parallel. - * 2. An aggregation is done with store of temporary data on the disk, and they need to be merged in a memory efficient way. - */ - bool allow_to_use_two_level_group_by = pipeline.streams.size() > 1 || settings.max_bytes_before_external_group_by != 0; - bool has_collator = std::any_of(begin(collators), end(collators), [](const auto & p) { return p != nullptr; }); + Block before_agg_header = pipeline.firstStream()->getHeader(); - Aggregator::Params params( - header, - keys, + AggregationInterpreterHelper::fillArgColumnNumbers(aggregate_descriptions, before_agg_header); + auto params = AggregationInterpreterHelper::buildParams( + context, + before_agg_header, + pipeline.streams.size(), + key_names, + collators, aggregate_descriptions, - false, - settings.max_rows_to_group_by, - settings.group_by_overflow_mode, - allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), - allow_to_use_two_level_group_by ? 
settings.group_by_two_level_threshold_bytes : SettingUInt64(0), - settings.max_bytes_before_external_group_by, - !is_final_agg, - context.getTemporaryPath(), - has_collator ? collators : TiDB::dummy_collators); + is_final_agg); /// If there are several sources, then we perform parallel aggregation if (pipeline.streams.size() > 1) { + const Settings & settings = context.getSettingsRef(); BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams, log); pipeline.firstStream() = std::make_shared( pipeline.streams, @@ -860,7 +810,6 @@ void DAGQueryBlockInterpreter::executeAggregation( log->identifier()); recordProfileStreams(pipeline, query_block.aggregation_name); } - // add cast } void DAGQueryBlockInterpreter::executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr) @@ -1205,44 +1154,18 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline) { - /// only run in MPP - assert(dagContext().isMPPTask() && dagContext().tunnel_set != nullptr); + RUNTIME_ASSERT(dagContext().isMPPTask() && dagContext().tunnel_set != nullptr, log, "exchange_sender only run in MPP"); /// exchange sender should be at the top of operators const auto & exchange_sender = query_block.exchange_sender->exchange_sender(); - /// get partition column ids - const auto & part_keys = exchange_sender.partition_keys(); - std::vector partition_col_id; - TiDB::TiDBCollators collators; - /// in case TiDB is an old version, it has no collation info - bool has_collator_info = exchange_sender.types_size() != 0; - if (has_collator_info && part_keys.size() != exchange_sender.types_size()) - { - throw TiFlashException( - std::string(__PRETTY_FUNCTION__) + ": Invalid plan, in ExchangeSender, the length of partition_keys and types is not the same when TiDB new collation is enabled", - Errors::Coprocessor::BadRequest); - } - for (int i = 0; i < part_keys.size(); ++i) - { - const auto & expr = part_keys[i]; - assert(isColumnExpr(expr)); - auto column_index = decodeDAGInt64(expr.val()); - partition_col_id.emplace_back(column_index); - if (has_collator_info && removeNullable(getDataTypeByFieldTypeForComputingLayer(expr.field_type()))->isString()) - { - collators.emplace_back(getCollatorFromFieldType(exchange_sender.types(i))); - } - else - { - collators.emplace_back(nullptr); - } - } + std::vector partition_col_ids = ExchangeSenderInterpreterHelper::genPartitionColIds(exchange_sender); + TiDB::TiDBCollators partition_col_collators = ExchangeSenderInterpreterHelper::genPartitionColCollators(exchange_sender); int stream_id = 0; pipeline.transform([&](auto & stream) { // construct writer std::unique_ptr response_writer = std::make_unique>( context.getDAGContext()->tunnel_set, - partition_col_id, - collators, + partition_col_ids, + partition_col_collators, exchange_sender.tp(), context.getSettingsRef().dag_records_per_chunk, context.getSettingsRef().batch_send_min_limit, diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 1f19e5d5569..8e6908dec80 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -95,8 +95,8 @@ class DAGQueryBlockInterpreter void executeAggregation( DAGPipeline & pipeline, const ExpressionActionsPtr & expression_actions_ptr, - Names & key_names, - TiDB::TiDBCollators & collators, + const Names & key_names, + const 
TiDB::TiDBCollators & collators, AggregateDescriptions & aggregate_descriptions, bool is_final_agg); void executeProject(DAGPipeline & pipeline, NamesWithAliases & project_cols); diff --git a/dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.cpp new file mode 100644 index 00000000000..4e659230c84 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.cpp @@ -0,0 +1,68 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB::ExchangeSenderInterpreterHelper +{ +std::vector genPartitionColIds(const tipb::ExchangeSender & exchange_sender) +{ + std::vector partition_col_ids; + for (const auto & part_key : exchange_sender.partition_keys()) + { + if (unlikely(!isColumnExpr(part_key))) + { + throw TiFlashException( + fmt::format("{}: Invalid plan, in ExchangeSender, part_key of ExchangeSender must be column", __PRETTY_FUNCTION__), + Errors::Coprocessor::BadRequest); + } + partition_col_ids.emplace_back(decodeDAGInt64(part_key.val())); + } + return partition_col_ids; +} + +TiDB::TiDBCollators genPartitionColCollators(const tipb::ExchangeSender & exchange_sender) +{ + TiDB::TiDBCollators partition_col_collators; + const auto & part_keys = exchange_sender.partition_keys(); + /// in case TiDB is an old version, it has no collation info + bool has_collator_info = exchange_sender.types_size() != 0; + if (unlikely(has_collator_info && part_keys.size() != exchange_sender.types_size())) + { + throw TiFlashException( + fmt::format("{}: Invalid plan, in ExchangeSender, the length of partition_keys and types is not the same when TiDB new collation is enabled", __PRETTY_FUNCTION__), + Errors::Coprocessor::BadRequest); + } + for (int i = 0; i < part_keys.size(); ++i) + { + const auto & expr = part_keys[i]; + if (has_collator_info && removeNullable(getDataTypeByFieldTypeForComputingLayer(expr.field_type()))->isString()) + { + partition_col_collators.emplace_back(getCollatorFromFieldType(exchange_sender.types(i))); + } + else + { + partition_col_collators.emplace_back(nullptr); + } + } + return partition_col_collators; +} +} // namespace DB::ExchangeSenderInterpreterHelper \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.h b/dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.h new file mode 100644 index 00000000000..7cd0ce7fa53 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/ExchangeSenderInterpreterHelper.h @@ -0,0 +1,26 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace DB::ExchangeSenderInterpreterHelper +{ +std::vector genPartitionColIds(const tipb::ExchangeSender & exchange_sender); + +TiDB::TiDBCollators genPartitionColCollators(const tipb::ExchangeSender & exchange_sender); +} // namespace DB::ExchangeSenderInterpreterHelper \ No newline at end of file From 1bbe1b08f562892a672686148ba6334599bd82af Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Mon, 25 Apr 2022 15:22:50 +0800 Subject: [PATCH 41/79] update TiFlash proxy to raftstore-proxy-6.0 for up(down)grade test (#4748) ref pingcap/tiflash#4618 --- contrib/tiflash-proxy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index 5874ff95712..cb1f8d04fb3 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit 5874ff95712aedcfd62c4b6f352a3420126cbc9a +Subproject commit cb1f8d04fb31a49be60f4c67afb3d4aee58fa2a7 From b725d346e38f2617fcf3b598f072987343736cbf Mon Sep 17 00:00:00 2001 From: lidezhu <47731263+lidezhu@users.noreply.github.com> Date: Mon, 25 Apr 2022 16:46:50 +0800 Subject: [PATCH 42/79] ignore delmark when add minmax for pk column (#4746) close pingcap/tiflash#4747 --- .../Storages/DeltaMerge/File/DMFileWriter.cpp | 10 ++++- .../DeltaMerge/tests/dm_basic_include.h | 5 ++- .../DeltaMerge/tests/gtest_dm_segment.cpp | 37 +++++++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp index 4ea3e398aaa..3bff05ef19f 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp @@ -193,7 +193,9 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu auto & minmax_indexs = single_file_stream->minmax_indexs; if (auto iter = minmax_indexs.find(stream_name); iter != minmax_indexs.end()) { - iter->second->addPack(column, del_mark); + // For EXTRA_HANDLE_COLUMN_ID, we ignore del_mark when add minmax index. + // Because we need all rows which satisfy a certain range when place delta index no matter whether the row is a delete row. + iter->second->addPack(column, col_id == EXTRA_HANDLE_COLUMN_ID ? nullptr : del_mark); } auto offset_in_compressed_block = single_file_stream->original_layer.offset(); @@ -255,7 +257,11 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu const auto name = DMFile::getFileNameBase(col_id, substream); auto & stream = column_streams.at(name); if (stream->minmaxes) - stream->minmaxes->addPack(column, del_mark); + { + // For EXTRA_HANDLE_COLUMN_ID, we ignore del_mark when add minmax index. + // Because we need all rows which satisfy a certain range when place delta index no matter whether the row is a delete row. + stream->minmaxes->addPack(column, col_id == EXTRA_HANDLE_COLUMN_ID ? nullptr : del_mark); + } /// There could already be enough data to compress into the new block. 
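        /// Once the buffered (uncompressed) data reaches options.min_compress_block_size,
        /// it is flushed as a new compressed block; that is the check right below.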
if (stream->compressed_buf->offset() >= options.min_compress_block_size) diff --git a/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h b/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h index 4fb4ae20bdb..5554f269381 100644 --- a/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h +++ b/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h @@ -241,9 +241,10 @@ class DMTestEnv * @param ts_beg `timestamp`'s value begin * @param ts_end `timestamp`'s value end (not included) * @param reversed increasing/decreasing insert `timestamp`'s value + * @param deleted if deleted is false, set `tag` to 0; otherwise set `tag` to 1 * @return */ - static Block prepareBlockWithTso(Int64 pk, size_t ts_beg, size_t ts_end, bool reversed = false) + static Block prepareBlockWithTso(Int64 pk, size_t ts_beg, size_t ts_end, bool reversed = false, bool deleted = false) { Block block; const size_t num_rows = (ts_end - ts_beg); @@ -259,7 +260,7 @@ class DMTestEnv VERSION_COLUMN_ID)); // tag_col block.insert(DB::tests::createColumn( - std::vector(num_rows, 0), + std::vector(num_rows, deleted ? 1 : 0), TAG_COLUMN_NAME, TAG_COLUMN_ID)); return block; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 3de6e5f9993..197ec73fdb5 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -212,6 +212,43 @@ try } CATCH +TEST_F(Segment_test, WriteRead2) +try +{ + const size_t num_rows_write = dmContext().stable_pack_rows; + { + // write a block with rows all deleted + Block block = DMTestEnv::prepareBlockWithTso(2, 100, 100 + num_rows_write, false, true); + segment->write(dmContext(), block); + // write not deleted rows with larger pk + Block block2 = DMTestEnv::prepareBlockWithTso(3, 100, 100 + num_rows_write, false, false); + segment->write(dmContext(), block2); + + // flush segment and make sure there is two packs in stable + segment = segment->mergeDelta(dmContext(), tableColumns()); + ASSERT_EQ(segment->getStable()->getPacks(), 2); + } + + { + Block block = DMTestEnv::prepareBlockWithTso(1, 100, 100 + num_rows_write, false, false); + segment->write(dmContext(), block); + } + + { + auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + // only write two visible pks + ASSERT_EQ(num_rows_read, 2); + } +} +CATCH + TEST_F(Segment_test, WriteReadMultiRange) try { From 5a0b5eba715a2d8fee64bdba743b9d0f8fe93476 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Tue, 26 Apr 2022 13:38:51 +0800 Subject: [PATCH 43/79] Interpreter: Serialize executors (#4742) ref pingcap/tiflash#4609 --- dbms/src/Debug/astToExecutor.cpp | 9 + dbms/src/Flash/Coprocessor/DAGUtils.cpp | 92 ++++++ dbms/src/Flash/Coprocessor/DAGUtils.h | 5 + .../Flash/Statistics/ExchangeSenderImpl.cpp | 21 +- dbms/src/TestUtils/CMakeLists.txt | 2 +- dbms/src/TestUtils/InterpreterTestUtils.cpp | 73 ++--- dbms/src/TestUtils/InterpreterTestUtils.h | 29 +- dbms/src/TestUtils/executorSerializer.cpp | 272 ++++++++++++++++++ dbms/src/TestUtils/executorSerializer.h | 39 +++ dbms/src/TestUtils/mockExecutor.cpp | 23 +- dbms/src/TestUtils/mockExecutor.h | 4 +- .../TestUtils/tests/gtest_mock_executors.cpp | 184 +++++++----- 12 files changed, 578 insertions(+), 175 deletions(-) create mode 100644 
dbms/src/TestUtils/executorSerializer.cpp
 create mode 100644 dbms/src/TestUtils/executorSerializer.h

diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp
index b6003bbc710..999eb6d2e68 100644
--- a/dbms/src/Debug/astToExecutor.cpp
+++ b/dbms/src/Debug/astToExecutor.cpp
@@ -808,6 +808,15 @@ bool ExchangeSender::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t col
         auto * meta_string = exchange_sender->add_encoded_task_meta();
         meta.AppendToString(meta_string);
     }
+
+    for (auto & field : output_schema)
+    {
+        auto tipb_type = TiDB::columnInfoToFieldType(field.second);
+        tipb_type.set_collate(collator_id);
+        auto * field_type = exchange_sender->add_all_field_types();
+        *field_type = tipb_type;
+    }
+
     auto * child_executor = exchange_sender->mutable_child();
     return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context);
 }
diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
index 69e76e2a098..6f5beec9937 100644
--- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
@@ -778,6 +778,98 @@ const String & getFunctionName(const tipb::Expr & expr)
     }
 }
 
+String getExchangeTypeName(const tipb::ExchangeType & tp)
+{
+    switch (tp)
+    {
+    case tipb::ExchangeType::Broadcast:
+        return "Broadcast";
+    case tipb::ExchangeType::PassThrough:
+        return "PassThrough";
+    case tipb::ExchangeType::Hash:
+        return "Hash";
+    default:
+        throw TiFlashException(fmt::format("Not supported Exchange type: {}", tp), Errors::Coprocessor::Internal);
+    }
+}
+
+String getJoinTypeName(const tipb::JoinType & tp)
+{
+    switch (tp)
+    {
+    case tipb::JoinType::TypeAntiLeftOuterSemiJoin:
+        return "AntiLeftOuterSemiJoin";
+    case tipb::JoinType::TypeLeftOuterJoin:
+        return "LeftOuterJoin";
+    case tipb::JoinType::TypeRightOuterJoin:
+        return "RightOuterJoin";
+    case tipb::JoinType::TypeLeftOuterSemiJoin:
+        return "LeftOuterSemiJoin";
+    case tipb::JoinType::TypeAntiSemiJoin:
+        return "AntiSemiJoin";
+    case tipb::JoinType::TypeInnerJoin:
+        return "InnerJoin";
+    case tipb::JoinType::TypeSemiJoin:
+        return "SemiJoin";
+    default:
+        throw TiFlashException(fmt::format("Not supported Join type: {}", tp), Errors::Coprocessor::Internal);
+    }
+}
+
+String getJoinExecTypeName(const tipb::JoinExecType & tp)
+{
+    switch (tp)
+    {
+    case tipb::JoinExecType::TypeHashJoin:
+        return "HashJoin";
+    default:
+        throw TiFlashException(fmt::format("Not supported Join execution type: {}", tp), Errors::Coprocessor::Internal);
+    }
+}
+
+String getFieldTypeName(Int32 tp)
+{
+    switch (tp)
+    {
+    case TiDB::TypeTiny:
+        return "Tiny";
+    case TiDB::TypeShort:
+        return "Short";
+    case TiDB::TypeInt24:
+        return "Int24";
+    case TiDB::TypeLong:
+        return "Long";
+    case TiDB::TypeLongLong:
+        return "Longlong";
+    case TiDB::TypeYear:
+        return "Year";
+    case TiDB::TypeDouble:
+        return "Double";
+    case TiDB::TypeTime:
+        return "Time";
+    case TiDB::TypeDate:
+        return "Date";
+    case TiDB::TypeDatetime:
+        return "Datetime";
+    case TiDB::TypeNewDate:
+        return "NewDate";
+    case TiDB::TypeTimestamp:
+        return "Timestamp";
+    case TiDB::TypeFloat:
+        return "Float";
+    case TiDB::TypeDecimal:
+        return "Decimal";
+    case TiDB::TypeNewDecimal:
+        return "NewDecimal";
+    case TiDB::TypeVarchar:
+        return "Varchar";
+    case TiDB::TypeString:
+        return "String";
+    default:
+        throw TiFlashException(fmt::format("Not supported field type: {}", tp), Errors::Coprocessor::Internal);
+    }
+}
+
 String exprToString(const tipb::Expr & expr, const std::vector & input_col)
 {
     FmtBuffer 
fmt_buf; diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index aa2b90c1bd2..ab59ec1d07d 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,10 @@ bool isWindowFunctionExpr(const tipb::Expr & expr); const String & getFunctionName(const tipb::Expr & expr); const String & getAggFunctionName(const tipb::Expr & expr); const String & getWindowFunctionName(const tipb::Expr & expr); +String getExchangeTypeName(const tipb::ExchangeType & tp); +String getJoinTypeName(const tipb::JoinType & tp); +String getFieldTypeName(Int32 tp); +String getJoinExecTypeName(const tipb::JoinExecType & tp); bool isColumnExpr(const tipb::Expr & expr); String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col); NameAndTypePair getColumnNameAndTypeForColumnExpr(const tipb::Expr & expr, const std::vector & input_col); diff --git a/dbms/src/Flash/Statistics/ExchangeSenderImpl.cpp b/dbms/src/Flash/Statistics/ExchangeSenderImpl.cpp index ba6fcc8e937..b31a7039071 100644 --- a/dbms/src/Flash/Statistics/ExchangeSenderImpl.cpp +++ b/dbms/src/Flash/Statistics/ExchangeSenderImpl.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include @@ -30,31 +31,13 @@ String MPPTunnelDetail::toJson() const bytes); } -namespace -{ -String exchangeTypeToString(const tipb::ExchangeType & exchange_type) -{ - switch (exchange_type) - { - case tipb::ExchangeType::PassThrough: - return "PassThrough"; - case tipb::ExchangeType::Broadcast: - return "Broadcast"; - case tipb::ExchangeType::Hash: - return "Hash"; - default: - throw TiFlashException("unknown ExchangeType", Errors::Coprocessor::Internal); - } -} -} // namespace - void ExchangeSenderStatistics::appendExtraJson(FmtBuffer & fmt_buffer) const { fmt_buffer.fmtAppend( R"("partition_num":{},"sender_target_task_ids":[{}],"exchange_type":"{}","connection_details":[)", partition_num, fmt::join(sender_target_task_ids, ","), - exchangeTypeToString(exchange_type)); + getExchangeTypeName(exchange_type)); fmt_buffer.joinStr( mpp_tunnel_details.cbegin(), mpp_tunnel_details.cend(), diff --git a/dbms/src/TestUtils/CMakeLists.txt b/dbms/src/TestUtils/CMakeLists.txt index 590ec3dc093..2adee4f9859 100644 --- a/dbms/src/TestUtils/CMakeLists.txt +++ b/dbms/src/TestUtils/CMakeLists.txt @@ -18,7 +18,7 @@ add_headers_and_sources(test_util .) list(REMOVE_ITEM test_util_sources "bench_dbms_main.cpp" "gtests_dbms_main.cpp") add_library(test_util_gtest_main ${test_util_headers} ${test_util_sources} gtests_dbms_main.cpp) -target_link_libraries(test_util_gtest_main dbms gtest_main) +target_link_libraries(test_util_gtest_main dbms gtest_main clickhouse_aggregate_functions) add_library(test_util_bench_main ${test_util_headers} ${test_util_sources} bench_dbms_main.cpp) target_link_libraries(test_util_bench_main dbms gtest_main benchmark) diff --git a/dbms/src/TestUtils/InterpreterTestUtils.cpp b/dbms/src/TestUtils/InterpreterTestUtils.cpp index dd1a981c504..737978a8bc4 100644 --- a/dbms/src/TestUtils/InterpreterTestUtils.cpp +++ b/dbms/src/TestUtils/InterpreterTestUtils.cpp @@ -12,72 +12,39 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include +#include namespace DB::tests { -namespace +DAGContext & MockExecutorTest::getDAGContext() { -String toTreeString(const tipb::Executor & root_executor, size_t level = 0); + assert(dag_context_ptr != nullptr); + return *dag_context_ptr; +} + +void MockExecutorTest::initializeContext() +{ + dag_context_ptr = std::make_unique(1024); + context = MockDAGRequestContext(TiFlashTestEnv::getContext()); +} -// serialize tipb::DAGRequest, print the executor name in a Tree format. -String toTreeString(std::shared_ptr dag_request) +void MockExecutorTest::SetUpTestCase() { - assert((dag_request->executors_size() > 0) != dag_request->has_root_executor()); - if (dag_request->has_root_executor()) + try { - return toTreeString(dag_request->root_executor()); + DB::registerFunctions(); + DB::registerAggregateFunctions(); } - else + catch (DB::Exception &) { - FmtBuffer buffer; - String prefix; - traverseExecutors(dag_request.get(), [&buffer, &prefix](const tipb::Executor & executor) { - assert(executor.has_executor_id()); - buffer.fmtAppend("{}{}\n", prefix, executor.executor_id()); - prefix.append(" "); - return true; - }); - return buffer.toString(); + // Maybe another test has already registered, ignore exception here. } } -String toTreeString(const tipb::Executor & root_executor, size_t level) -{ - FmtBuffer buffer; - - auto append_str = [&buffer, &level](const tipb::Executor & executor) { - assert(executor.has_executor_id()); - - buffer.append(String(level, ' ')); - buffer.append(executor.executor_id()).append("\n"); - }; - - traverseExecutorTree(root_executor, [&](const tipb::Executor & executor) { - if (executor.has_join()) - { - append_str(executor); - ++level; - for (const auto & child : executor.join().children()) - buffer.append(toTreeString(child, level)); - return false; - } - else - { - append_str(executor); - ++level; - return true; - } - }); - - return buffer.toString(); -} -} // namespace - -void dagRequestEqual(String & expected_string, const std::shared_ptr & actual) +void MockExecutorTest::dagRequestEqual(String & expected_string, const std::shared_ptr & actual) { - String actual_string = toTreeString(actual); - ASSERT_EQ(Poco::trimInPlace(expected_string), Poco::trimInPlace(actual_string)); + ASSERT_EQ(Poco::trimInPlace(expected_string), Poco::trim(ExecutorSerializer().serialize(actual.get()))); } - } // namespace DB::tests diff --git a/dbms/src/TestUtils/InterpreterTestUtils.h b/dbms/src/TestUtils/InterpreterTestUtils.h index 37616911f87..074c65da6f0 100644 --- a/dbms/src/TestUtils/InterpreterTestUtils.h +++ b/dbms/src/TestUtils/InterpreterTestUtils.h @@ -15,16 +15,17 @@ #pragma once #include +#include #include #include #include #include #include #include +#include #include namespace DB::tests { -void dagRequestEqual(String & expected_string, const std::shared_ptr & actual); class MockExecutorTest : public ::testing::Test { protected: @@ -37,29 +38,13 @@ class MockExecutorTest : public ::testing::Test MockExecutorTest() : context(TiFlashTestEnv::getContext()) {} - static void SetUpTestCase() - { - try - { - DB::registerFunctions(); - } - catch (DB::Exception &) - { - // Maybe another test has already registered, ignore exception here. 
- } - } + static void SetUpTestCase(); - virtual void initializeContext() - { - dag_context_ptr = std::make_unique(1024); - context = MockDAGRequestContext(TiFlashTestEnv::getContext()); - } + virtual void initializeContext(); - DAGContext & getDAGContext() - { - assert(dag_context_ptr != nullptr); - return *dag_context_ptr; - } + DAGContext & getDAGContext(); + + static void dagRequestEqual(String & expected_string, const std::shared_ptr & actual); protected: MockDAGRequestContext context; diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp new file mode 100644 index 00000000000..b8d2b039bd2 --- /dev/null +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -0,0 +1,272 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +namespace DB::tests +{ +namespace +{ +template +struct IsExpr +{ + static constexpr bool value = false; +}; + +template <> +struct IsExpr<::tipb::Expr> +{ + static constexpr bool value = true; +}; + +template +String getColumnTypeName(const Column column) +{ + String name; + if constexpr (IsExpr::value == true) + name = getFieldTypeName(column.field_type().tp()); + else + name = getFieldTypeName(column.tp()); + return name; +} + +template +void toString(const Columns & columns, FmtBuffer & buf) +{ + assert(columns.size() > 0); + int bound = columns.size() - 1; + for (int i = 0; i < bound; ++i) + { + buf.fmtAppend("<{}, {}>, ", i, getColumnTypeName(columns.at(i))); + } + buf.fmtAppend("<{}, {}>", bound, getColumnTypeName(columns.at(bound))); +} +} // namespace + +String ExecutorSerializer::serialize(const tipb::DAGRequest * dag_request) +{ + assert((dag_request->executors_size() > 0) != dag_request->has_root_executor()); + if (dag_request->has_root_executor()) + { + serialize(dag_request->root_executor(), 0); + return buf.toString(); + } + else + { + FmtBuffer buffer; + String prefix; + traverseExecutors(dag_request, [this, &prefix](const tipb::Executor & executor) { + assert(executor.has_executor_id()); + buf.fmtAppend("{}{}\n", prefix, executor.executor_id()); + prefix.append(" "); + return true; + }); + return buffer.toString(); + } +} + +void serializeTableScan(const String & executor_id, const tipb::TableScan & ts, FmtBuffer & buf) +{ + if (ts.columns_size() == 0) + { + // no column selected, must be something wrong + throw TiFlashException("No column is selected in table scan executor", Errors::Coprocessor::BadRequest); + } + buf.fmtAppend("{} | {{", executor_id); + toString(ts.columns(), buf); + buf.append("}\n"); +} + +void serializeExpression(const tipb::Expr & expr, FmtBuffer & buf) +{ + if (isFunctionExpr(expr)) + { + buf.fmtAppend("{}(", getFunctionName(expr)); + buf.joinStr( + expr.children().begin(), + expr.children().end(), + [&](const auto & ex, FmtBuffer &) { + serializeExpression(ex, buf); + }, + ", "); + buf.append(")"); + } + else + { + buf.fmtAppend("<{}, {}>", decodeDAGInt64(expr.val()), 
getFieldTypeName(expr.field_type().tp())); + } +} + +void serializeSelection(const String & executor_id, const tipb::Selection & sel, FmtBuffer & buf) +{ + buf.fmtAppend("{} | ", executor_id); + // currently only support "and" function in selection executor. + buf.joinStr( + sel.conditions().begin(), + sel.conditions().end(), + [&](const auto & expr, FmtBuffer &) { + serializeExpression(expr, buf); + }, + " and "); + buf.append("}\n"); +} + +void serializeLimit(const String & executor_id, const tipb::Limit & limit, FmtBuffer & buf) +{ + buf.fmtAppend("{} | {}\n", executor_id, limit.limit()); +} + +void serializeProjection(const String & executor_id, const tipb::Projection & proj, FmtBuffer & buf) +{ + buf.fmtAppend("{} | {{", executor_id); + buf.joinStr( + proj.exprs().begin(), + proj.exprs().end(), + [&](const auto & expr, FmtBuffer &) { + serializeExpression(expr, buf); + }, + ", "); + buf.append("}\n"); +} + +void serializeAggregation(const String & executor_id, const tipb::Aggregation & agg, FmtBuffer & buf) +{ + buf.fmtAppend("{} | group_by: {{", executor_id); + buf.joinStr( + agg.group_by().begin(), + agg.group_by().end(), + [&](const auto & group_by, FmtBuffer &) { + serializeExpression(group_by, buf); + }, + ", "); + buf.append("}, agg_func: {"); + buf.joinStr( + agg.agg_func().begin(), + agg.agg_func().end(), + [&](const auto & func, FmtBuffer &) { + serializeExpression(func, buf); + }, + ", "); + buf.append("}\n"); +} + +void serializeTopN(const String & executor_id, const tipb::TopN & top_n, FmtBuffer & buf) +{ + buf.fmtAppend("{} | order_by: {{", executor_id); + buf.joinStr( + top_n.order_by().begin(), + top_n.order_by().end(), + [&](const auto & order_by, FmtBuffer & fb) { + fb.append("("); + serializeExpression(order_by.expr(), buf); + fb.fmtAppend(", desc: {})", order_by.desc()); + }, + ", "); + buf.fmtAppend("}}, limit: {}\n", top_n.limit()); +} + +void serializeJoin(const String & executor_id, const tipb::Join & join, FmtBuffer & buf) +{ + buf.fmtAppend("{} | {}, {}. left_join_keys: {{", executor_id, getJoinTypeName(join.join_type()), getJoinExecTypeName(join.join_exec_type())); + toString(join.left_join_keys(), buf); + buf.append("}, right_join_keys: {"); + toString(join.right_join_keys(), buf); + buf.append("}\n"); +} + +void serializeExchangeSender(const String & executor_id, const tipb::ExchangeSender & sender, FmtBuffer & buf) +{ + buf.fmtAppend("{} | type:{}, {{", executor_id, getExchangeTypeName(sender.tp())); + toString(sender.all_field_types(), buf); + buf.append("}\n"); +} + +void serializeExchangeReceiver(const String & executor_id, const tipb::ExchangeReceiver & receiver, FmtBuffer & buf) +{ + buf.fmtAppend("{} | type:{}, {{", executor_id, getExchangeTypeName(receiver.tp())); + toString(receiver.field_types(), buf); + buf.append("}\n"); +} + +void ExecutorSerializer::serialize(const tipb::Executor & root_executor, size_t level) +{ + auto append_str = [&level, this](const tipb::Executor & executor) { + assert(executor.has_executor_id()); + addPrefix(level); + switch (executor.tp()) + { + case tipb::ExecType::TypeTableScan: + serializeTableScan(executor.executor_id(), executor.tbl_scan(), buf); + break; + case tipb::ExecType::TypePartitionTableScan: + throw TiFlashException("Partition table scan executor is not supported", Errors::Coprocessor::Unimplemented); // todo support partition table scan executor. 
+ case tipb::ExecType::TypeJoin: + serializeJoin(executor.executor_id(), executor.join(), buf); + break; + case tipb::ExecType::TypeIndexScan: + // index scan not supported + throw TiFlashException("IndexScan executor is not supported", Errors::Coprocessor::Unimplemented); + case tipb::ExecType::TypeSelection: + serializeSelection(executor.executor_id(), executor.selection(), buf); + break; + case tipb::ExecType::TypeAggregation: + // stream agg is not supported, treated as normal agg + case tipb::ExecType::TypeStreamAgg: + serializeAggregation(executor.executor_id(), executor.aggregation(), buf); + break; + case tipb::ExecType::TypeTopN: + serializeTopN(executor.executor_id(), executor.topn(), buf); + break; + case tipb::ExecType::TypeLimit: + serializeLimit(executor.executor_id(), executor.limit(), buf); + break; + case tipb::ExecType::TypeProjection: + serializeProjection(executor.executor_id(), executor.projection(), buf); + break; + case tipb::ExecType::TypeKill: + throw TiFlashException("Kill executor is not supported", Errors::Coprocessor::Unimplemented); + case tipb::ExecType::TypeExchangeReceiver: + serializeExchangeReceiver(executor.executor_id(), executor.exchange_receiver(), buf); + break; + case tipb::ExecType::TypeExchangeSender: + serializeExchangeSender(executor.executor_id(), executor.exchange_sender(), buf); + break; + case tipb::ExecType::TypeSort: + throw TiFlashException("Sort executor is not supported", Errors::Coprocessor::Unimplemented); // todo support sort executor. + case tipb::ExecType::TypeWindow: + throw TiFlashException("Window executor is not supported", Errors::Coprocessor::Unimplemented); // todo support window executor. + default: + throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); + } + ++level; + }; + + traverseExecutorTree(root_executor, [&](const tipb::Executor & executor) { + append_str(executor); + if (executor.has_join()) + { + for (const auto & child : executor.join().children()) + serialize(child, level); + return false; + } + return true; + }); +} + +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/executorSerializer.h b/dbms/src/TestUtils/executorSerializer.h new file mode 100644 index 00000000000..048c0564250 --- /dev/null +++ b/dbms/src/TestUtils/executorSerializer.h @@ -0,0 +1,39 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include
+#include
+#include
+namespace DB
+{
+namespace tests
+{
+class ExecutorSerializer
+{
+public:
+    String serialize(const tipb::DAGRequest * dag_request);
+
+private:
+    void serialize(const tipb::Executor & root_executor, size_t level);
+    void addPrefix(size_t level) { buf.append(String(level, ' ')); }
+
+private:
+    FmtBuffer buf;
+};
+} // namespace tests
+
+} // namespace DB
\ No newline at end of file
diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp
index c26c0bd4d87..c862c7deec8 100644
--- a/dbms/src/TestUtils/mockExecutor.cpp
+++ b/dbms/src/TestUtils/mockExecutor.cpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+
 namespace DB::tests
 {
 ASTPtr buildColumn(const String & column_name)
@@ -40,7 +41,7 @@ ASTPtr buildOrderByItemList(MockOrderByItems order_by_items)
     size_t i = 0;
     for (auto item : order_by_items)
     {
-        int direction = item.second ? 1 : -1; // todo
+        int direction = item.second ? -1 : 1;
         ASTPtr locale_node;
         auto order_by_item = std::make_shared(direction, direction, false, locale_node);
         order_by_item->children.push_back(std::make_shared(item.first));
@@ -184,7 +185,10 @@ DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, AST
 DAGRequestBuilder & DAGRequestBuilder::project(const String & col_name)
 {
     assert(root);
-    root = compileProject(root, getExecutorIndex(), buildColumn(col_name));
+    auto exp_list = std::make_shared();
+    exp_list->children.push_back(buildColumn(col_name));
+
+    root = compileProject(root, getExecutorIndex(), exp_list);
     return *this;
 }
 
@@ -208,7 +212,6 @@ DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNames col_names)
     {
         exp_list->children.push_back(col(name));
     }
-
     root = compileProject(root, getExecutorIndex(), exp_list);
     return *this;
 }
@@ -220,17 +223,22 @@ DAGRequestBuilder & DAGRequestBuilder::exchangeSender(tipb::ExchangeType exchang
     return *this;
 }
 
-DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, ASTPtr using_expr_list)
+DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAsts exprs)
 {
-    return join(right, using_expr_list, ASTTableJoin::Kind::Inner);
+    return join(right, exprs, ASTTableJoin::Kind::Inner);
 }
 
-DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, ASTPtr using_expr_list, ASTTableJoin::Kind kind)
+DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAsts exprs, ASTTableJoin::Kind kind)
 {
     assert(root);
     assert(right.root);
     auto join_ast = std::make_shared();
-    join_ast->using_expression_list = using_expr_list;
+    auto exp_list = std::make_shared();
+    for (const auto & expr : exprs)
+    {
+        exp_list->children.push_back(expr);
+    }
+    join_ast->using_expression_list = exp_list;
     join_ast->strictness = ASTTableJoin::Strictness::All;
     join_ast->kind = kind;
     root = compileJoin(getExecutorIndex(), root, right.root, join_ast);
@@ -254,7 +262,6 @@ DAGRequestBuilder & DAGRequestBuilder::aggregation(MockAsts agg_funcs, MockAsts 
         agg_func_list->children.push_back(func);
     for (const auto & group_by : group_by_exprs)
         group_by_expr_list->children.push_back(group_by);
-
     return buildAggregation(agg_func_list, group_by_expr_list);
 }
 
diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h
index 9a3507cc603..2f6d3542ebb 100644
--- a/dbms/src/TestUtils/mockExecutor.h
+++ b/dbms/src/TestUtils/mockExecutor.h
@@ -88,8 +88,8 @@ class DAGRequestBuilder
     // Currently only support inner join, left join and right join.
// TODO support more types of join. - DAGRequestBuilder & join(const DAGRequestBuilder & right, ASTPtr using_expr_list); - DAGRequestBuilder & join(const DAGRequestBuilder & right, ASTPtr using_expr_list, ASTTableJoin::Kind kind); + DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAsts exprs); + DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAsts exprs, ASTTableJoin::Kind kind); // aggregation DAGRequestBuilder & aggregation(ASTPtr agg_func, ASTPtr group_by_expr); diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 1f48ebd0230..5c7d77c399a 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -31,8 +31,8 @@ class MockDAGRequestTest : public DB::tests::MockExecutorTest context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeLong}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); - context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"r_c", TiDB::TP::TypeString}}); - context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"l_c", TiDB::TP::TypeString}}); + context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); context.addExchangeRelationSchema("sender_1", {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); } }; @@ -41,12 +41,15 @@ TEST_F(MockDAGRequestTest, MockTable) try { auto request = context.scan("test_db", "test_table").build(context); - String expected_string_1 = "table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string_1, request); - + { + String expected = "table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table_1").build(context); - String expected_string_2 = "table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); + { + String expected = "table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -54,14 +57,19 @@ TEST_F(MockDAGRequestTest, Filter) try { auto request = context.scan("test_db", "test_table").filter(eq(col("s1"), col("s2"))).build(context); - String expected_string = "selection_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); - + { + String expected = "selection_1 | equals(<0, String>, <1, String>)}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table_1") - .filter(And(eq(col("s1"), col("s2")), lt(col("s2"), col("s3")))) + .filter(And(eq(col("s1"), col("s2")), lt(col("s2"), lt(col("s1"), col("s2"))))) .build(context); - ASSERT_DAGREQUEST_EQAUL(expected_string, request); + { + String expected = "selection_1 | equals(<0, Long>, <1, String>) and less(<1, String>, less(<0, Long>, <1, String>))}\n" + " table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -71,21 +79,27 @@ try auto request = context.scan("test_db", "test_table") .project("s1") .build(context); 
- String expected_string = "project_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); - + { + String expected = "project_1 | {<0, String>}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table_1") .project({col("s3"), eq(col("s1"), col("s2"))}) .build(context); - String expected_string_2 = "project_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); - + { + String expected = "project_1 | {<2, String>, equals(<0, Long>, <1, String>)}\n" + " table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table_1") .project({"s1", "s2"}) .build(context); - ASSERT_DAGREQUEST_EQAUL(expected_string, request); + { + String expected = "project_1 | {<0, Long>, <1, String>}\n" + " table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -95,14 +109,19 @@ try auto request = context.scan("test_db", "test_table") .limit(10) .build(context); - String expected_string = "limit_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); - + { + String expected = "limit_1 | 10\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table_1") .limit(lit(Field(static_cast(10)))) .build(context); - ASSERT_DAGREQUEST_EQAUL(expected_string, request); + { + String expected = "limit_1 | 10\n" + " table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -110,16 +129,21 @@ TEST_F(MockDAGRequestTest, TopN) try { auto request = context.scan("test_db", "test_table") - .topN({{"s1", false}}, 10) + .topN({{"s1", false}, {"s2", true}}, 10) .build(context); - String expected_string = "topn_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); - + { + String expected = "topn_1 | order_by: {(<0, String>, desc: false), (<1, String>, desc: true)}, limit: 10\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table") .topN("s1", false, 10) .build(context); - ASSERT_DAGREQUEST_EQAUL(expected_string, request); + { + String expected = "topn_1 | order_by: {(<0, String>, desc: false)}, limit: 10\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -129,9 +153,20 @@ try auto request = context.scan("test_db", "test_table") .aggregation(Max(col("s1")), col("s2")) .build(context); - String expected_string = "aggregation_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); + { + String expected = "aggregation_1 | group_by: {<1, String>}, agg_func: {max(<0, String>)}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } + + request = context.scan("test_db", "test_table") + .aggregation({Max(col("s1"))}, {col("s2"), lt(col("s1"), col("s2"))}) + .build(context); + { + String expected = "aggregation_1 | group_by: {<1, String>, less(<0, String>, <1, String>)}, agg_func: {max(<0, String>)}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -139,26 +174,28 @@ TEST_F(MockDAGRequestTest, Join) try { DAGRequestBuilder right_builder = context.scan("test_db", "r_table") - 
.filter(eq(col("r_a"), col("r_b"))) - .project({col("r_a"), col("r_b")}) - .aggregation(Max(col("r_a")), col("r_b")); - + .filter(And(eq(col("r_a"), col("r_b")), eq(col("r_a"), col("r_b")))) + .project({col("r_a"), col("r_b"), col("join_c")}) + .aggregation({Max(col("r_a"))}, {col("join_c"), col("r_b")}) + .topN({{"r_b", false}}, 10); DAGRequestBuilder left_builder = context.scan("test_db", "l_table") .topN({{"l_a", false}}, 10) - .join(right_builder, col("l_a"), ASTTableJoin::Kind::Left) + .join(right_builder, {col("join_c")}, ASTTableJoin::Kind::Left) // todo ensure the join is legal. .limit(10); - auto request = left_builder.build(context); - String expected_string = "limit_7\n" - " Join_6\n" - " topn_5\n" - " table_scan_4\n" - " aggregation_3\n" - " project_2\n" - " selection_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); + { + String expected = "limit_8 | 10\n" + " Join_7 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" + " topn_6 | order_by: {(<0, Long>, desc: false)}, limit: 10\n" + " table_scan_5 | {<0, Long>, <1, String>, <2, String>}\n" + " topn_4 | order_by: {(<2, String>, desc: false)}, limit: 10\n" + " aggregation_3 | group_by: {<2, String>, <1, String>}, agg_func: {max(<0, Long>)}\n" + " project_2 | {<0, Long>, <1, String>, <2, String>}\n" + " selection_1 | equals(<0, Long>, <1, String>) and equals(<0, Long>, <1, String>)}\n" + " table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -168,27 +205,31 @@ try auto request = context.scan("test_db", "test_table") .exchangeSender(tipb::PassThrough) .build(context); - String expected_string = "exchange_sender_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); - + { + String expected = "exchange_sender_1 | type:PassThrough, {<0, String>, <1, String>}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table") .topN("s1", false, 10) .exchangeSender(tipb::Broadcast) .build(context); - String expected_string_2 = "exchange_sender_2\n" - " topn_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); - + { + String expected = "exchange_sender_2 | type:Broadcast, {<0, String>, <1, String>}\n" + " topn_1 | order_by: {(<0, String>, desc: false)}, limit: 10\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.scan("test_db", "test_table") .project({col("s1"), col("s2")}) .exchangeSender(tipb::Hash) .build(context); - String expected_string_3 = "exchange_sender_2\n" - " project_1\n" - " table_scan_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string_3, request); + { + String expected = "exchange_sender_2 | type:Hash, {<0, String>, <1, String>}\n" + " project_1 | {<0, String>, <1, String>}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } } CATCH @@ -197,15 +238,18 @@ try { auto request = context.receive("sender_1") .build(context); - String expected_string = "exchange_receiver_0\n"; - ASSERT_DAGREQUEST_EQAUL(expected_string, request); - + { + String expected = "exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } request = context.receive("sender_1") .topN("s1", false, 10) .build(context); - String expected_string_2 = "topn_1\n" - " exchange_receiver_0\n"; - 
ASSERT_DAGREQUEST_EQAUL(expected_string_2, request);
+    {
+        String expected = "topn_1 | order_by: {(<0, String>, desc: false)}, limit: 10\n"
+                          " exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}\n";
+        ASSERT_DAGREQUEST_EQAUL(expected, request);
+    }
 }
 CATCH

From 0e1a37387e15f80861d5329dd6f3423ad86257bd Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan
Date: Tue, 26 Apr 2022 18:12:51 +0800
Subject: [PATCH 44/79] fix: frame total size include header for indices
 (#4754)

close pingcap/tiflash#4752
---
 dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h
index ea0c3265757..554ecc34b01 100644
--- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h
+++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h
@@ -254,7 +254,7 @@ class DMFilePackFilter
                                                            dmfile->configuration->getChecksumFrameLength());
         index_buf->seek(dmfile->colIndexOffset(file_name_base));
         auto header_size = dmfile->configuration->getChecksumHeaderLength();
-        auto frame_total_size = dmfile->configuration->getChecksumFrameLength();
+        auto frame_total_size = dmfile->configuration->getChecksumFrameLength() + header_size;
         auto frame_count = index_file_size / frame_total_size + (index_file_size % frame_total_size != 0);
         return MinMaxIndex::read(*type, *index_buf, index_file_size - header_size * frame_count);
     }

From c6740e5c8d184d854447cd7d8e04939efe99de75 Mon Sep 17 00:00:00 2001
From: yibin
Date: Wed, 27 Apr 2022 10:04:09 +0800
Subject: [PATCH 45/79] Add more debug info for gtest failure (#4769)

Signed-off-by: yibin
---
 dbms/src/Flash/Mpp/MPPTunnel.cpp             | 11 ++++++++++-
 dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp |  7 +++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp
index 6db39e61586..b38e7d6a2ea 100644
--- a/dbms/src/Flash/Mpp/MPPTunnel.cpp
+++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp
@@ -84,18 +84,25 @@ MPPTunnelBase<Writer>::~MPPTunnelBase()
     {
         std::unique_lock lock(mu);
         if (finished)
+        {
+            LOG_FMT_TRACE(log, "already finished!");
             return;
+        }
+
         /// make sure to finish the tunnel after it is connected
         waitUntilConnectedOrFinished(lock);
         finishSendQueue();
     }
+    LOG_FMT_TRACE(log, "waiting consumer finish!");
     waitForConsumerFinish(/*allow_throw=*/false);
 }
 catch (...)
 {
     tryLogCurrentException(log, "Error in destructor function of MPPTunnel");
 }
+    LOG_FMT_TRACE(log, "waiting child thread finished!");
     thread_manager->wait();
+    LOG_FMT_TRACE(log, "destructed tunnel obj!");
 }

 template <typename Writer>
@@ -296,9 +303,11 @@ void MPPTunnelBase<Writer>::waitForConsumerFinish(bool allow_throw)
         assert(connected);
     }
 #endif
+    LOG_FMT_TRACE(log, "start wait for consumer finish!");
     String err_msg = consumer_state.getError(); // may blocking
     if (allow_throw && !err_msg.empty())
         throw Exception("Consumer exits unexpected, " + err_msg);
+    LOG_FMT_TRACE(log, "end wait for consumer finish!");
 }

 template <typename Writer>
@@ -330,8 +339,8 @@ template <typename Writer>
 void MPPTunnelBase<Writer>::consumerFinish(const String & err_msg, bool need_lock)
 {
     // must finish send_queue outside of the critical area to avoid deadlock with write.
+    LOG_FMT_TRACE(log, "calling consumer Finish");
     send_queue.finish();
-
     auto rest_work = [this, &err_msg] {
         // it's safe to call it multiple times
         if (finished && consumer_state.errHasSet())
diff --git a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp
index 133142cc867..a2860c62947 100644
--- a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp
+++ b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp
@@ -56,6 +56,10 @@ class MPPTunnelTest : public MPPTunnelBase<PacketWriter>
     {
         return thread_manager;
     }
+    LoggerPtr getLog()
+    {
+        return log;
+    }
 };
 using MPPTunnelTestPtr = std::shared_ptr<MPPTunnelTest>;

@@ -94,7 +98,9 @@ struct MockLocalReader
         if (tunnel)
         {
             // In case that ExchangeReceiver throw error before finish reading from mpp_tunnel
+            LOG_FMT_TRACE(tunnel->getLog(), "before mocklocalreader invoking consumerFinish!");
             tunnel->consumerFinish("Receiver closed");
+            LOG_FMT_TRACE(tunnel->getLog(), "after mocklocalreader invoking consumerFinish!");
         }
     }

@@ -483,6 +489,7 @@ try
     GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
     GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 1);
     GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First");
+    LOG_FMT_TRACE(mpp_tunnel_ptr->getLog(), "basic logic done!");
 }
 CATCH

From d6e4f0679fb46906db7319c51894aa9b17765973 Mon Sep 17 00:00:00 2001
From: jiaqizho
Date: Thu, 28 Apr 2022 11:34:51 +0800
Subject: [PATCH 46/79] Fix ingest dmfile won't rollback (#4785)

ref pingcap/tiflash#3594
---
 dbms/src/Storages/DeltaMerge/WriteBatches.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Storages/DeltaMerge/WriteBatches.h b/dbms/src/Storages/DeltaMerge/WriteBatches.h
index 222bc021ce4..c64ff2cde38 100644
--- a/dbms/src/Storages/DeltaMerge/WriteBatches.h
+++ b/dbms/src/Storages/DeltaMerge/WriteBatches.h
@@ -42,12 +42,13 @@ struct WriteBatches : private boost::noncopyable
     WriteLimiterPtr write_limiter;

     WriteBatches(StoragePool & storage_pool_, const WriteLimiterPtr & write_limiter_ = nullptr)
-        : log(storage_pool_.getNamespaceId())
-        , data(storage_pool_.getNamespaceId())
-        , meta(storage_pool_.getNamespaceId())
-        , removed_log(storage_pool_.getNamespaceId())
-        , removed_data(storage_pool_.getNamespaceId())
-        , removed_meta(storage_pool_.getNamespaceId())
+        : ns_id(storage_pool_.getNamespaceId())
+        , log(ns_id)
+        , data(ns_id)
+        , meta(ns_id)
+        , removed_log(ns_id)
+        , removed_data(ns_id)
+        , removed_meta(ns_id)
         , storage_pool(storage_pool_)
         , write_limiter(write_limiter_)
     {

From e845908faadb974497836f9e5dc5f1c92e90c86a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan
Date: Thu, 28 Apr 2022 13:26:51 +0800
Subject: [PATCH 47/79] fix: ut and more idx error (#4777)

close pingcap/tiflash#4778
---
 dbms/src/Server/DTTool/DTToolMigrate.cpp     |  2 ++
 dbms/src/Server/tests/gtest_dttool.cpp       | 23 +++++++++++++-------
 dbms/src/Storages/DeltaMerge/File/DMFile.cpp |  5 +++--
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/dbms/src/Server/DTTool/DTToolMigrate.cpp b/dbms/src/Server/DTTool/DTToolMigrate.cpp
index 1be0a68f715..d54fc1ed9ed 100644
--- a/dbms/src/Server/DTTool/DTToolMigrate.cpp
+++ b/dbms/src/Server/DTTool/DTToolMigrate.cpp
@@ -190,6 +190,8 @@ int migrateServiceMain(DB::Context & context, const MigrateArgs & args)
     LOG_FMT_INFO(logger, "source version: {}", (src_file->getConfiguration() ? 2 : 1));
     LOG_FMT_INFO(logger, "source bytes: {}", src_file->getBytesOnDisk());
     LOG_FMT_INFO(logger, "migration temporary directory: {}", keeper.migration_temp_dir.path().c_str());
+    LOG_FMT_INFO(logger, "target version: {}", args.version);
+    LOG_FMT_INFO(logger, "target frame size: {}", args.frame);
     DB::DM::DMConfigurationOpt option{};
     // if new format is the target, we construct a config file.
diff --git a/dbms/src/Server/tests/gtest_dttool.cpp b/dbms/src/Server/tests/gtest_dttool.cpp
index 4ea163f0507..6b0d6e3d5c2 100644
--- a/dbms/src/Server/tests/gtest_dttool.cpp
+++ b/dbms/src/Server/tests/gtest_dttool.cpp
@@ -233,20 +233,23 @@ TEST_F(DTToolTest, BlockwiseInvariant)
         stream->readSuffix();
     }

-    std::vector<std::tuple<size_t, DB::ChecksumAlgo, DB::CompressionMethod, int>> test_cases{
-        {2, DB::ChecksumAlgo::XXH3, DB::CompressionMethod::LZ4, -1},
-        {1, DB::ChecksumAlgo::XXH3, DB::CompressionMethod::ZSTD, 1},
-        {2, DB::ChecksumAlgo::City128, DB::CompressionMethod::LZ4HC, 0},
-        {2, DB::ChecksumAlgo::CRC64, DB::CompressionMethod::ZSTD, 22},
-        {1, DB::ChecksumAlgo::XXH3, DB::CompressionMethod::NONE, -1}};
-    for (auto [version, algo, comp, level] : test_cases)
+    std::vector<std::tuple<size_t, size_t, DB::ChecksumAlgo, DB::CompressionMethod, int>> test_cases{
+        {2, DBMS_DEFAULT_BUFFER_SIZE, DB::ChecksumAlgo::XXH3, DB::CompressionMethod::LZ4, -1},
+        {1, 64, DB::ChecksumAlgo::XXH3, DB::CompressionMethod::ZSTD, 1},
+        {2, DBMS_DEFAULT_BUFFER_SIZE * 2, DB::ChecksumAlgo::City128, DB::CompressionMethod::LZ4HC, 0},
+        {2, DBMS_DEFAULT_BUFFER_SIZE * 4, DB::ChecksumAlgo::City128, DB::CompressionMethod::LZ4HC, 0},
+        {2, 4, DB::ChecksumAlgo::CRC64, DB::CompressionMethod::ZSTD, 22},
+        {2, 13, DB::ChecksumAlgo::CRC64, DB::CompressionMethod::ZSTD, 22},
+        {2, 5261, DB::ChecksumAlgo::CRC64, DB::CompressionMethod::ZSTD, 22},
+        {1, DBMS_DEFAULT_BUFFER_SIZE, DB::ChecksumAlgo::XXH3, DB::CompressionMethod::NONE, -1}};
+    for (auto [version, frame_size, algo, comp, level] : test_cases)
     {
         auto a = DTTool::Migrate::MigrateArgs{
             .no_keep = false,
             .dry_mode = false,
             .file_id = 1,
             .version = version,
-            .frame = DBMS_DEFAULT_BUFFER_SIZE,
+            .frame = frame_size,
             .algorithm = algo,
             .workdir = getTemporaryPath(),
             .compression_method = comp,
@@ -260,6 +263,10 @@
                                                       0,
                                                       getTemporaryPath(),
                                                       DB::DM::DMFile::ReadMetaMode::all());
+        if (version == 2)
+        {
+            EXPECT_EQ(refreshed_file->getConfiguration()->getChecksumFrameLength(), frame_size);
+        }
         auto stream = DB::DM::createSimpleBlockInputStream(*db_context, refreshed_file);
         auto size_iter = size_info.begin();
         auto prop_iter = dmfile->getPackProperties().property().begin();
diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp
index 928bf8a6415..d40a6bf5bab 100644
--- a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp
+++ b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp
@@ -579,8 +579,9 @@ void DMFile::readMetadata(const FileProviderPtr & file_provider, const ReadMetaM
     auto recheck = [&](size_t size) {
         if (this->configuration)
         {
-            auto frame_count = size / this->configuration->getChecksumFrameLength()
-                + (0 != size % this->configuration->getChecksumFrameLength());
+            auto total_size = this->configuration->getChecksumFrameLength() + this->configuration->getChecksumHeaderLength();
+            auto frame_count = size / total_size
+                + (0 != size % total_size);
             size -= frame_count * this->configuration->getChecksumHeaderLength();
         }
         return size;

From 1a9b1e405b34596c86695ccf12d7b956229825e9 Mon Sep 17 00:00:00 2001
From: Jun Zhang
Date: Thu, 28 Apr 2022 16:22:52 +0800
Subject: [PATCH 48/79] refactor: Remove some non-const member methods in Context.
(#4786) close pingcap/tiflash#4740 --- dbms/src/Interpreters/Context.cpp | 25 ------------------------- dbms/src/Interpreters/Context.h | 4 ---- 2 files changed, 29 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index ac959158490..ce4ecc692c0 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -326,13 +326,6 @@ Databases Context::getDatabases() const return shared->databases; } -Databases Context::getDatabases() -{ - auto lock = getLock(); - return shared->databases; -} - - Context::SessionKey Context::getSessionKey(const String & session_id) const { const auto & user_name = client_info.current_user; @@ -458,14 +451,6 @@ DatabasePtr Context::getDatabase(const String & database_name) const return shared->databases[db]; } -DatabasePtr Context::getDatabase(const String & database_name) -{ - auto lock = getLock(); - String db = resolveDatabase(database_name, current_database); - assertDatabaseExists(db); - return shared->databases[db]; -} - DatabasePtr Context::tryGetDatabase(const String & database_name) const { auto lock = getLock(); @@ -476,16 +461,6 @@ DatabasePtr Context::tryGetDatabase(const String & database_name) const return it->second; } -DatabasePtr Context::tryGetDatabase(const String & database_name) -{ - auto lock = getLock(); - String db = resolveDatabase(database_name, current_database); - auto it = shared->databases.find(db); - if (it == shared->databases.end()) - return {}; - return it->second; -} - String Context::getPath() const { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index ebf7d8c82e2..9e8a1b248ba 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -294,12 +294,8 @@ class Context ASTPtr getCreateDatabaseQuery(const String & database_name) const; DatabasePtr getDatabase(const String & database_name) const; - DatabasePtr getDatabase(const String & database_name); DatabasePtr tryGetDatabase(const String & database_name) const; - DatabasePtr tryGetDatabase(const String & database_name); - Databases getDatabases() const; - Databases getDatabases(); std::shared_ptr acquireSession( const String & session_id, From 4371b39d5d4fc2ebf467ea0348f3abee1f5b7a07 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 28 Apr 2022 17:16:52 +0800 Subject: [PATCH 49/79] doc: add sanitizer test related docs (#4750) close pingcap/tiflash#4751 --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 05def1eb76c..a4ccba7d848 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,26 @@ ninja gtests_libdaemon ``` And the unit-test executables are at `$BUILD/dbms/gtests_dbms`, `$BUILD/libs/libcommon/src/tests/gtests_libcommon` and `$BUILD/libs/libdaemon/src/tests/gtests_libdaemon`. +## Run Sanitizer Tests + +TiFlash supports testing with thread sanitizer and address sanitizer. + +To generate unit test executables with sanitizer enabled: + +```shell +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DENABLE_TESTS=ON -DCMAKE_BUILD_TYPE=ASan # or TSan +ninja gtests_dbms +ninja gtests_libcommon +ninja gtests_libdaemon +``` + +There are known false positives reported from leak sanitizer (which is included in address sanitizer). To suppress these errors, set the following environment variables before running the executables: + +``` +LSAN_OPTIONS=suppressions=$WORKSPACE/tiflash/test/sanitize/asan.suppression +``` + ## Run Integration Tests TBD. 
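For reference, a minimal usage sketch of the sanitizer workflow described in the section above (the binary and paths follow the `$BUILD`/`$WORKSPACE` conventions from this README; picking `gtests_dbms` here is illustrative, the same applies to the other test executables):

```shell
# Illustrative: run the ASan-built DBMS unit tests with the
# LeakSanitizer suppression list from the section above.
LSAN_OPTIONS=suppressions=$WORKSPACE/tiflash/test/sanitize/asan.suppression \
    $BUILD/dbms/gtests_dbms
```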
From f29156fd68e228f3189841141622f0d98f236965 Mon Sep 17 00:00:00 2001
From: Jun Zhang
Date: Thu, 28 Apr 2022 21:14:52 +0800
Subject: [PATCH 50/79] fix: Suppress the warning from boost to fix build
 failure. (#4796)

close pingcap/tiflash#4771
---
 dbms/src/Functions/GeoUtils.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h
index 178e26c59a2..6bf1f52fbf0 100644
--- a/dbms/src/Functions/GeoUtils.h
+++ b/dbms/src/Functions/GeoUtils.h
@@ -30,6 +30,7 @@
 #endif

 #pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #pragma GCC diagnostic ignored "-Wunused-variable"
 #pragma GCC diagnostic ignored "-Wunknown-warning-option"

From 4019600ea97a11d150db8f2911a7928ed19faa5f Mon Sep 17 00:00:00 2001
From: xufei
Date: Thu, 28 Apr 2022 21:54:52 +0800
Subject: [PATCH 51/79] fix some unsafe constructor and destructor (#4782)

close pingcap/tiflash#4781
---
 dbms/src/Flash/Mpp/ExchangeReceiver.cpp    | 31 ++++++++++--
 dbms/src/Flash/Mpp/MPPTunnel.cpp           |  4 +-
 dbms/src/Server/Server.cpp                 | 57 +++++++++++++---------
 libs/libcommon/include/common/ThreadPool.h |  4 +-
 libs/libcommon/src/ThreadPool.cpp          | 50 +++++++++++++------
 5 files changed, 101 insertions(+), 45 deletions(-)

diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp
index 75796af6780..9639771c586 100644
--- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp
+++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp
@@ -304,15 +304,38 @@ ExchangeReceiverBase<RPCContext>::ExchangeReceiverBase(
     , exc_log(Logger::get("ExchangeReceiver", req_id, executor_id))
     , collected(false)
 {
-    rpc_context->fillSchema(schema);
-    setUpConnection();
+    try
+    {
+        rpc_context->fillSchema(schema);
+        setUpConnection();
+    }
+    catch (...)
+    {
+        try
+        {
+            cancel();
+            thread_manager->wait();
+        }
+        catch (...)
+        {
+            tryLogCurrentException(exc_log, __PRETTY_FUNCTION__);
+        }
+        throw;
+    }
 }

 template <typename RPCContext>
 ExchangeReceiverBase<RPCContext>::~ExchangeReceiverBase()
 {
-    close();
-    thread_manager->wait();
+    try
+    {
+        close();
+        thread_manager->wait();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(exc_log, __PRETTY_FUNCTION__);
+    }
 }

 template <typename RPCContext>
diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp
index b38e7d6a2ea..826e7fea88a 100644
--- a/dbms/src/Flash/Mpp/MPPTunnel.cpp
+++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp
@@ -95,13 +95,13 @@ MPPTunnelBase<Writer>::~MPPTunnelBase()
     }
     LOG_FMT_TRACE(log, "waiting consumer finish!");
     waitForConsumerFinish(/*allow_throw=*/false);
+    LOG_FMT_TRACE(log, "waiting child thread finished!");
+    thread_manager->wait();
 }
 catch (...)
 {
     tryLogCurrentException(log, "Error in destructor function of MPPTunnel");
 }
-    LOG_FMT_TRACE(log, "waiting child thread finished!");
-    thread_manager->wait();
     LOG_FMT_TRACE(log, "destructed tunnel obj!");
 }
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index a6681be92b1..4762935cd6f 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -644,30 +644,39 @@ class Server::FlashGrpcServerHolder

     ~FlashGrpcServerHolder()
     {
-        /// Shut down grpc server.
-        LOG_FMT_INFO(log, "Begin to shut down flash grpc server");
-        flash_grpc_server->Shutdown();
-        *is_shutdown = true;
-        // Wait all existed MPPTunnels done to prevent crash.
-        // If all existed MPPTunnels are done, almost in all cases it means all existed MPPTasks and ExchangeReceivers are also done.
- const int max_wait_cnt = 300; - int wait_cnt = 0; - while (GET_METRIC(tiflash_object_count, type_count_of_mpptunnel).Value() >= 1 && (wait_cnt++ < max_wait_cnt)) - std::this_thread::sleep_for(std::chrono::seconds(1)); - - for (auto & cq : cqs) - cq->Shutdown(); - for (auto & cq : notify_cqs) - cq->Shutdown(); - thread_manager->wait(); - flash_grpc_server->Wait(); - flash_grpc_server.reset(); - LOG_FMT_INFO(log, "Shut down flash grpc server"); - - /// Close flash service. - LOG_FMT_INFO(log, "Begin to shut down flash service"); - flash_service.reset(); - LOG_FMT_INFO(log, "Shut down flash service"); + try + { + /// Shut down grpc server. + LOG_FMT_INFO(log, "Begin to shut down flash grpc server"); + flash_grpc_server->Shutdown(); + *is_shutdown = true; + // Wait all existed MPPTunnels done to prevent crash. + // If all existed MPPTunnels are done, almost in all cases it means all existed MPPTasks and ExchangeReceivers are also done. + const int max_wait_cnt = 300; + int wait_cnt = 0; + while (GET_METRIC(tiflash_object_count, type_count_of_mpptunnel).Value() >= 1 && (wait_cnt++ < max_wait_cnt)) + std::this_thread::sleep_for(std::chrono::seconds(1)); + + for (auto & cq : cqs) + cq->Shutdown(); + for (auto & cq : notify_cqs) + cq->Shutdown(); + thread_manager->wait(); + flash_grpc_server->Wait(); + flash_grpc_server.reset(); + LOG_FMT_INFO(log, "Shut down flash grpc server"); + + /// Close flash service. + LOG_FMT_INFO(log, "Begin to shut down flash service"); + flash_service.reset(); + LOG_FMT_INFO(log, "Shut down flash service"); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_FMT_FATAL(log, "Exception happens in destructor of FlashGrpcServerHolder with message: {}", message); + std::terminate(); + } } private: diff --git a/libs/libcommon/include/common/ThreadPool.h b/libs/libcommon/include/common/ThreadPool.h index 08d7390e66e..cf3dfea9085 100644 --- a/libs/libcommon/include/common/ThreadPool.h +++ b/libs/libcommon/include/common/ThreadPool.h @@ -36,7 +36,8 @@ class ThreadPool /// Size is constant, all threads are created immediately. /// Every threads will execute pre_worker firstly when they are created. explicit ThreadPool( - size_t m_size, Job pre_worker = [] {}); + size_t m_size, + Job pre_worker = [] {}); /// Add new job. Locks until free thread in pool become available or exception in one of threads was thrown. /// If an exception in some thread was thrown, method silently returns, and exception will be rethrown only on call to 'wait' function. @@ -72,4 +73,5 @@ class ThreadPool void worker(); + void finalize(); }; diff --git a/libs/libcommon/src/ThreadPool.cpp b/libs/libcommon/src/ThreadPool.cpp index 682f873366a..ecc3a327798 100644 --- a/libs/libcommon/src/ThreadPool.cpp +++ b/libs/libcommon/src/ThreadPool.cpp @@ -12,20 +12,50 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include #include +static Poco::Logger * getLogger() +{ + static Poco::Logger * logger = &Poco::Logger::get("ThreadPool"); + return logger; +} + ThreadPool::ThreadPool(size_t m_size, Job pre_worker) : m_size(m_size) { threads.reserve(m_size); - for (size_t i = 0; i < m_size; ++i) - threads.emplace_back([this, pre_worker] { - pre_worker(); - worker(); - }); + try + { + for (size_t i = 0; i < m_size; ++i) + threads.emplace_back([this, pre_worker] { + pre_worker(); + worker(); + }); + } + catch (...) 
+ { + LOG_FMT_ERROR(getLogger(), "ThreadPool failed to allocate threads."); + finalize(); + throw; + } +} + +void ThreadPool::finalize() +{ + { + std::unique_lock lock(mutex); + shutdown = true; + } + + has_new_job_or_shutdown.notify_all(); + + for (auto & thread : threads) + thread.join(); } void ThreadPool::schedule(Job job) @@ -59,15 +89,7 @@ void ThreadPool::wait() ThreadPool::~ThreadPool() { - { - std::unique_lock lock(mutex); - shutdown = true; - } - - has_new_job_or_shutdown.notify_all(); - - for (auto & thread : threads) - thread.join(); + finalize(); } size_t ThreadPool::active() const From 63cae97cdaf4c001b4e1b21096fb6e86f885355f Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Fri, 29 Apr 2022 14:58:54 +0800 Subject: [PATCH 52/79] Export PageStorage V3 config into global config (#4502) ref pingcap/tiflash#3594 --- dbms/src/Interpreters/Settings.h | 8 +++- dbms/src/Server/tests/gtest_server_config.cpp | 12 +++++- dbms/src/Storages/DeltaMerge/StoragePool.cpp | 4 +- dbms/src/Storages/Page/ConfigSettings.cpp | 3 ++ dbms/src/Storages/Page/PageDefines.h | 18 ++++---- dbms/src/Storages/Page/PageStorage.h | 43 ++++++++++++++++++- .../src/Storages/Page/V2/gc/DataCompactor.cpp | 16 ++++++- dbms/src/Storages/Page/V3/PageDirectory.cpp | 6 ++- dbms/src/Storages/Page/V3/PageDirectory.h | 4 +- .../Storages/Page/V3/PageDirectoryFactory.cpp | 8 ++-- .../Storages/Page/V3/PageDirectoryFactory.h | 2 +- dbms/src/Storages/Page/V3/PageStorageImpl.cpp | 6 +-- dbms/src/Storages/Page/V3/PageStorageImpl.h | 26 ++++++++++- dbms/src/Storages/Page/V3/WAL/WALReader.cpp | 22 +++++++--- dbms/src/Storages/Page/V3/WAL/WALReader.h | 15 ++++++- dbms/src/Storages/Page/V3/WALStore.cpp | 16 ++++--- dbms/src/Storages/Page/V3/WALStore.h | 20 +++++++-- .../Page/V3/tests/gtest_page_directory.cpp | 2 +- .../Page/V3/tests/gtest_wal_store.cpp | 17 ++++---- 19 files changed, 191 insertions(+), 57 deletions(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 6adecea4d60..37fb6879473 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -272,8 +272,8 @@ struct Settings M(SettingUInt64, dt_segment_delta_small_column_file_size, 8388608, "Determine whether a column file in delta is small or not. 8MB by default.") \ M(SettingUInt64, dt_segment_stable_pack_rows, DEFAULT_MERGE_BLOCK_SIZE, "Expected stable pack rows in DeltaTree Engine.") \ M(SettingFloat, dt_segment_wait_duration_factor, 1, "The factor of wait duration in a write stall.") \ - M(SettingUInt64, dt_bg_gc_check_interval, 5, "Background gc thread check interval, the unit is second.") \ - M(SettingInt64, dt_bg_gc_max_segments_to_check_every_round, 100, "Max segments to check in every gc round, value less than or equal to 0 means gc no segments.") \ + M(SettingUInt64, dt_bg_gc_check_interval, 5, "Background gc thread check interval, the unit is second.") \ + M(SettingInt64, dt_bg_gc_max_segments_to_check_every_round, 100, "Max segments to check in every gc round, value less than or equal to 0 means gc no segments.") \ M(SettingFloat, dt_bg_gc_ratio_threhold_to_trigger_gc, 1.2, "Trigger segment's gc when the ratio of invalid version exceed this threhold. 
Values smaller than or equal to 1.0 means gc all " \ "segments") \ M(SettingFloat, dt_bg_gc_delta_delete_ratio_to_trigger_gc, 0.3, "Trigger segment's gc when the ratio of delta delete range to stable exceeds this ratio.") \ @@ -294,6 +294,7 @@ struct Settings "`dt_stroage_num_max_expect_legacy_files`") \ M(SettingFloat, dt_page_gc_low_write_prob, 0.10, "Probability to run gc when write there is few writes.") \ \ + \ M(SettingUInt64, dt_storage_pool_log_write_slots, 4, "Max write concurrency for each StoragePool.log.") \ M(SettingUInt64, dt_storage_pool_log_gc_min_file_num, 10, "Min number of page files to compact") \ M(SettingUInt64, dt_storage_pool_log_gc_min_legacy_num, 3, "Min number of legacy page files to compact") \ @@ -313,6 +314,9 @@ struct Settings M(SettingFloat, dt_storage_pool_meta_gc_max_valid_rate, 0.35, "Max valid rate of deciding a page file can be compact") \ \ M(SettingUInt64, dt_checksum_frame_size, DBMS_DEFAULT_BUFFER_SIZE, "Frame size for delta tree stable storage") \ + \ + M(SettingDouble, dt_storage_blob_heavy_gc_valid_rate, 0.2, "Max valid rate of deciding a blob can be compact") \ + \ M(SettingChecksumAlgorithm, dt_checksum_algorithm, ChecksumAlgo::XXH3, "Checksum algorithm for delta tree stable storage") \ M(SettingCompressionMethod, dt_compression_method, CompressionMethod::LZ4, "The method of data compression when writing.") \ M(SettingInt64, dt_compression_level, 1, "The compression level.") \ diff --git a/dbms/src/Server/tests/gtest_server_config.cpp b/dbms/src/Server/tests/gtest_server_config.cpp index f74e85a86c4..01f2424d939 100644 --- a/dbms/src/Server/tests/gtest_server_config.cpp +++ b/dbms/src/Server/tests/gtest_server_config.cpp @@ -165,6 +165,7 @@ max_rows_in_set = 455 dt_segment_limit_rows = 1000005 dt_enable_rough_set_filter = 0 max_memory_usage = 102000 +dt_storage_blob_heavy_gc_valid_rate = 0.2 dt_storage_pool_data_gc_min_file_num = 8 dt_storage_pool_data_gc_min_legacy_num = 2 dt_storage_pool_data_gc_min_bytes = 256 @@ -221,6 +222,7 @@ dt_compression_level = 1 ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_file_num, 8); ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_legacy_num, 2); ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_bytes, 256); + ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_blob_heavy_gc_valid_rate, 0.2); ASSERT_EQ(global_ctx.getSettingsRef().dt_segment_delta_small_column_file_size, 8388608); ASSERT_EQ(global_ctx.getSettingsRef().dt_segment_delta_small_column_file_rows, 2048); ASSERT_EQ(global_ctx.getSettingsRef().dt_segment_limit_size, 536870912); @@ -274,6 +276,7 @@ try max_rows_in_set = 455 dt_segment_limit_rows = 1000005 dt_enable_rough_set_filter = 0 +dt_storage_blob_heavy_gc_valid_rate = 0.3 max_memory_usage = 102000 dt_storage_pool_data_gc_min_file_num = 8 dt_storage_pool_data_gc_min_legacy_num = 2 @@ -295,16 +298,18 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_NE(cfg.gc_min_legacy_num, settings.dt_storage_pool_data_gc_min_legacy_num); EXPECT_NE(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_NE(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); + EXPECT_NE(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); EXPECT_NE(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_NE(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); - persister.gc(); cfg = persister.page_storage->getSettings(); + EXPECT_NE(cfg.gc_min_files, 
settings.dt_storage_pool_data_gc_min_file_num); EXPECT_NE(cfg.gc_min_legacy_num, settings.dt_storage_pool_data_gc_min_legacy_num); EXPECT_NE(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_NE(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); + EXPECT_EQ(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); EXPECT_EQ(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_EQ(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); }; @@ -327,6 +332,7 @@ dt_page_gc_low_write_prob = 0.2 ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_legacy_num, 2); ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_bytes, 256); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_max_valid_rate, 0.5); + ASSERT_DOUBLE_EQ(global_ctx.getSettingsRef().dt_storage_blob_heavy_gc_valid_rate, 0.3); ASSERT_EQ(global_ctx.getSettingsRef().dt_open_file_max_idle_seconds, 20); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_page_gc_low_write_prob, 0.2); verify_persister_reload_config(persister); @@ -346,6 +352,7 @@ max_rows_in_set = 455 dt_segment_limit_rows = 1000005 dt_enable_rough_set_filter = 0 max_memory_usage = 102000 +dt_storage_blob_heavy_gc_valid_rate = 0.3 dt_storage_pool_data_gc_min_file_num = 8 dt_storage_pool_data_gc_min_legacy_num = 2 dt_storage_pool_data_gc_min_bytes = 256 @@ -366,6 +373,7 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_NE(cfg.gc_min_legacy_num, settings.dt_storage_pool_data_gc_min_legacy_num); EXPECT_NE(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_NE(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); + EXPECT_NE(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); EXPECT_NE(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_NE(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); @@ -376,6 +384,7 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_EQ(cfg.gc_min_legacy_num, settings.dt_storage_pool_data_gc_min_legacy_num); EXPECT_EQ(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_DOUBLE_EQ(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); + EXPECT_DOUBLE_EQ(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); EXPECT_EQ(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_EQ(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); }; @@ -398,6 +407,7 @@ dt_page_gc_low_write_prob = 0.2 ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_legacy_num, 2); ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_bytes, 256); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_max_valid_rate, 0.5); + ASSERT_DOUBLE_EQ(global_ctx.getSettingsRef().dt_storage_blob_heavy_gc_valid_rate, 0.3); ASSERT_EQ(global_ctx.getSettingsRef().dt_open_file_max_idle_seconds, 20); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_page_gc_low_write_prob, 0.2); verify_storage_pool_reload_config(storage_pool); diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index 13d549550f6..8cc7dd93f48 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -42,11 +42,11 @@ PageStorage::Config extractConfig(const Settings & settings, StorageType subtype config.gc_min_files = 
settings.dt_storage_pool_##NAME##_gc_min_file_num; \ config.gc_min_bytes = settings.dt_storage_pool_##NAME##_gc_min_bytes; \ config.gc_min_legacy_num = settings.dt_storage_pool_##NAME##_gc_min_legacy_num; \ - config.gc_max_valid_rate = settings.dt_storage_pool_##NAME##_gc_max_valid_rate; + config.gc_max_valid_rate = settings.dt_storage_pool_##NAME##_gc_max_valid_rate; \ + config.blob_heavy_gc_valid_rate = settings.dt_storage_blob_heavy_gc_valid_rate; PageStorage::Config config = getConfigFromSettings(settings); - switch (subtype) { case StorageType::Log: diff --git a/dbms/src/Storages/Page/ConfigSettings.cpp b/dbms/src/Storages/Page/ConfigSettings.cpp index 12704234b22..5995a657b98 100644 --- a/dbms/src/Storages/Page/ConfigSettings.cpp +++ b/dbms/src/Storages/Page/ConfigSettings.cpp @@ -33,6 +33,9 @@ void mergeConfigFromSettings(const DB::Settings & settings, PageStorage::Config config.gc_max_expect_legacy_files = settings.dt_page_num_max_expect_legacy_files; if (settings.dt_page_num_max_gc_valid_rate > 0.0) config.gc_max_valid_rate_bound = settings.dt_page_num_max_gc_valid_rate; + + // V3 setting which export to global setting + config.blob_heavy_gc_valid_rate = settings.dt_storage_blob_heavy_gc_valid_rate; } PageStorage::Config getConfigFromSettings(const DB::Settings & settings) diff --git a/dbms/src/Storages/Page/PageDefines.h b/dbms/src/Storages/Page/PageDefines.h index 1b3d3a90331..d99f6be6bb3 100644 --- a/dbms/src/Storages/Page/PageDefines.h +++ b/dbms/src/Storages/Page/PageDefines.h @@ -30,21 +30,21 @@ using Seconds = std::chrono::seconds; static constexpr UInt64 MB = 1ULL * 1024 * 1024; static constexpr UInt64 GB = MB * 1024; + +// PageStorage V2 define static constexpr UInt64 PAGE_SIZE_STEP = (1 << 10) * 16; // 16 KB -static constexpr UInt64 PAGE_BUFFER_SIZE = DBMS_DEFAULT_BUFFER_SIZE; -static constexpr UInt64 PAGE_MAX_BUFFER_SIZE = 128 * MB; -static constexpr UInt64 PAGE_SPLIT_SIZE = 1 * MB; static constexpr UInt64 PAGE_FILE_MAX_SIZE = 1024 * 2 * MB; static constexpr UInt64 PAGE_FILE_SMALL_SIZE = 2 * MB; static constexpr UInt64 PAGE_FILE_ROLL_SIZE = 128 * MB; -static constexpr UInt64 PAGE_META_ROLL_SIZE = 2 * MB; - -static constexpr UInt64 BLOBFILE_LIMIT_SIZE = 512 * MB; -static constexpr UInt64 BLOBSTORE_CACHED_FD_SIZE = 100; static_assert(PAGE_SIZE_STEP >= ((1 << 10) * 16), "PAGE_SIZE_STEP should be at least 16 KB"); static_assert((PAGE_SIZE_STEP & (PAGE_SIZE_STEP - 1)) == 0, "PAGE_SIZE_STEP should be power of 2"); -static_assert(PAGE_BUFFER_SIZE % PAGE_SIZE_STEP == 0, "PAGE_BUFFER_SIZE should be dividable by PAGE_SIZE_STEP"); + +// PageStorage V3 define +static constexpr UInt64 BLOBFILE_LIMIT_SIZE = 512 * MB; +static constexpr UInt64 BLOBSTORE_CACHED_FD_SIZE = 100; +static constexpr UInt64 PAGE_META_ROLL_SIZE = 2 * MB; +static constexpr UInt64 MAX_PERSISTED_LOG_FILES = 4; using NamespaceId = UInt64; static constexpr NamespaceId MAX_NAMESPACE_ID = UINT64_MAX; @@ -111,7 +111,7 @@ inline size_t alignPage(size_t n) template <> struct fmt::formatter { - constexpr auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) + static constexpr auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) { return ctx.begin(); } diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 3b5e1a18c9d..4da12e2b5e0 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -74,6 +74,9 @@ class PageStorage : private boost::noncopyable struct Config { + 
//========================================================================================== + // V2 config + //========================================================================================== SettingBool sync_on_write = true; SettingUInt64 file_roll_size = PAGE_FILE_ROLL_SIZE; @@ -109,6 +112,18 @@ class PageStorage : private boost::noncopyable MVCC::VersionSetConfig version_set_config; + //========================================================================================== + // V3 config + //========================================================================================== + SettingUInt64 blob_file_limit_size = BLOBFILE_LIMIT_SIZE; + SettingUInt64 blob_spacemap_type = 2; + SettingUInt64 blob_cached_fd_size = BLOBSTORE_CACHED_FD_SIZE; + SettingDouble blob_heavy_gc_valid_rate = 0.2; + + SettingUInt64 wal_roll_size = PAGE_META_ROLL_SIZE; + SettingUInt64 wal_recover_mode = 0; + SettingUInt64 wal_max_persisted_log_files = MAX_PERSISTED_LOG_FILES; + void reload(const Config & rhs) { // Reload is not atomic, but should be good enough @@ -122,9 +137,18 @@ class PageStorage : private boost::noncopyable prob_do_gc_when_write_is_low = rhs.prob_do_gc_when_write_is_low; // Reload fd idle time open_file_max_idle_time = rhs.open_file_max_idle_time; + + // Reload V3 setting + blob_file_limit_size = rhs.blob_file_limit_size; + blob_spacemap_type = rhs.blob_spacemap_type; + blob_cached_fd_size = rhs.blob_cached_fd_size; + blob_heavy_gc_valid_rate = rhs.blob_heavy_gc_valid_rate; + wal_roll_size = rhs.wal_roll_size; + wal_recover_mode = rhs.wal_recover_mode; + wal_max_persisted_log_files = rhs.wal_max_persisted_log_files; } - String toDebugString() const + String toDebugStringV2() const { return fmt::format( "PageStorage::Config {{gc_min_files: {}, gc_min_bytes:{}, gc_force_hardlink_rate: {:.3f}, gc_max_valid_rate: {:.3f}, " @@ -140,11 +164,26 @@ class PageStorage : private boost::noncopyable prob_do_gc_when_write_is_low, open_file_max_idle_time); } + + String toDebugStringV3() const + { + return fmt::format( + "PageStorage::Config V3 {{" + "blob_file_limit_size: {}, blob_spacemap_type: {}, " + "blob_cached_fd_size: {}, blob_heavy_gc_valid_rate: {:.3f}, " + "wal_roll_size: {}, wal_recover_mode: {}, wal_max_persisted_log_files: {}}}", + blob_file_limit_size.get(), + blob_spacemap_type.get(), + blob_cached_fd_size.get(), + blob_heavy_gc_valid_rate.get(), + wal_roll_size.get(), + wal_recover_mode.get(), + wal_max_persisted_log_files.get()); + } }; void reloadSettings(const Config & new_config) { config.reload(new_config); }; Config getSettings() const { return config; } - public: static PageStoragePtr create( diff --git a/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp b/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp index 8a5cc5a3146..3e5a25125ae 100644 --- a/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp +++ b/dbms/src/Storages/Page/V2/gc/DataCompactor.cpp @@ -81,7 +81,13 @@ DataCompactor::tryMigrate( // } else { - LOG_FMT_DEBUG(log, "{} DataCompactor::tryMigrate exit without compaction [candidates size={}] [total byte size={}], [files without valid page={}] Config{{ {} }}", storage_name, result.candidate_size, result.bytes_migrate, candidates.files_without_valid_pages.size(), config.toDebugString()); + LOG_FMT_DEBUG(log, "{} DataCompactor::tryMigrate exit without compaction [candidates size={}] [total byte size={}], " + "[files without valid page={}] Config{{ {} }}", // + storage_name, + result.candidate_size, + result.bytes_migrate, + candidates.files_without_valid_pages.size(), + 
config.toDebugStringV2()); } return {result, std::move(migrate_entries_edit)}; @@ -555,7 +561,13 @@ void DataCompactor::logMigrationDetails(const MigrateInfos & infos, } migrate_stream << "]"; remove_stream << "]"; - LOG_FMT_DEBUG(log, "{} Migrate pages to PageFile_{}_{}, migrate: {}, remove: {}, Config{{ {} }}", storage_name, migrate_file_id.first, migrate_file_id.second, migrate_stream.str(), remove_stream.str(), config.toDebugString()); + LOG_FMT_DEBUG(log, "{} Migrate pages to PageFile_{}_{}, migrate: {}, remove: {}, Config{{ {} }}", // + storage_name, + migrate_file_id.first, + migrate_file_id.second, + migrate_stream.str(), + remove_stream.str(), + config.toDebugStringV2()); } diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index 4dfe2c6da09..c98c368be80 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -638,10 +638,12 @@ void VersionedPageEntries::collapseTo(const UInt64 seq, const PageIdV3Internal p * PageDirectory methods * *************************/ -PageDirectory::PageDirectory(String storage_name, WALStorePtr && wal_) +PageDirectory::PageDirectory(String storage_name, WALStorePtr && wal_, UInt64 max_persisted_log_files_) : sequence(0) , wal(std::move(wal_)) + , max_persisted_log_files(max_persisted_log_files_) , log(Logger::get("PageDirectory", std::move(storage_name))) + { } @@ -1127,7 +1129,7 @@ bool PageDirectory::tryDumpSnapshot(const WriteLimiterPtr & write_limiter) bool done_any_io = false; // In order not to make read amplification too high, only apply compact logs when ... auto files_snap = wal->getFilesSnapshot(); - if (files_snap.needSave()) + if (files_snap.needSave(max_persisted_log_files)) { // The records persisted in `files_snap` is older than or equal to all records in `edit` auto edit = dumpSnapshotToEdit(); diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index 4cdf51bbf91..b4b12a0d075 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -286,7 +286,7 @@ using PageDirectoryPtr = std::unique_ptr; class PageDirectory { public: - explicit PageDirectory(String storage_name, WALStorePtr && wal); + explicit PageDirectory(String storage_name, WALStorePtr && wal, UInt64 max_persisted_log_files_ = MAX_PERSISTED_LOG_FILES); PageDirectorySnapshotPtr createSnapshot(const String & tracing_id = "") const; @@ -398,7 +398,7 @@ class PageDirectory mutable std::list> external_ids; WALStorePtr wal; - + const UInt64 max_persisted_log_files; LoggerPtr log; }; diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 39664063f51..7f7e7f19989 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -22,10 +22,10 @@ namespace DB::PS::V3 { -PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator) +PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, WALStore::Config config) { - auto [wal, reader] = WALStore::create(storage_name, file_provider, delegator); - PageDirectoryPtr dir = std::make_unique(std::move(storage_name), std::move(wal)); + auto [wal, reader] = WALStore::create(storage_name, file_provider, delegator, config); + PageDirectoryPtr dir = std::make_unique(std::move(storage_name), 
std::move(wal), config.max_persisted_log_files); loadFromDisk(dir, std::move(reader)); // Reset the `sequence` to the maximum of persisted. @@ -63,7 +63,7 @@ PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderP PageDirectoryPtr PageDirectoryFactory::createFromEdit(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, const PageEntriesEdit & edit) { - auto [wal, reader] = WALStore::create(storage_name, file_provider, delegator); + auto [wal, reader] = WALStore::create(storage_name, file_provider, delegator, WALStore::Config()); (void)reader; PageDirectoryPtr dir = std::make_unique(std::move(storage_name), std::move(wal)); loadEdit(dir, edit); diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h index d22f89ff2bb..278298d7010 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h @@ -46,7 +46,7 @@ class PageDirectoryFactory return *this; } - PageDirectoryPtr create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator); + PageDirectoryPtr create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, WALStore::Config config); // just for test PageDirectoryPtr createFromEdit(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, const PageEntriesEdit & edit); diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index 9ee0616c987..a7fb9374a1c 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -36,13 +36,13 @@ PageStorageImpl::PageStorageImpl( const FileProviderPtr & file_provider_) : DB::PageStorage(name, delegator_, config_, file_provider_) , log(Logger::get("PageStorage", name)) - , blob_store(name, file_provider_, delegator, blob_config) + , blob_store(name, file_provider_, delegator, parseBlobConfig(config_)) { + LOG_FMT_INFO(log, "PageStorageImpl start. Config{{ {} }}", config.toDebugStringV3()); } PageStorageImpl::~PageStorageImpl() = default; - void PageStorageImpl::restore() { // TODO: clean up blobstore. 
@@ -52,7 +52,7 @@ void PageStorageImpl::restore() PageDirectoryFactory factory; page_directory = factory .setBlobStore(blob_store) - .create(storage_name, file_provider, delegator); + .create(storage_name, file_provider, delegator, parseWALConfig(config)); // factory.max_applied_page_id // TODO: return it to outer function } diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index eb1fc91b2e7..10c35d6ab2b 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -35,6 +36,29 @@ class PageStorageImpl : public DB::PageStorage ~PageStorageImpl(); + static BlobStore::Config parseBlobConfig(const Config & config) + { + BlobStore::Config blob_config; + + blob_config.file_limit_size = config.blob_file_limit_size; + blob_config.cached_fd_size = config.blob_cached_fd_size; + blob_config.spacemap_type = config.blob_spacemap_type; + blob_config.heavy_gc_valid_rate = config.blob_heavy_gc_valid_rate; + + return blob_config; + } + + static WALStore::Config parseWALConfig(const Config & config) + { + WALStore::Config wal_config; + + wal_config.roll_size = config.wal_roll_size; + wal_config.wal_recover_mode = config.wal_recover_mode; + wal_config.max_persisted_log_files = config.wal_max_persisted_log_files; + + return wal_config; + } + void restore() override; void drop() override; @@ -90,8 +114,6 @@ class PageStorageImpl : public DB::PageStorage PageDirectoryPtr page_directory; - BlobStore::Config blob_config; - BlobStore blob_store; std::atomic gc_is_running = false; diff --git a/dbms/src/Storages/Page/V3/WAL/WALReader.cpp b/dbms/src/Storages/Page/V3/WAL/WALReader.cpp index 9a17ee6b7c1..9b914d8800a 100644 --- a/dbms/src/Storages/Page/V3/WAL/WALReader.cpp +++ b/dbms/src/Storages/Page/V3/WAL/WALReader.cpp @@ -122,27 +122,39 @@ WALStoreReader::findCheckpoint(LogFilenameSet && all_files) WALStoreReaderPtr WALStoreReader::create(String storage_name, FileProviderPtr & provider, LogFilenameSet files, + WALRecoveryMode recovery_mode_, const ReadLimiterPtr & read_limiter) { auto [checkpoint, files_to_read] = findCheckpoint(std::move(files)); - auto reader = std::make_shared(std::move(storage_name), provider, checkpoint, std::move(files_to_read), read_limiter); + auto reader = std::make_shared(storage_name, provider, checkpoint, std::move(files_to_read), recovery_mode_, read_limiter); reader->openNextFile(); return reader; } -WALStoreReaderPtr WALStoreReader::create(String storage_name, FileProviderPtr & provider, PSDiskDelegatorPtr & delegator, const ReadLimiterPtr & read_limiter) +WALStoreReaderPtr WALStoreReader::create( + String storage_name, + FileProviderPtr & provider, + PSDiskDelegatorPtr & delegator, + WALRecoveryMode recovery_mode_, + const ReadLimiterPtr & read_limiter) { LogFilenameSet log_files = listAllFiles(delegator, Logger::get("WALStore", storage_name)); - return create(storage_name, provider, std::move(log_files), read_limiter); + return create(std::move(storage_name), provider, std::move(log_files), recovery_mode_, read_limiter); } -WALStoreReader::WALStoreReader(String storage_name, FileProviderPtr & provider_, std::optional checkpoint, LogFilenameSet && files_, const ReadLimiterPtr & read_limiter_) +WALStoreReader::WALStoreReader(String storage_name, + FileProviderPtr & provider_, + std::optional checkpoint, + LogFilenameSet && files_, + WALRecoveryMode recovery_mode_, + const ReadLimiterPtr & read_limiter_) : 
provider(provider_) , read_limiter(read_limiter_) , checkpoint_read_done(!checkpoint.has_value()) , checkpoint_file(checkpoint) , files_to_read(std::move(files_)) , next_reading_file(files_to_read.begin()) + , recovery_mode(recovery_mode_) , logger(Logger::get("WALStore", std::move(storage_name))) {} @@ -208,7 +220,7 @@ bool WALStoreReader::openNextFile() &reporter, /*verify_checksum*/ true, log_num, - WALRecoveryMode::TolerateCorruptedTailRecords); + recovery_mode); }; if (!checkpoint_read_done) diff --git a/dbms/src/Storages/Page/V3/WAL/WALReader.h b/dbms/src/Storages/Page/V3/WAL/WALReader.h index 24cb8446249..b12f2f35e9d 100644 --- a/dbms/src/Storages/Page/V3/WAL/WALReader.h +++ b/dbms/src/Storages/Page/V3/WAL/WALReader.h @@ -16,6 +16,7 @@ #include #include +#include namespace DB { @@ -49,9 +50,17 @@ class WALStoreReader static std::tuple, LogFilenameSet> findCheckpoint(LogFilenameSet && all_files); - static WALStoreReaderPtr create(String storage_name, FileProviderPtr & provider, LogFilenameSet files, const ReadLimiterPtr & read_limiter = nullptr); + static WALStoreReaderPtr create(String storage_name, + FileProviderPtr & provider, + LogFilenameSet files, + WALRecoveryMode recovery_mode_ = WALRecoveryMode::TolerateCorruptedTailRecords, + const ReadLimiterPtr & read_limiter = nullptr); - static WALStoreReaderPtr create(String storage_name, FileProviderPtr & provider, PSDiskDelegatorPtr & delegator, const ReadLimiterPtr & read_limiter = nullptr); + static WALStoreReaderPtr create(String storage_name, + FileProviderPtr & provider, + PSDiskDelegatorPtr & delegator, + WALRecoveryMode recovery_mode_ = WALRecoveryMode::TolerateCorruptedTailRecords, + const ReadLimiterPtr & read_limiter = nullptr); bool remained() const; @@ -79,6 +88,7 @@ class WALStoreReader FileProviderPtr & provider_, std::optional checkpoint, LogFilenameSet && files_, + WALRecoveryMode recovery_mode_, const ReadLimiterPtr & read_limiter_); WALStoreReader(const WALStoreReader &) = delete; @@ -97,6 +107,7 @@ class WALStoreReader LogFilenameSet::const_iterator next_reading_file; std::unique_ptr reader; + WALRecoveryMode recovery_mode; LoggerPtr logger; }; diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 4b563cbe4a0..6585c6dfdfe 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -40,13 +40,17 @@ namespace DB::PS::V3 std::pair WALStore::create( String storage_name, FileProviderPtr & provider, - PSDiskDelegatorPtr & delegator) + PSDiskDelegatorPtr & delegator, + WALStore::Config config) { - auto reader = WALStoreReader::create(storage_name, provider, delegator); + auto reader = WALStoreReader::create(storage_name, + provider, + delegator, + static_cast(config.wal_recover_mode.get())); // Create a new LogFile for writing new logs auto last_log_num = reader->lastLogNum() + 1; // TODO reuse old file return { - std::unique_ptr(new WALStore(std::move(storage_name), delegator, provider, last_log_num)), + std::unique_ptr(new WALStore(std::move(storage_name), delegator, provider, last_log_num, std::move(config))), reader}; } @@ -54,12 +58,14 @@ WALStore::WALStore( String storage_name, const PSDiskDelegatorPtr & delegator_, const FileProviderPtr & provider_, - Format::LogNumberType last_log_num_) + Format::LogNumberType last_log_num_, + WALStore::Config config_) : delegator(delegator_) , provider(provider_) , last_log_num(last_log_num_) , wal_paths_index(0) , logger(Logger::get("WALStore", std::move(storage_name))) + , config(config_) { } 
@@ -81,7 +87,7 @@ void WALStore::apply(const PageEntriesEdit & edit, const WriteLimiterPtr & write std::lock_guard lock(log_file_mutex); // Roll to a new log file // TODO: Make it configurable - if (log_file == nullptr || log_file->writtenBytes() > PAGE_META_ROLL_SIZE) + if (log_file == nullptr || log_file->writtenBytes() > config.roll_size) { auto log_num = last_log_num++; auto [new_log_file, filename] = createLogWriter({log_num, 0}, false); diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index ffc744bec9f..8984c10e5e8 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include #include @@ -82,13 +83,21 @@ using WALStoreReaderPtr = std::shared_ptr; class WALStore { public: + struct Config + { + SettingUInt64 roll_size = PAGE_META_ROLL_SIZE; + SettingUInt64 wal_recover_mode = 0; + SettingUInt64 max_persisted_log_files = MAX_PERSISTED_LOG_FILES; + }; + constexpr static const char * wal_folder_prefix = "/wal"; static std::pair create( String storage_name, FileProviderPtr & provider, - PSDiskDelegatorPtr & delegator); + PSDiskDelegatorPtr & delegator, + WALStore::Config config); void apply(PageEntriesEdit & edit, const PageVersionType & version, const WriteLimiterPtr & write_limiter = nullptr); void apply(const PageEntriesEdit & edit, const WriteLimiterPtr & write_limiter = nullptr); @@ -98,10 +107,10 @@ class WALStore Format::LogNumberType current_writting_log_num; LogFilenameSet persisted_log_files; - bool needSave() const + bool needSave(const size_t & max_size) const { // TODO: Make it configurable and check the reasonable of this number - return persisted_log_files.size() > 4; + return persisted_log_files.size() > max_size; } }; @@ -117,7 +126,8 @@ class WALStore String storage_name, const PSDiskDelegatorPtr & delegator_, const FileProviderPtr & provider_, - Format::LogNumberType last_log_num_); + Format::LogNumberType last_log_num_, + WALStore::Config config); std::tuple, LogFilename> createLogWriter( @@ -133,6 +143,8 @@ class WALStore std::unique_ptr log_file; LoggerPtr logger; + + WALStore::Config config; }; } // namespace PS::V3 diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index ae149fbf69b..ad00c47c097 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -54,7 +54,7 @@ class PageDirectoryTest : public DB::base::TiFlashStorageTestBasic FileProviderPtr provider = ctx.getFileProvider(); PSDiskDelegatorPtr delegator = std::make_shared(path); PageDirectoryFactory factory; - dir = factory.create("PageDirectoryTest", provider, delegator); + dir = factory.create("PageDirectoryTest", provider, delegator, WALStore::Config()); } protected: diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp index eef0c6008cd..008a311841c 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp @@ -249,6 +249,7 @@ class WALStoreTest protected: PSDiskDelegatorPtr delegator; + WALStore::Config config; }; TEST_P(WALStoreTest, FindCheckpointFile) @@ -307,7 +308,7 @@ TEST_P(WALStoreTest, Empty) auto provider = ctx.getFileProvider(); auto path = getTemporaryPath(); size_t num_callback_called = 0; - auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator); + 
auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator, config); ASSERT_NE(wal, nullptr); while (reader->remained()) { @@ -333,7 +334,7 @@ try // Stage 1. empty std::vector size_each_edit; - auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator); + auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator, config); { size_t num_applied_edit = 0; auto reader = WALStoreReader::create(getCurrentTestName(), provider, delegator); @@ -361,7 +362,7 @@ try wal.reset(); reader.reset(); - std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator); + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator, config); { size_t num_applied_edit = 0; while (reader->remained()) @@ -393,7 +394,7 @@ try wal.reset(); reader.reset(); - std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator); + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator, config); { size_t num_applied_edit = 0; while (reader->remained()) @@ -451,7 +452,7 @@ try auto provider = ctx.getFileProvider(); auto path = getTemporaryPath(); - auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator); + auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator, config); ASSERT_NE(wal, nullptr); std::vector size_each_edit; @@ -515,7 +516,7 @@ try { size_t num_applied_edit = 0; - std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator); + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator, config); while (reader->remained()) { auto [ok, edit] = reader->next(); @@ -542,7 +543,7 @@ try auto path = getTemporaryPath(); // Stage 1. 
empty - auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator); + auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator, config); ASSERT_NE(wal, nullptr); std::mt19937 rd; @@ -575,7 +576,7 @@ try size_t num_edits_read = 0; size_t num_pages_read = 0; - std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator); + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator, config); while (reader->remained()) { auto [ok, edit] = reader->next(); From 934b3d53d0101254092f678e727e4bc084a16d0b Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Mon, 2 May 2022 19:04:53 +0800 Subject: [PATCH 53/79] Support weekofyear pushdown to tiflash (#4791) close pingcap/tiflash#4677 --- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 2 +- dbms/src/Functions/FunctionsDateTime.cpp | 1 + dbms/src/Functions/FunctionsDateTime.h | 40 ++++++ dbms/src/Functions/tests/gtest_weekofyear.cpp | 121 ++++++++++++++++++ tests/fullstack-test/expr/week_of_year.test | 33 +++++ 5 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Functions/tests/gtest_weekofyear.cpp create mode 100644 tests/fullstack-test/expr/week_of_year.test diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 6f5beec9937..a53216ff1bb 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -516,7 +516,7 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::WeekWithMode, "cast"}, //{tipb::ScalarFuncSig::WeekWithoutMode, "cast"}, //{tipb::ScalarFuncSig::WeekDay, "cast"}, - //{tipb::ScalarFuncSig::WeekOfYear, "cast"}, + {tipb::ScalarFuncSig::WeekOfYear, "tidbWeekOfYear"}, {tipb::ScalarFuncSig::Year, "toYear"}, //{tipb::ScalarFuncSig::YearWeekWithMode, "cast"}, diff --git a/dbms/src/Functions/FunctionsDateTime.cpp b/dbms/src/Functions/FunctionsDateTime.cpp index ebfaf176945..c3ef00b19e1 100644 --- a/dbms/src/Functions/FunctionsDateTime.cpp +++ b/dbms/src/Functions/FunctionsDateTime.cpp @@ -136,6 +136,7 @@ void registerFunctionsDateTime(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index 77e0ff6dca1..3d15186b472 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -3238,6 +3238,45 @@ struct TiDBDayOfYearTransformerImpl return static_cast(val.yearDay()); } }; + +template +struct TiDBWeekOfYearTransformerImpl +{ + static constexpr auto name = "tidbWeekOfYear"; + + static void execute(const Context & context, + const ColumnVector::Container & vec_from, + typename ColumnVector::Container & vec_to, + typename ColumnVector::Container & vec_null_map) + { + bool is_null = false; + for (size_t i = 0; i < vec_from.size(); ++i) + { + MyTimeBase val(vec_from[i]); + vec_to[i] = execute(context, val, is_null); + vec_null_map[i] = is_null; + is_null = false; + } + } + + static ToFieldType execute(const Context & context, const MyTimeBase & val, bool & is_null) + { + // TiDB also considers NO_ZERO_DATE sql_mode. But sql_mode is not handled by TiFlash for now. 
+        if (val.month == 0 || val.day == 0)
+        {
+            context.getDAGContext()->handleInvalidTime(
+                fmt::format("Invalid time value: month({}) or day({}) is zero", val.month, val.day),
+                Errors::Types::WrongValue);
+            is_null = true;
+            return 0;
+        }
+        /// Behavior differences from TiDB:
+        /// for '0000-01-02', weekofyear is the same as MySQL, while TiDB is offset by one day
+        /// TiDB_weekofyear('0000-01-02') = 52, MySQL/TiFlash_weekofyear('0000-01-02') = 1
+        return static_cast(val.week(3));
+    }
+};
+
 // Similar to FunctionDateOrDateTimeToSomething, but also handle nullable result and mysql sql mode.
 template class Transformer, bool return_nullable>
 class FunctionMyDateOrMyDateTimeToSomething : public IFunction
@@ -3336,6 +3375,7 @@ using FunctionToTime = FunctionDateOrDateTimeToSomething;
 using FunctionToTiDBDayOfWeek = FunctionMyDateOrMyDateTimeToSomething;
 using FunctionToTiDBDayOfYear = FunctionMyDateOrMyDateTimeToSomething;
+using FunctionToTiDBWeekOfYear = FunctionMyDateOrMyDateTimeToSomething;
 using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething;
 using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething;
diff --git a/dbms/src/Functions/tests/gtest_weekofyear.cpp b/dbms/src/Functions/tests/gtest_weekofyear.cpp
new file mode 100644
index 00000000000..6a7a4c122d5
--- /dev/null
+++ b/dbms/src/Functions/tests/gtest_weekofyear.cpp
@@ -0,0 +1,121 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
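The new gtest below pins down the expected values. As a standalone cross-check of the mode-3 (ISO 8601) week numbering that `val.week(3)` follows (weeks start on Monday; week 1 is the week containing the first Thursday of the year), here is an illustrative C++20 `<chrono>` sketch. It is not TiFlash code and does not use `MyTimeBase`; it only demonstrates the numbering rule:

```cpp
// Illustration only: ISO 8601 week number via the "Thursday rule".
// Requires C++20 <chrono>. Not part of this patch.
#include <chrono>
#include <cstdio>

static int isoWeek(std::chrono::year_month_day ymd)
{
    using namespace std::chrono;
    const sys_days d{ymd};
    const weekday wd{d};
    // The Thursday of the same ISO week decides which year the week belongs to.
    const sys_days thursday = d - days(wd.iso_encoding() - 1) + days(3);
    const year_month_day thu{thursday};
    const sys_days jan1{year_month_day{thu.year(), January, day{1}}};
    return static_cast<int>((thursday - jan1).count()) / 7 + 1;
}

int main()
{
    using namespace std::chrono;
    std::printf("%d\n", isoWeek({year{2022}, May, day{9}}));       // 19, as in the test below
    std::printf("%d\n", isoWeek({year{2020}, December, day{31}})); // 53, the year-boundary case
}
```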
+ +#include +#include +#include +#include + +#include +#include + +namespace DB::tests +{ +class TestWeekOfYear : public DB::tests::FunctionTest +{ +}; + +TEST_F(TestWeekOfYear, WeekOfYear) +try +{ + DAGContext * dag_context = context.getDAGContext(); + UInt64 ori_flags = dag_context->getFlags(); + dag_context->addFlag(TiDBSQLFlags::TRUNCATE_AS_WARNING); + /// ColumnVector(nullable) + const String func_name = "tidbWeekOfYear"; + static auto const nullable_datetime_type_ptr = makeNullable(std::make_shared(6)); + static auto const datetime_type_ptr = std::make_shared(6); + static auto const date_type_ptr = std::make_shared(); + auto data_col_ptr = createColumn>( + { + {}, // Null + // FIXME: https://github.com/pingcap/tiflash/issues/4186 + // MyDateTime(2022, 12, 0, 1, 1, 1, 1).toPackedUInt(), + // MyDateTime(2022, 13, 31, 1, 1, 1, 1).toPackedUInt(), + MyDateTime(0, 0, 0, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(0, 1, 1, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(0, 1, 2, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(0, 1, 3, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(1969, 1, 1, 1, 1, 1, 1).toPackedUInt(), + MyDateTime(1969, 1, 6, 1, 1, 1, 1).toPackedUInt(), + MyDateTime(2022, 4, 28, 6, 7, 8, 9).toPackedUInt(), + MyDateTime(2022, 5, 2, 9, 8, 7, 6).toPackedUInt(), + MyDateTime(2022, 5, 9, 9, 8, 7, 6).toPackedUInt(), + MyDateTime(2022, 12, 31, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(2020, 12, 31, 0, 0, 0, 0).toPackedUInt(), + }) + .column; + auto input_col = ColumnWithTypeAndName(data_col_ptr, nullable_datetime_type_ptr, "input"); + auto output_col = createColumn>({{}, {}, 52, 1, 1, 1, 2, 17, 18, 19, 52, 53}); + ASSERT_COLUMN_EQ(output_col, executeFunction(func_name, input_col)); + + /// ColumnVector(non-null) + data_col_ptr = createColumn( + { + MyDateTime(0, 0, 0, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(0, 1, 1, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(0, 1, 2, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(0, 1, 3, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(1969, 1, 1, 1, 1, 1, 1).toPackedUInt(), + MyDateTime(1969, 1, 6, 1, 1, 1, 1).toPackedUInt(), + MyDateTime(2022, 4, 28, 6, 7, 8, 9).toPackedUInt(), + MyDateTime(2022, 5, 2, 9, 8, 7, 6).toPackedUInt(), + MyDateTime(2022, 5, 9, 9, 8, 7, 6).toPackedUInt(), + MyDateTime(2022, 12, 31, 0, 0, 0, 0).toPackedUInt(), + MyDateTime(2020, 12, 31, 0, 0, 0, 0).toPackedUInt(), + }) + .column; + input_col = ColumnWithTypeAndName(data_col_ptr, datetime_type_ptr, "input"); + output_col = createColumn>({{}, 52, 1, 1, 1, 2, 17, 18, 19, 52, 53}); + ASSERT_COLUMN_EQ(output_col, executeFunction(func_name, input_col)); + + /// ColumnConst(non-null) + input_col = ColumnWithTypeAndName(createConstColumn(1, MyDateTime(2022, 5, 9, 9, 8, 7, 6).toPackedUInt()).column, datetime_type_ptr, "input"); + output_col = createConstColumn>(1, {19}); + ASSERT_COLUMN_EQ(output_col, executeFunction(func_name, input_col)); + + /// ColumnConst(nullable) + input_col = ColumnWithTypeAndName(createConstColumn>(1, MyDateTime(2022, 5, 9, 9, 8, 7, 6).toPackedUInt()).column, nullable_datetime_type_ptr, "input"); + output_col = createConstColumn>(1, {19}); + ASSERT_COLUMN_EQ(output_col, executeFunction(func_name, input_col)); + + /// ColumnConst(nullable(null)) + input_col = ColumnWithTypeAndName(createConstColumn>(1, {}).column, nullable_datetime_type_ptr, "input"); + output_col = createConstColumn>(1, {}); + ASSERT_COLUMN_EQ(output_col, executeFunction(func_name, input_col)); + + /// MyDate ColumnVector(non-null) + data_col_ptr = createColumn( + { + MyDate(0, 0, 0).toPackedUInt(), + MyDate(0, 1, 
1).toPackedUInt(), + MyDate(0, 1, 2).toPackedUInt(), + MyDate(0, 1, 3).toPackedUInt(), + MyDate(1969, 1, 1).toPackedUInt(), + MyDate(1969, 1, 6).toPackedUInt(), + MyDate(2022, 4, 28).toPackedUInt(), + MyDate(2022, 5, 2).toPackedUInt(), + MyDate(2022, 5, 9).toPackedUInt(), + MyDate(2022, 12, 31).toPackedUInt(), + MyDate(2020, 12, 31).toPackedUInt(), + }) + .column; + input_col = ColumnWithTypeAndName(data_col_ptr, date_type_ptr, "input"); + output_col = createColumn>({{}, 52, 1, 1, 1, 2, 17, 18, 19, 52, 53}); + ASSERT_COLUMN_EQ(output_col, executeFunction(func_name, input_col)); + dag_context->setFlags(ori_flags); +} +CATCH + +} // namespace DB::tests \ No newline at end of file diff --git a/tests/fullstack-test/expr/week_of_year.test b/tests/fullstack-test/expr/week_of_year.test new file mode 100644 index 00000000000..a44fed4be11 --- /dev/null +++ b/tests/fullstack-test/expr/week_of_year.test @@ -0,0 +1,33 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t; +mysql> create table test.t(a char(20), b datetime, c date); +mysql> insert into test.t values('', '1970-1-1 12:12:12', '1970-1-1'); +mysql> insert into test.t values('123', '1989-6-6 12:12:12', '1989-6-6'); +mysql> insert into test.t values('2022-3-10', '2000-3-4 12:12:12', '2000-3-4'); +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select weekofyear(a), weekofyear(b), weekofyear(c) from test.t; ++---------------+---------------+---------------+ +| weekofyear(a) | weekofyear(b) | weekofyear(c) | ++---------------+---------------+---------------+ +| NULL | 1 | 1 | +| NULL | 23 | 23 | +| 10 | 9 | 9 | ++---------------+---------------+---------------+ + +mysql> drop table if exists test.t; From e877bb2098f41d561fae9c4f6e8b25b445706838 Mon Sep 17 00:00:00 2001 From: yibin Date: Thu, 5 May 2022 09:52:54 +0800 Subject: [PATCH 54/79] Fix gtest mpp tunnel local write unstable issue (#4811) close pingcap/tiflash#4749 --- dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp index a2860c62947..47ce2ee6ee6 100644 --- a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp +++ b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp @@ -469,6 +469,7 @@ try data_packet_ptr->set_data("First"); mpp_tunnel_ptr->write(*data_packet_ptr); mpp_tunnel_ptr->close("Cancel"); + mpp_tunnel_ptr->getThreadManager()->wait(); // Join local read thread GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 2); //Second for err msg GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First"); @@ -486,6 +487,7 @@ try data_packet_ptr->set_data("First"); mpp_tunnel_ptr->write(*data_packet_ptr); mpp_tunnel_ptr->writeDone(); + mpp_tunnel_ptr->getThreadManager()->wait(); // Join local read thread 
GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true); GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 1); GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First"); From e59b63eea75d8ef1168cadaa2fd6b6ad8f3c75da Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Thu, 5 May 2022 11:00:55 +0800 Subject: [PATCH 55/79] PageStorage APIs support throw_on_not_exist. (#4736) ref pingcap/tiflash#3594 --- dbms/src/Storages/Page/Page.h | 2 + dbms/src/Storages/Page/PageDefines.h | 1 + dbms/src/Storages/Page/PageStorage.h | 45 +++++--- dbms/src/Storages/Page/V1/PageStorage.cpp | 4 +- dbms/src/Storages/Page/V1/PageStorage.h | 8 +- dbms/src/Storages/Page/V2/PageFile.cpp | 70 +++++++++++- dbms/src/Storages/Page/V2/PageFile.h | 1 + dbms/src/Storages/Page/V2/PageStorage.cpp | 103 +++++++++++++++--- dbms/src/Storages/Page/V2/PageStorage.h | 26 +++-- .../gtest_page_storage_multi_writers.cpp | 2 +- dbms/src/Storages/Page/V3/PageDirectory.cpp | 62 ++++++++--- dbms/src/Storages/Page/V3/PageDirectory.h | 20 ++-- dbms/src/Storages/Page/V3/PageEntry.h | 2 + dbms/src/Storages/Page/V3/PageStorageImpl.cpp | 90 ++++++++++++--- dbms/src/Storages/Page/V3/PageStorageImpl.h | 22 ++-- .../Storages/Page/V3/tests/entries_helper.h | 8 +- .../Page/V3/tests/gtest_page_storage.cpp | 68 +++++++++++- 17 files changed, 432 insertions(+), 102 deletions(-) diff --git a/dbms/src/Storages/Page/Page.h b/dbms/src/Storages/Page/Page.h index f90a068c964..b36bf7245d6 100644 --- a/dbms/src/Storages/Page/Page.h +++ b/dbms/src/Storages/Page/Page.h @@ -59,6 +59,8 @@ struct Page std::set field_offsets; public: + inline bool isValid() const { return page_id != INVALID_PAGE_ID; } + ByteBuffer getFieldData(size_t index) const { auto iter = field_offsets.find(FieldOffset(index)); diff --git a/dbms/src/Storages/Page/PageDefines.h b/dbms/src/Storages/Page/PageDefines.h index d99f6be6bb3..8cd0e4c325e 100644 --- a/dbms/src/Storages/Page/PageDefines.h +++ b/dbms/src/Storages/Page/PageDefines.h @@ -54,6 +54,7 @@ static constexpr NamespaceId TEST_NAMESPACE_ID = 1000; using PageId = UInt64; using PageIds = std::vector; using PageIdSet = std::unordered_set; +static constexpr PageId INVALID_PAGE_ID = 0; using PageIdV3Internal = UInt128; using PageIdV3Internals = std::vector; diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 4da12e2b5e0..501a749aaa1 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -218,37 +218,50 @@ class PageStorage : private boost::noncopyable // Get some statistics of all living snapshots and the oldest living snapshot. virtual SnapshotsStatistics getSnapshotsStat() const = 0; + virtual size_t getNumberOfPages() = 0; + void write(WriteBatch && write_batch, const WriteLimiterPtr & write_limiter = nullptr) { writeImpl(std::move(write_batch), write_limiter); } + // If we can't get the entry. 
+    // Then a null entry will be returned.
     PageEntry getEntry(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot = {})
     {
         return getEntryImpl(ns_id, page_id, snapshot);
     }
 
-    Page read(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {})
+    Page read(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {}, bool throw_on_not_exist = true)
     {
-        return readImpl(ns_id, page_id, read_limiter, snapshot);
+        return readImpl(ns_id, page_id, read_limiter, snapshot, throw_on_not_exist);
     }
 
-    PageMap read(NamespaceId ns_id, const std::vector & page_ids, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {})
+    PageMap read(NamespaceId ns_id, const PageIds & page_ids, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {}, bool throw_on_not_exist = true)
     {
-        return readImpl(ns_id, page_ids, read_limiter, snapshot);
+        return readImpl(ns_id, page_ids, read_limiter, snapshot, throw_on_not_exist);
     }
 
-    void read(NamespaceId ns_id, const std::vector & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {})
+    /**
+     * If throw_on_not_exist is false and some of the page_ids are not found,
+     * then the return value will record all the page_ids that were not found.
+     */
+    PageIds read(NamespaceId ns_id, const PageIds & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {}, bool throw_on_not_exist = true)
     {
-        readImpl(ns_id, page_ids, handler, read_limiter, snapshot);
+        return readImpl(ns_id, page_ids, handler, read_limiter, snapshot, throw_on_not_exist);
     }
 
     using FieldIndices = std::vector;
     using PageReadFields = std::pair;
 
-    PageMap read(NamespaceId ns_id, const std::vector & page_fields, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {})
+    PageMap read(NamespaceId ns_id, const std::vector & page_fields, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {}, bool throw_on_not_exist = true)
     {
-        return readImpl(ns_id, page_fields, read_limiter, snapshot);
+        return readImpl(ns_id, page_fields, read_limiter, snapshot, throw_on_not_exist);
+    }
+
+    Page read(NamespaceId ns_id, const PageReadFields & page_field, const ReadLimiterPtr & read_limiter = nullptr, SnapshotPtr snapshot = {}, bool throw_on_not_exist = true)
+    {
+        return readImpl(ns_id, page_field, read_limiter, snapshot, throw_on_not_exist);
     }
 
     void traverse(const std::function & acceptor, SnapshotPtr snapshot = {})
@@ -256,9 +269,9 @@ class PageStorage : private boost::noncopyable
         traverseImpl(acceptor, snapshot);
     }
 
-    PageId getNormalPageId(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot = {})
+    PageId getNormalPageId(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot = {}, bool throw_on_not_exist = true)
     {
-        return getNormalPageIdImpl(ns_id, page_id, snapshot);
+        return getNormalPageIdImpl(ns_id, page_id, snapshot, throw_on_not_exist);
     }
 
     // We may skip the GC to reduce useless reading by default.
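A hedged usage sketch of the reworked read interface follows (V3 semantics; the V2 implementation throws `NOT_IMPLEMENTED` whenever `throw_on_not_exist` is false, as the later hunks show). Here `storage`, `ns_id`, `page_ids`, and `handler` are placeholders, not names from this patch:

```cpp
// Non-throwing single read: a missing page comes back as an invalid Page
// whose page_id is INVALID_PAGE_ID, instead of raising an exception.
Page page = storage->read(ns_id, /*page_id=*/1, /*read_limiter=*/nullptr, /*snapshot=*/{}, /*throw_on_not_exist=*/false);
if (!page.isValid())
{
    // handle the missing page here
}

// Handler-based batch read: the returned PageIds collect every requested id
// that was not found, so one missing page no longer aborts the whole batch.
PageIds not_found = storage->read(ns_id, page_ids, handler, /*read_limiter=*/nullptr, /*snapshot=*/{}, /*throw_on_not_exist=*/false);
```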
@@ -278,17 +291,19 @@ class PageStorage : private boost::noncopyable virtual PageEntry getEntryImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot) = 0; - virtual Page readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) = 0; + virtual Page readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) = 0; + + virtual PageMap readImpl(NamespaceId ns_id, const PageIds & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) = 0; - virtual PageMap readImpl(NamespaceId ns_id, const std::vector & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) = 0; + virtual PageIds readImpl(NamespaceId ns_id, const PageIds & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) = 0; - virtual void readImpl(NamespaceId ns_id, const std::vector & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) = 0; + virtual PageMap readImpl(NamespaceId ns_id, const std::vector & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) = 0; - virtual PageMap readImpl(NamespaceId ns_id, const std::vector & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) = 0; + virtual Page readImpl(NamespaceId ns_id, const PageReadFields & page_field, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) = 0; virtual void traverseImpl(const std::function & acceptor, SnapshotPtr snapshot) = 0; - virtual PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot) = 0; + virtual PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) = 0; virtual bool gcImpl(bool not_skip, const WriteLimiterPtr & write_limiter, const ReadLimiterPtr & read_limiter) = 0; diff --git a/dbms/src/Storages/Page/V1/PageStorage.cpp b/dbms/src/Storages/Page/V1/PageStorage.cpp index 97ceff1e7d8..be4211abf70 100644 --- a/dbms/src/Storages/Page/V1/PageStorage.cpp +++ b/dbms/src/Storages/Page/V1/PageStorage.cpp @@ -358,7 +358,7 @@ Page PageStorage::read(PageId page_id, SnapshotPtr snapshot) return file_reader->read(to_read)[page_id]; } -PageMap PageStorage::read(const std::vector & page_ids, SnapshotPtr snapshot) +PageMap PageStorage::read(const PageIds & page_ids, SnapshotPtr snapshot) { if (!snapshot) { @@ -391,7 +391,7 @@ PageMap PageStorage::read(const std::vector & page_ids, SnapshotPtr snap return page_map; } -void PageStorage::read(const std::vector & page_ids, const PageHandler & handler, SnapshotPtr snapshot) +void PageStorage::read(const PageIds & page_ids, const PageHandler & handler, SnapshotPtr snapshot) { if (!snapshot) { diff --git a/dbms/src/Storages/Page/V1/PageStorage.h b/dbms/src/Storages/Page/V1/PageStorage.h index 84691ce6d53..824f0fcaa90 100644 --- a/dbms/src/Storages/Page/V1/PageStorage.h +++ b/dbms/src/Storages/Page/V1/PageStorage.h @@ -105,8 +105,8 @@ class PageStorage PageEntry getEntry(PageId page_id, SnapshotPtr snapshot); Page read(PageId page_id, SnapshotPtr snapshot); - PageMap read(const std::vector & page_ids, SnapshotPtr snapshot); - void read(const std::vector & page_ids, const PageHandler & handler, SnapshotPtr snapshot); + PageMap read(const PageIds & page_ids, SnapshotPtr snapshot); + void read(const PageIds & page_ids, const PageHandler & handler, SnapshotPtr snapshot); void 
traverse(const std::function & acceptor, SnapshotPtr snapshot);
     bool gc();
@@ -197,8 +197,8 @@ class PageReader
     {}
 
     Page read(PageId page_id) const { return storage.read(page_id, snap); }
-    PageMap read(const std::vector & page_ids) const { return storage.read(page_ids, snap); }
-    void read(const std::vector & page_ids, PageHandler & handler) const { storage.read(page_ids, handler, snap); };
+    PageMap read(const PageIds & page_ids) const { return storage.read(page_ids, snap); }
+    void read(const PageIds & page_ids, PageHandler & handler) const { storage.read(page_ids, handler, snap); };
 
     PageId getNormalPageId(PageId page_id) const { return storage.getNormalPageId(page_id, snap); }
     UInt64 getPageChecksum(PageId page_id) const { return storage.getEntry(page_id, snap).checksum; }
diff --git a/dbms/src/Storages/Page/V2/PageFile.cpp b/dbms/src/Storages/Page/V2/PageFile.cpp
index cec02aa2d85..60846685eb5 100644
--- a/dbms/src/Storages/Page/V2/PageFile.cpp
+++ b/dbms/src/Storages/Page/V2/PageFile.cpp
@@ -999,7 +999,6 @@ PageMap PageFile::Reader::read(PageFile::Reader::FieldReadInfos & to_read, const
     for (auto & [page_id, entry, fields] : to_read)
     {
         (void)page_id;
-        (void)entry;
         // Sort fields to get better read on disk
         std::sort(fields.begin(), fields.end());
         for (const auto field_index : fields)
@@ -1070,6 +1069,75 @@ PageMap PageFile::Reader::read(PageFile::Reader::FieldReadInfos & to_read, const
     return page_map;
 }
 
+Page PageFile::Reader::read(FieldReadInfo & to_read, const ReadLimiterPtr & read_limiter)
+{
+    ProfileEvents::increment(ProfileEvents::PSMReadPages, 1);
+
+    size_t buf_size = 0;
+
+    std::sort(to_read.fields.begin(), to_read.fields.end());
+    for (const auto field_index : to_read.fields)
+    {
+        buf_size += to_read.entry.getFieldSize(field_index);
+    }
+
+    char * data_buf = static_cast(alloc(buf_size));
+    MemHolder mem_holder = createMemHolder(data_buf, [&, buf_size](char * p) { free(p, buf_size); });
+
+    Page page_rc;
+    std::set fields_offset_in_page;
+
+    size_t read_size_this_entry = 0;
+    char * write_offset = data_buf;
+
+    for (const auto field_index : to_read.fields)
+    {
+        // TODO: Contiguous fields could be read with a single system call.
+ const auto [beg_offset, end_offset] = to_read.entry.getFieldOffsets(field_index); + const auto size_to_read = end_offset - beg_offset; + PageUtil::readFile(data_file, to_read.entry.offset + beg_offset, write_offset, size_to_read, read_limiter); + fields_offset_in_page.emplace(field_index, read_size_this_entry); + + if constexpr (PAGE_CHECKSUM_ON_READ) + { + auto expect_checksum = to_read.entry.field_offsets[field_index].second; + auto field_checksum = CityHash_v1_0_2::CityHash64(write_offset, size_to_read); + if (unlikely(to_read.entry.size != 0 && field_checksum != expect_checksum)) + { + throw Exception(fmt::format("Page [{}] field [{}], entry offset [{}], entry size[{}], checksum not match, " + "broken file: {}, expected: 0x{:X}, but: 0x{:X}", + to_read.page_id, + field_index, + to_read.entry.offset, + to_read.entry.size, + data_file_path, + expect_checksum, + field_checksum), + ErrorCodes::CHECKSUM_DOESNT_MATCH); + } + + read_size_this_entry += size_to_read; + write_offset += size_to_read; + } + } + + Page page; + page.page_id = to_read.page_id; + page.data = ByteBuffer(data_buf, write_offset); + page.mem_holder = mem_holder; + page.field_offsets.swap(fields_offset_in_page); + + if (unlikely(write_offset != data_buf + buf_size)) + { + throw Exception(fmt::format("Pos not match, expect to read {} bytes, but only {}.", buf_size, write_offset - data_buf), + ErrorCodes::LOGICAL_ERROR); + } + + last_read_time = Clock::now(); + + return page; +} + bool PageFile::Reader::isIdle(const Seconds & max_idle_time) { if (max_idle_time.count() == 0) diff --git a/dbms/src/Storages/Page/V2/PageFile.h b/dbms/src/Storages/Page/V2/PageFile.h index 78d4063732d..685eee05967 100644 --- a/dbms/src/Storages/Page/V2/PageFile.h +++ b/dbms/src/Storages/Page/V2/PageFile.h @@ -98,6 +98,7 @@ class PageFile : public Allocator }; using FieldReadInfos = std::vector; PageMap read(FieldReadInfos & to_read, const ReadLimiterPtr & read_limiter = nullptr); + Page read(FieldReadInfo & to_read, const ReadLimiterPtr & read_limiter = nullptr); bool isIdle(const Seconds & max_idle_time); diff --git a/dbms/src/Storages/Page/V2/PageStorage.cpp b/dbms/src/Storages/Page/V2/PageStorage.cpp index 922c8dc2f1a..32367c4e708 100644 --- a/dbms/src/Storages/Page/V2/PageStorage.cpp +++ b/dbms/src/Storages/Page/V2/PageStorage.cpp @@ -51,6 +51,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; +extern const int NOT_IMPLEMENTED; } // namespace ErrorCodes namespace FailPoints @@ -361,13 +362,18 @@ PageId PageStorage::getMaxId(NamespaceId /*ns_id*/) return versioned_page_entries.getSnapshot("")->version()->maxId(); } -PageId PageStorage::getNormalPageIdImpl(NamespaceId /*ns_id*/, PageId page_id, SnapshotPtr snapshot) +PageId PageStorage::getNormalPageIdImpl(NamespaceId /*ns_id*/, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { snapshot = this->getSnapshot(""); } + if (!throw_on_not_exist) + { + throw Exception("Not support throw_on_not_exist on V2", ErrorCodes::NOT_IMPLEMENTED); + } + auto [is_ref_id, normal_page_id] = toConcreteSnapshot(snapshot)->version()->isRefId(page_id); return is_ref_id ? 
normal_page_id : page_id; } @@ -592,35 +598,58 @@ SnapshotsStatistics PageStorage::getSnapshotsStat() const return versioned_page_entries.getSnapshotsStat(); } -DB::Page PageStorage::readImpl(NamespaceId /*ns_id*/, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) +size_t PageStorage::getNumberOfPages() +{ + const auto & concrete_snap = getConcreteSnapshot(); + if (concrete_snap) + { + return concrete_snap->version()->numPages(); + } + else + { + throw Exception("Can't get concrete snapshot", ErrorCodes::LOGICAL_ERROR); + } +} + +DB::Page PageStorage::readImpl(NamespaceId /*ns_id*/, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { snapshot = this->getSnapshot(""); } + if (!throw_on_not_exist) + { + throw Exception("Not support throw_on_not_exist on V2", ErrorCodes::NOT_IMPLEMENTED); + } + const auto page_entry = toConcreteSnapshot(snapshot)->version()->find(page_id); if (!page_entry) - throw Exception("Page " + DB::toString(page_id) + " not found", ErrorCodes::LOGICAL_ERROR); + throw Exception(fmt::format("Page {} not found", page_id), ErrorCodes::LOGICAL_ERROR); const auto file_id_level = page_entry->fileIdLevel(); PageIdAndEntries to_read = {{page_id, *page_entry}}; auto file_reader = getReader(file_id_level); return file_reader->read(to_read, read_limiter)[page_id]; } -PageMap PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) +PageMap PageStorage::readImpl(NamespaceId /*ns_id*/, const PageIds & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { snapshot = this->getSnapshot(""); } + if (!throw_on_not_exist) + { + throw Exception("Not support throw_on_not_exist on V2", ErrorCodes::NOT_IMPLEMENTED); + } + std::map> file_read_infos; for (auto page_id : page_ids) { const auto page_entry = toConcreteSnapshot(snapshot)->version()->find(page_id); if (!page_entry) - throw Exception("Page " + DB::toString(page_id) + " not found", ErrorCodes::LOGICAL_ERROR); + throw Exception(fmt::format("Page {} not found", page_id), ErrorCodes::LOGICAL_ERROR); auto file_id_level = page_entry->fileIdLevel(); auto & [page_id_and_entries, file_reader] = file_read_infos[file_id_level]; page_id_and_entries.emplace_back(page_id, *page_entry); @@ -632,7 +661,7 @@ PageMap PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & } catch (DB::Exception & e) { - e.addMessage("(while reading Page[" + DB::toString(page_id) + "] of " + storage_name + ")"); + e.addMessage(fmt::format("(while reading Page[{}] of {})", page_id, storage_name)); throw; } } @@ -651,19 +680,24 @@ PageMap PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & return page_map; } -void PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) +PageIds PageStorage::readImpl(NamespaceId /*ns_id*/, const PageIds & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { snapshot = this->getSnapshot(""); } + if (!throw_on_not_exist) + { + throw Exception("Not support throw_on_not_exist on V2", ErrorCodes::NOT_IMPLEMENTED); + } + std::map> file_read_infos; for (auto page_id : page_ids) { const auto page_entry = toConcreteSnapshot(snapshot)->version()->find(page_id); if (!page_entry) - throw Exception("Page " + 
DB::toString(page_id) + " not found", ErrorCodes::LOGICAL_ERROR); + throw Exception(fmt::format("Page {} not found", page_id), ErrorCodes::LOGICAL_ERROR); auto file_id_level = page_entry->fileIdLevel(); auto & [page_id_and_entries, file_reader] = file_read_infos[file_id_level]; page_id_and_entries.emplace_back(page_id, *page_entry); @@ -675,7 +709,7 @@ void PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & pa } catch (DB::Exception & e) { - e.addMessage("(while reading Page[" + DB::toString(page_id) + "] of " + storage_name + ")"); + e.addMessage(fmt::format("(while reading Page[{}] of {})", page_id, storage_name)); throw; } } @@ -689,21 +723,28 @@ void PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & pa reader->read(page_id_and_entries, handler, read_limiter); } + + return {}; } -PageMap PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) +PageMap PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vector & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { snapshot = this->getSnapshot(""); } + if (!throw_on_not_exist) + { + throw Exception("Not support throw_on_not_exist on V2", ErrorCodes::NOT_IMPLEMENTED); + } + std::map> file_read_infos; for (const auto & [page_id, field_indices] : page_fields) { const auto page_entry = toConcreteSnapshot(snapshot)->version()->find(page_id); if (!page_entry) - throw Exception("Page " + DB::toString(page_id) + " not found", ErrorCodes::LOGICAL_ERROR); + throw Exception(fmt::format("Page {} not found", page_id), ErrorCodes::LOGICAL_ERROR); const auto file_id_level = page_entry->fileIdLevel(); auto & [file_reader, field_infos] = file_read_infos[file_id_level]; field_infos.emplace_back(page_id, *page_entry, field_indices); @@ -715,7 +756,7 @@ PageMap PageStorage::readImpl(NamespaceId /*ns_id*/, const std::vectorgetSnapshot(""); + } + + if (!throw_on_not_exist) + { + throw Exception("Not support throw_on_not_exist on V2", ErrorCodes::NOT_IMPLEMENTED); + } + + const PageId & page_id = page_field.first; + const auto page_entry = toConcreteSnapshot(snapshot)->version()->find(page_id); + + if (!page_entry) + throw Exception(fmt::format("Page {} not found", page_id), ErrorCodes::LOGICAL_ERROR); + const auto file_id_level = page_entry->fileIdLevel(); + + ReaderPtr file_reader; + try + { + file_reader = getReader(file_id_level); + } + catch (DB::Exception & e) + { + e.addMessage(fmt::format("(while reading Page[{}] of {})", page_id, storage_name)); + throw; + } + + PageFile::Reader::FieldReadInfo field_info(page_id, *page_entry, page_field.second); + return file_reader->read(field_info, read_limiter); +} + void PageStorage::traverseImpl(const std::function & acceptor, SnapshotPtr snapshot) { if (!snapshot) @@ -749,7 +824,7 @@ void PageStorage::traverseImpl(const std::function { const auto page_entry = concrete_snapshot->version()->find(page_id); if (unlikely(!page_entry)) - throw Exception("Page[" + DB::toString(page_id) + "] not found when traversing PageStorage", ErrorCodes::LOGICAL_ERROR); + throw Exception(fmt::format("Page[{}] not found when traversing PageStorage", page_id), ErrorCodes::LOGICAL_ERROR); file_and_pages[page_entry->fileIdLevel()].emplace_back(page_id); } } @@ -757,7 +832,7 @@ void PageStorage::traverseImpl(const std::function for (const auto & p : file_and_pages) { // namespace id is not used in V2, so it's value is not important here - auto pages = 
readImpl(MAX_NAMESPACE_ID, p.second, nullptr, snapshot); + auto pages = readImpl(MAX_NAMESPACE_ID, p.second, nullptr, snapshot, true); for (const auto & id_page : pages) { acceptor(id_page.second); diff --git a/dbms/src/Storages/Page/V2/PageStorage.h b/dbms/src/Storages/Page/V2/PageStorage.h index 01633f9a052..6cefb6407d2 100644 --- a/dbms/src/Storages/Page/V2/PageStorage.h +++ b/dbms/src/Storages/Page/V2/PageStorage.h @@ -97,7 +97,7 @@ class PageStorage : public DB::PageStorage PageId getMaxId(NamespaceId ns_id) override; - PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot) override; + PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) override; DB::PageStorage::SnapshotPtr getSnapshot(const String & tracing_id) override; @@ -107,17 +107,21 @@ class PageStorage : public DB::PageStorage SnapshotsStatistics getSnapshotsStat() const override; + size_t getNumberOfPages() override; + void writeImpl(DB::WriteBatch && wb, const WriteLimiterPtr & write_limiter) override; DB::PageEntry getEntryImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot) override; - DB::Page readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override; + DB::Page readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override; + + PageMap readImpl(NamespaceId ns_id, const PageIds & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override; - PageMap readImpl(NamespaceId ns_id, const std::vector & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override; + PageIds readImpl(NamespaceId ns_id, const PageIds & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override; - void readImpl(NamespaceId ns_id, const std::vector & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override; + PageMap readImpl(NamespaceId ns_id, const std::vector & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override; - PageMap readImpl(NamespaceId ns_id, const std::vector & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override; + DB::Page readImpl(NamespaceId ns_id, const PageReadFields & page_field, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override; void traverseImpl(const std::function & acceptor, SnapshotPtr snapshot) override; @@ -197,12 +201,12 @@ class PageStorage : public DB::PageStorage void write(DB::WriteBatch && wb) { return writeImpl(std::move(wb), nullptr); } DB::PageEntry getEntry(PageId page_id) { return getEntryImpl(TEST_NAMESPACE_ID, page_id, nullptr); } DB::PageEntry getEntry(PageId page_id, SnapshotPtr snapshot) { return getEntryImpl(TEST_NAMESPACE_ID, page_id, snapshot); }; - DB::Page read(PageId page_id) { return readImpl(TEST_NAMESPACE_ID, page_id, nullptr, nullptr); } - DB::Page read(PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) { return readImpl(TEST_NAMESPACE_ID, page_id, read_limiter, snapshot); } - PageMap read(const std::vector & page_ids) { return readImpl(TEST_NAMESPACE_ID, page_ids, nullptr, nullptr); } - PageMap read(const std::vector & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) { return readImpl(TEST_NAMESPACE_ID, page_ids, read_limiter, 
snapshot); }; - void read(const std::vector & page_ids, const PageHandler & handler) { return readImpl(TEST_NAMESPACE_ID, page_ids, handler, nullptr, nullptr); } - PageMap read(const std::vector & page_fields) { return readImpl(TEST_NAMESPACE_ID, page_fields, nullptr, nullptr); } + DB::Page read(PageId page_id) { return readImpl(TEST_NAMESPACE_ID, page_id, nullptr, nullptr, true); } + DB::Page read(PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) { return readImpl(TEST_NAMESPACE_ID, page_id, read_limiter, snapshot, true); } + PageMap read(const PageIds & page_ids) { return readImpl(TEST_NAMESPACE_ID, page_ids, nullptr, nullptr, true); } + PageMap read(const PageIds & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) { return readImpl(TEST_NAMESPACE_ID, page_ids, read_limiter, snapshot, true); }; + PageIds read(const PageIds & page_ids, const PageHandler & handler) { return readImpl(TEST_NAMESPACE_ID, page_ids, handler, nullptr, nullptr, true); } + PageMap read(const std::vector & page_fields) { return readImpl(TEST_NAMESPACE_ID, page_fields, nullptr, nullptr, true); } void traverse(const std::function & acceptor) { return traverseImpl(acceptor, nullptr); } bool gc() { return gcImpl(false, nullptr, nullptr); } #endif diff --git a/dbms/src/Storages/Page/V2/tests/gtest_page_storage_multi_writers.cpp b/dbms/src/Storages/Page/V2/tests/gtest_page_storage_multi_writers.cpp index f6dd2b0ccf9..8d688538b3e 100644 --- a/dbms/src/Storages/Page/V2/tests/gtest_page_storage_multi_writers.cpp +++ b/dbms/src/Storages/Page/V2/tests/gtest_page_storage_multi_writers.cpp @@ -224,7 +224,7 @@ class PSReader : public Poco::Runnable LOG_TRACE(&Poco::Logger::get("root"), e.displayText()); } #else - std::vector pageIds; + PageIds pageIds; for (size_t i = 0; i < 5; ++i) { pageIds.emplace_back(random() % ctx.MAX_PAGE_ID); diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index c98c368be80..aef4e9e1922 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -747,11 +747,13 @@ PageIDAndEntryV3 PageDirectory::get(PageIdV3Internal page_id, const PageDirector throw Exception(fmt::format("Fail to get entry [page_id={}] [seq={}] [resolve_id={}] [resolve_ver={}]", page_id, snap->sequence, id_to_resolve, ver_to_resolve), ErrorCodes::PS_ENTRY_NO_VALID_VERSION); } -PageIDAndEntriesV3 PageDirectory::get(const PageIdV3Internals & page_ids, const PageDirectorySnapshotPtr & snap) const +std::pair PageDirectory::get(const PageIdV3Internals & page_ids, const PageDirectorySnapshotPtr & snap, bool throw_on_not_exist) const { PageEntryV3 entry_got; + PageIds page_not_found = {}; + const PageVersionType init_ver_to_resolve(snap->sequence, 0); - auto get_one = [&entry_got, init_ver_to_resolve, this](PageIdV3Internal page_id, PageVersionType ver_to_resolve, size_t idx) { + auto get_one = [&entry_got, init_ver_to_resolve, throw_on_not_exist, this](PageIdV3Internal page_id, PageVersionType ver_to_resolve, size_t idx) { PageIdV3Internal id_to_resolve = page_id; bool ok = true; while (ok) @@ -762,7 +764,14 @@ PageIDAndEntriesV3 PageDirectory::get(const PageIdV3Internals & page_ids, const iter = mvcc_table_directory.find(id_to_resolve); if (iter == mvcc_table_directory.end()) { - throw Exception(fmt::format("Invalid page id, entry not exist [page_id={}] [resolve_id={}]", page_id, id_to_resolve), ErrorCodes::PS_ENTRY_NOT_EXISTS); + if (throw_on_not_exist) + { + throw Exception(fmt::format("Invalid 
page id, entry not exist [page_id={}] [resolve_id={}]", page_id, id_to_resolve), ErrorCodes::PS_ENTRY_NOT_EXISTS); + } + else + { + return false; + } } } auto [need_collapse, next_id_to_resolve, next_ver_to_resolve] = iter->second->resolveToPageId(ver_to_resolve.sequence, id_to_resolve != page_id, &entry_got); @@ -794,17 +803,21 @@ PageIDAndEntriesV3 PageDirectory::get(const PageIdV3Internals & page_ids, const { id_entries.emplace_back(page_ids[idx], entry_got); } + else + { + page_not_found.emplace_back(page_ids[idx]); + } } - return id_entries; + return std::make_pair(id_entries, page_not_found); } -PageIdV3Internal PageDirectory::getNormalPageId(PageIdV3Internal page_id, const PageDirectorySnapshotPtr & snap) const +PageIdV3Internal PageDirectory::getNormalPageId(PageIdV3Internal page_id, const PageDirectorySnapshotPtr & snap, bool throw_on_not_exist) const { PageIdV3Internal id_to_resolve = page_id; PageVersionType ver_to_resolve(snap->sequence, 0); - bool ok = true; - while (ok) + bool keep_resolve = true; + while (keep_resolve) { MVCCMapType::const_iterator iter; { @@ -812,7 +825,14 @@ PageIdV3Internal PageDirectory::getNormalPageId(PageIdV3Internal page_id, const iter = mvcc_table_directory.find(id_to_resolve); if (iter == mvcc_table_directory.end()) { - throw Exception(fmt::format("Invalid page id [page_id={}] [resolve_id={}]", page_id, id_to_resolve)); + if (throw_on_not_exist) + { + throw Exception(fmt::format("Invalid page id [page_id={}] [resolve_id={}]", page_id, id_to_resolve)); + } + else + { + return PageIdV3Internal(0, INVALID_PAGE_ID); + } } } auto [need_collapse, next_id_to_resolve, next_ver_to_resolve] = iter->second->resolveToPageId(ver_to_resolve.sequence, id_to_resolve != page_id, nullptr); @@ -821,12 +841,14 @@ PageIdV3Internal PageDirectory::getNormalPageId(PageIdV3Internal page_id, const case VersionedPageEntries::RESOLVE_TO_NORMAL: return id_to_resolve; case VersionedPageEntries::RESOLVE_FAIL: - ok = false; + // resolve failed + keep_resolve = false; break; case VersionedPageEntries::RESOLVE_TO_REF: if (id_to_resolve == next_id_to_resolve) { - ok = false; + // dead-loop, so break the `while(keep_resolve)` + keep_resolve = false; break; } id_to_resolve = next_id_to_resolve; @@ -834,12 +856,20 @@ PageIdV3Internal PageDirectory::getNormalPageId(PageIdV3Internal page_id, const break; // continue the resolving } } - throw Exception(fmt::format( - "fail to get normal id [page_id={}] [seq={}] [resolve_id={}] [resolve_ver={}]", - page_id, - snap->sequence, - id_to_resolve, - ver_to_resolve)); + + if (throw_on_not_exist) + { + throw Exception(fmt::format( + "fail to get normal id [page_id={}] [seq={}] [resolve_id={}] [resolve_ver={}]", + page_id, + snap->sequence, + id_to_resolve, + ver_to_resolve)); + } + else + { + return PageIdV3Internal(0, INVALID_PAGE_ID); + } } PageId PageDirectory::getMaxId(NamespaceId ns_id) const diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index b4b12a0d075..8263f5ef5bb 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -295,23 +295,27 @@ class PageDirectory PageIDAndEntryV3 get(PageIdV3Internal page_id, const PageDirectorySnapshotPtr & snap, bool throw_on_not_exist = true) const; PageIDAndEntryV3 get(PageIdV3Internal page_id, const DB::PageStorageSnapshotPtr & snap) const { - return get(page_id, toConcreteSnapshot(snap)); + return get(page_id, toConcreteSnapshot(snap), /*throw_on_not_exist=*/true); } PageIDAndEntryV3 
getOrNull(PageIdV3Internal page_id, const DB::PageStorageSnapshotPtr & snap) const { return get(page_id, toConcreteSnapshot(snap), /*throw_on_not_exist=*/false); } - PageIDAndEntriesV3 get(const PageIdV3Internals & page_ids, const PageDirectorySnapshotPtr & snap) const; + std::pair get(const PageIdV3Internals & page_ids, const PageDirectorySnapshotPtr & snap, bool throw_on_not_exist = true) const; PageIDAndEntriesV3 get(const PageIdV3Internals & page_ids, const DB::PageStorageSnapshotPtr & snap) const { - return get(page_ids, toConcreteSnapshot(snap)); + return std::get<0>(get(page_ids, toConcreteSnapshot(snap), /*throw_on_not_exist=*/true)); + } + std::pair getOrNull(PageIdV3Internals page_ids, const DB::PageStorageSnapshotPtr & snap) const + { + return get(page_ids, toConcreteSnapshot(snap), /*throw_on_not_exist=*/false); } - PageIdV3Internal getNormalPageId(PageIdV3Internal page_id, const PageDirectorySnapshotPtr & snap) const; - PageIdV3Internal getNormalPageId(PageIdV3Internal page_id, const DB::PageStorageSnapshotPtr & snap) const + PageIdV3Internal getNormalPageId(PageIdV3Internal page_id, const PageDirectorySnapshotPtr & snap, bool throw_on_not_exist) const; + PageIdV3Internal getNormalPageId(PageIdV3Internal page_id, const DB::PageStorageSnapshotPtr & snap, bool throw_on_not_exist) const { - return getNormalPageId(page_id, toConcreteSnapshot(snap)); + return getNormalPageId(page_id, toConcreteSnapshot(snap), throw_on_not_exist); } #ifndef NDEBUG // Just for tests, refactor them out later @@ -325,11 +329,11 @@ class PageDirectory } PageIdV3Internal getNormalPageId(PageId page_id, const PageDirectorySnapshotPtr & snap) const { - return getNormalPageId(buildV3Id(TEST_NAMESPACE_ID, page_id), snap); + return getNormalPageId(buildV3Id(TEST_NAMESPACE_ID, page_id), snap, /*throw_on_not_exist*/ true); } PageIdV3Internal getNormalPageId(PageId page_id, const DB::PageStorageSnapshotPtr & snap) const { - return getNormalPageId(buildV3Id(TEST_NAMESPACE_ID, page_id), toConcreteSnapshot(snap)); + return getNormalPageId(buildV3Id(TEST_NAMESPACE_ID, page_id), toConcreteSnapshot(snap), /*throw_on_not_exist*/ true); } #endif diff --git a/dbms/src/Storages/Page/V3/PageEntry.h b/dbms/src/Storages/Page/V3/PageEntry.h index d8ed1b6e9a5..bbc3915647c 100644 --- a/dbms/src/Storages/Page/V3/PageEntry.h +++ b/dbms/src/Storages/Page/V3/PageEntry.h @@ -40,6 +40,8 @@ struct PageEntryV3 PageFieldOffsetChecksums field_offsets{}; public: + inline bool isValid() const { return file_id != INVALID_BLOBFILE_ID; } + size_t getFieldSize(size_t index) const { if (unlikely(index >= field_offsets.size())) diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index a7fb9374a1c..bb48f18c658 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -66,14 +66,14 @@ PageId PageStorageImpl::getMaxId(NamespaceId ns_id) return page_directory->getMaxId(ns_id); } -PageId PageStorageImpl::getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot) +PageId PageStorageImpl::getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { snapshot = this->getSnapshot(""); } - return page_directory->getNormalPageId(buildV3Id(ns_id, page_id), snapshot).low; + return page_directory->getNormalPageId(buildV3Id(ns_id, page_id), snapshot, throw_on_not_exist).low; } DB::PageStorage::SnapshotPtr PageStorageImpl::getSnapshot(const String & tracing_id) @@ -86,6 +86,11 
@@ SnapshotsStatistics PageStorageImpl::getSnapshotsStat() const return page_directory->getSnapshotsStat(); } +size_t PageStorageImpl::getNumberOfPages() +{ + return page_directory->numPages(); +} + void PageStorageImpl::writeImpl(DB::WriteBatch && write_batch, const WriteLimiterPtr & write_limiter) { if (unlikely(write_batch.empty())) @@ -126,18 +131,25 @@ DB::PageEntry PageStorageImpl::getEntryImpl(NamespaceId ns_id, PageId page_id, S } } -DB::Page PageStorageImpl::readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) +DB::Page PageStorageImpl::readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { snapshot = this->getSnapshot(""); } - auto page_entry = page_directory->get(buildV3Id(ns_id, page_id), snapshot); + auto page_entry = throw_on_not_exist ? page_directory->get(buildV3Id(ns_id, page_id), snapshot) : page_directory->getOrNull(buildV3Id(ns_id, page_id), snapshot); + if (!page_entry.second.isValid()) + { + Page page_not_found; + page_not_found.page_id = INVALID_PAGE_ID; + return page_not_found; + } + return blob_store.read(page_entry, read_limiter); } -PageMap PageStorageImpl::readImpl(NamespaceId ns_id, const std::vector & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) +PageMap PageStorageImpl::readImpl(NamespaceId ns_id, const PageIds & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { @@ -147,11 +159,27 @@ PageMap PageStorageImpl::readImpl(NamespaceId ns_id, const std::vector & PageIdV3Internals page_id_v3s; for (auto p_id : page_ids) page_id_v3s.emplace_back(buildV3Id(ns_id, p_id)); - auto page_entries = page_directory->get(page_id_v3s, snapshot); - return blob_store.read(page_entries, read_limiter); + + if (throw_on_not_exist) + { + auto page_entries = page_directory->get(page_id_v3s, snapshot); + return blob_store.read(page_entries, read_limiter); + } + else + { + auto [page_entries, page_ids_not_found] = page_directory->getOrNull(page_id_v3s, snapshot); + auto page_map = blob_store.read(page_entries, read_limiter); + for (const auto & page_id_not_found : page_ids_not_found) + { + Page page_not_found; + page_not_found.page_id = INVALID_PAGE_ID; + page_map[page_id_not_found] = page_not_found; + } + return page_map; + } } -void PageStorageImpl::readImpl(NamespaceId ns_id, const std::vector & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) +PageIds PageStorageImpl::readImpl(NamespaceId ns_id, const PageIds & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) { @@ -161,11 +189,22 @@ void PageStorageImpl::readImpl(NamespaceId ns_id, const std::vector & pa PageIdV3Internals page_id_v3s; for (auto p_id : page_ids) page_id_v3s.emplace_back(buildV3Id(ns_id, p_id)); - auto page_entries = page_directory->get(page_id_v3s, snapshot); - blob_store.read(page_entries, handler, read_limiter); + + if (throw_on_not_exist) + { + auto page_entries = page_directory->get(page_id_v3s, snapshot); + blob_store.read(page_entries, handler, read_limiter); + return {}; + } + else + { + auto [page_entries, page_ids_not_found] = page_directory->getOrNull(page_id_v3s, snapshot); + blob_store.read(page_entries, handler, read_limiter); + return page_ids_not_found; + } } -PageMap PageStorageImpl::readImpl(NamespaceId ns_id, const std::vector & 
page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot)
+PageMap PageStorageImpl::readImpl(NamespaceId ns_id, const std::vector<PageReadFields> & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist)
 {
 if (!snapshot)
 {
@@ -173,15 +212,34 @@ PageMap PageStorageImpl::readImpl(NamespaceId ns_id, const std::vector<PageReadFields> & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot)
 std::vector<BlobStore::FieldReadInfo> read_infos;
+ PageIds page_ids_not_found;
 for (const auto & [page_id, field_indices] : page_fields)
 {
- const auto & [id, entry] = page_directory->get(buildV3Id(ns_id, page_id), snapshot);
- (void)id;
- auto info = BlobStore::FieldReadInfo(buildV3Id(ns_id, page_id), entry, field_indices);
- read_infos.emplace_back(info);
+ const auto & [id, entry] = throw_on_not_exist ? page_directory->get(buildV3Id(ns_id, page_id), snapshot) : page_directory->getOrNull(buildV3Id(ns_id, page_id), snapshot);
+ if (entry.isValid())
+ {
+ auto info = BlobStore::FieldReadInfo(buildV3Id(ns_id, page_id), entry, field_indices);
+ read_infos.emplace_back(info);
+ }
+ else
+ {
+ page_ids_not_found.emplace_back(id);
+ }
 }
- return blob_store.read(read_infos, read_limiter);
+ auto page_map = blob_store.read(read_infos, read_limiter);
+ for (const auto & page_id_not_found : page_ids_not_found)
+ {
+ Page page_not_found;
+ page_not_found.page_id = INVALID_PAGE_ID;
+ page_map[page_id_not_found] = page_not_found;
+ }
+ return page_map;
+}
+
+Page PageStorageImpl::readImpl(NamespaceId /*ns_id*/, const PageReadFields & /*page_field*/, const ReadLimiterPtr & /*read_limiter*/, SnapshotPtr /*snapshot*/, bool /*throw_on_not_exist*/)
+{
+ throw Exception("Not support read single field on V3", ErrorCodes::NOT_IMPLEMENTED);
 }
 void PageStorageImpl::traverseImpl(const std::function<void(const DB::Page & page)> & acceptor, SnapshotPtr snapshot)
diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h
index 10c35d6ab2b..272cbf73a7d 100644
--- a/dbms/src/Storages/Page/V3/PageStorageImpl.h
+++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h
@@ -65,23 +65,27 @@ class PageStorageImpl : public DB::PageStorage
 PageId getMaxId(NamespaceId ns_id) override;
- PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot) override;
+ PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) override;
 DB::PageStorage::SnapshotPtr getSnapshot(const String & tracing_id) override;
 SnapshotsStatistics getSnapshotsStat() const override;
+ size_t getNumberOfPages() override;
+
 void writeImpl(DB::WriteBatch && write_batch, const WriteLimiterPtr & write_limiter) override;
 DB::PageEntry getEntryImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot) override;
- DB::Page readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override;
+ DB::Page readImpl(NamespaceId ns_id, PageId page_id, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override;
+
+ PageMap readImpl(NamespaceId ns_id, const PageIds & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override;
- PageMap readImpl(NamespaceId ns_id, const std::vector<PageId> & page_ids, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override;
+ PageIds readImpl(NamespaceId ns_id, const PageIds & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override;
- void readImpl(NamespaceId ns_id, const std::vector<PageId> & page_ids, const PageHandler & handler, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override;
+ PageMap readImpl(NamespaceId ns_id, const std::vector<PageReadFields> & page_fields, const ReadLimiterPtr &
read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override;
- PageMap readImpl(NamespaceId ns_id, const std::vector<PageReadFields> & page_fields, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot) override;
+ Page readImpl(NamespaceId ns_id, const PageReadFields & page_field, const ReadLimiterPtr & read_limiter, SnapshotPtr snapshot, bool throw_on_not_exist) override;
 void traverseImpl(const std::function<void(const DB::Page & page)> & acceptor, SnapshotPtr snapshot) override;
@@ -99,10 +103,10 @@ class PageStorageImpl : public DB::PageStorage
 // Just for tests, refactor them out later
 DB::PageStorage::SnapshotPtr getSnapshot() { return getSnapshot(""); }
 DB::PageEntry getEntry(PageId page_id) { return getEntryImpl(TEST_NAMESPACE_ID, page_id, nullptr); }
- DB::Page read(PageId page_id) { return readImpl(TEST_NAMESPACE_ID, page_id, nullptr, nullptr); }
- PageMap read(const std::vector<PageId> & page_ids) { return readImpl(TEST_NAMESPACE_ID, page_ids, nullptr, nullptr); }
- void read(const std::vector<PageId> & page_ids, const PageHandler & handler) { return readImpl(TEST_NAMESPACE_ID, page_ids, handler, nullptr, nullptr); }
- PageMap read(const std::vector<PageReadFields> & page_fields) { return readImpl(TEST_NAMESPACE_ID, page_fields, nullptr, nullptr); }
+ DB::Page read(PageId page_id) { return readImpl(TEST_NAMESPACE_ID, page_id, nullptr, nullptr, true); }
+ PageMap read(const PageIds & page_ids) { return readImpl(TEST_NAMESPACE_ID, page_ids, nullptr, nullptr, true); }
+ PageIds read(const PageIds & page_ids, const PageHandler & handler) { return readImpl(TEST_NAMESPACE_ID, page_ids, handler, nullptr, nullptr, true); }
+ PageMap read(const std::vector<PageReadFields> & page_fields) { return readImpl(TEST_NAMESPACE_ID, page_fields, nullptr, nullptr, true); }
 #endif
 friend class PageDirectoryFactory;
diff --git a/dbms/src/Storages/Page/V3/tests/entries_helper.h b/dbms/src/Storages/Page/V3/tests/entries_helper.h
index c7277624987..cce59919ec8 100644
--- a/dbms/src/Storages/Page/V3/tests/entries_helper.h
+++ b/dbms/src/Storages/Page/V3/tests/entries_helper.h
@@ -185,7 +185,8 @@ inline ::testing::AssertionResult getEntriesCompare(
 String error;
 try
 {
- auto id_entries = dir->get(page_ids, snap);
+ auto [id_entries, page_ids_not_found] = dir->get(page_ids, snap);
+ (void)page_ids_not_found;
 return check_id_entries(expected_entries, id_entries);
 }
 catch (DB::Exception & ex)
@@ -258,13 +259,14 @@ inline ::testing::AssertionResult getEntriesNotExist(
 String error;
 try
 {
- auto id_entry = dir->get(page_ids, snap);
+ auto [id_entries, page_ids_not_found] = dir->get(page_ids, snap);
+ (void)page_ids_not_found;
 error = fmt::format(
 "Expect entry [id={}] from {} with snap{} not exist, but got {}",
 page_ids_expr,
 dir_expr,
 snap_expr,
- toString(id_entry));
+ toString(id_entries));
 }
 catch (DB::Exception & ex)
 {
diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp
index ee23f244725..2b4978bd13b 100644
--- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp
+++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp
@@ -122,6 +122,70 @@ try
 }
 CATCH
+TEST_F(PageStorageTest, readNotThrowOnNotFound)
+try
+{
+ const size_t buf_sz = 100;
+ char c_buff[buf_sz] = {0};
+
+ {
+ const auto & page = page_storage->readImpl(TEST_NAMESPACE_ID, 1, nullptr, nullptr, false);
+ ASSERT_FALSE(page.isValid());
+ }
+
+ {
+ WriteBatch batch;
+ batch.putPage(1, 0, std::make_shared<ReadBufferFromMemory>(c_buff, buf_sz), buf_sz);
+ batch.putPage(3, 0, std::make_shared<ReadBufferFromMemory>(c_buff, buf_sz), buf_sz);
+ batch.putPage(4, 0, std::make_shared<ReadBufferFromMemory>(c_buff, buf_sz), buf_sz, {20, 20, 30, 30});
+ page_storage->write(std::move(batch));
+ }
+
+ {
+ PageIds page_ids = {1, 2, 5};
+ // readImpl(TEST_NAMESPACE_ID, page_ids, nullptr, nullptr, true);
+ auto page_maps = page_storage->readImpl(TEST_NAMESPACE_ID, page_ids, nullptr, nullptr, false);
+ ASSERT_EQ(page_maps[1].page_id, 1);
+ ASSERT_FALSE(page_maps[2].isValid());
+ ASSERT_FALSE(page_maps[5].isValid());
+
+ const auto & page1 = page_storage->readImpl(TEST_NAMESPACE_ID, 1, nullptr, nullptr, false);
+ ASSERT_EQ(page1.page_id, 1);
+
+ const auto & page2 = page_storage->readImpl(TEST_NAMESPACE_ID, 2, nullptr, nullptr, false);
+ ASSERT_FALSE(page2.isValid());
+
+ std::vector<PageStorage::PageReadFields> fields;
+ PageStorage::PageReadFields field1;
+ field1.first = 4;
+ field1.second = {0, 1, 2};
+ fields.emplace_back(field1);
+
+ PageStorage::PageReadFields field2;
+ field2.first = 6;
+ field2.second = {0, 1, 2};
+ fields.emplace_back(field2);
+
+ page_maps = page_storage->readImpl(TEST_NAMESPACE_ID, fields, nullptr, nullptr, false);
+ ASSERT_EQ(page_maps[4].page_id, 4);
+ ASSERT_FALSE(page_maps[6].isValid());
+
+ PageIds page_ids_not_found = page_storage->readImpl(
+ TEST_NAMESPACE_ID,
+ page_ids,
+ [](PageId /*page_id*/, const Page & /*page*/) {},
+ nullptr,
+ nullptr,
+ false);
+
+ std::sort(page_ids_not_found.begin(), page_ids_not_found.end());
+ ASSERT_EQ(page_ids_not_found.size(), 2);
+ ASSERT_EQ(page_ids_not_found[0], 2);
+ ASSERT_EQ(page_ids_not_found[1], 5);
+ }
+}
+CATCH
+
 TEST_F(PageStorageTest, WriteMultipleBatchRead1)
 try
 {
@@ -670,7 +734,7 @@ TEST_F(PageStorageWith2PagesTest, PutDuplicateRefPages)
 WriteBatch batch2;
 batch2.putRefPage(3, 1);
- page_storage->write(std::move(batch));
+ page_storage->write(std::move(batch2));
 // now Page1's entry has ref count == 2 but not 3
 }
 PageEntry entry1 = page_storage->getEntry(1);
@@ -717,7 +781,7 @@ TEST_F(PageStorageWith2PagesTest, PutCollapseDuplicatedRefPages)
 WriteBatch batch2;
 // RefPage4 -> Page1, duplicated due to ref-path-collapse
 batch2.putRefPage(4, 1);
- page_storage->write(std::move(batch));
+ page_storage->write(std::move(batch2));
 // now Page1's entry has ref count == 3 but not 2
 }
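The readNotThrowOnNotFound test above pins down the contract for the new non-throwing reads. A minimal usage sketch for downstream callers (illustrative only, not part of the patch; `page_storage`, `ns_id` and `page_id` are assumed to exist):

// Read a page that may be absent, without using exceptions for control flow.
// With throw_on_not_exist = false, a missing page is returned carrying
// page_id == INVALID_PAGE_ID, so isValid() distinguishes the two cases.
DB::Page page = page_storage->readImpl(ns_id, page_id, /*read_limiter=*/nullptr, /*snapshot=*/nullptr, /*throw_on_not_exist=*/false);
if (!page.isValid())
{
    // not found in this snapshot; fall back or report as appropriate
}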
From fd367d46e2487fc37e654f3c5597ce4d89099260 Mon Sep 17 00:00:00 2001
From: Jun Zhang
Date: Thu, 5 May 2022 11:48:55 +0800
Subject: [PATCH 56/79] refactor: Remove unused FunctionIPv6NumToString from ClickHouse.
(#4797)

ref pingcap/tiflash#4640
---
 dbms/src/Functions/FunctionsCoding.cpp | 1 -
 dbms/src/Functions/FunctionsCoding.h | 74 --------------------------
 2 files changed, 75 deletions(-)

diff --git a/dbms/src/Functions/FunctionsCoding.cpp b/dbms/src/Functions/FunctionsCoding.cpp
index 918a71693a0..01c821aa1e1 100644
--- a/dbms/src/Functions/FunctionsCoding.cpp
+++ b/dbms/src/Functions/FunctionsCoding.cpp
@@ -30,7 +30,6 @@ struct NameFunctionIPv4NumToStringClassC
 void registerFunctionsCoding(FunctionFactory & factory)
 {
 factory.registerFunction();
- factory.registerFunction<FunctionIPv6NumToString>();
 factory.registerFunction();
 factory.registerFunction();
 factory.registerFunction>();
diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h
index 1214605e90c..33891f0c014 100644
--- a/dbms/src/Functions/FunctionsCoding.h
+++ b/dbms/src/Functions/FunctionsCoding.h
@@ -131,80 +131,6 @@ void formatIP(UInt32 ip, char *& out)
 ++out;
 }
-class FunctionIPv6NumToString : public IFunction
-{
-public:
- static constexpr auto name = "IPv6NumToString";
- static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6NumToString>(); }
-
- String getName() const override { return name; }
-
- size_t getNumberOfArguments() const override { return 1; }
- bool isInjective(const Block &) const override { return true; }
-
- DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
- {
- const auto * ptr = checkAndGetDataType<DataTypeFixedString>(arguments[0].get());
- if (!ptr || ptr->getN() != ipv6_bytes_length)
- throw Exception(
- fmt::format("Illegal type {} of argument of function {}, expected FixedString({})",
- arguments[0]->getName(),
- getName(),
- ipv6_bytes_length),
- ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
- return std::make_shared<DataTypeString>();
- }
-
- bool useDefaultImplementationForConstants() const override { return true; }
-
- void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) const override
- {
- const auto & col_type_name = block.getByPosition(arguments[0]);
- const ColumnPtr & column = col_type_name.column;
-
- if (const auto * col_in = checkAndGetColumn<ColumnFixedString>(column.get()))
- {
- if (col_in->getN() != ipv6_bytes_length)
- throw Exception(
- fmt::format("Illegal type {} of column {} argument of function {}, expected FixedString({})",
- col_type_name.type->getName(),
- col_in->getName(),
- getName(),
- ipv6_bytes_length),
- ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
- const auto size = col_in->size();
- const auto & vec_in = col_in->getChars();
-
- auto col_res = ColumnString::create();
-
- ColumnString::Chars_t & vec_res = col_res->getChars();
- ColumnString::Offsets & offsets_res = col_res->getOffsets();
- vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
- offsets_res.resize(size);
-
- auto * begin = reinterpret_cast<char *>(&vec_res[0]);
- auto * pos = begin;
-
- for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_bytes_length, ++i)
- {
- formatIPv6(&vec_in[offset], pos);
- offsets_res[i] = pos - begin;
- }
-
- vec_res.resize(pos - begin);
-
- block.getByPosition(result).column = std::move(col_res);
- }
- else
- throw Exception(
- fmt::format("Illegal column {} of argument of function {}", block.getByPosition(arguments[0]).column->getName(), getName()),
- ErrorCodes::ILLEGAL_COLUMN);
- }
-};
-
-
 class FunctionTiDBIPv6NumToString : public IFunction
 {
 private:

From 702b7d17ecbb940bbcb49d633983b7c951c84c8f Mon Sep 17 00:00:00 2001
From: ds
Date: Thu, 5 May 2022 14:36:55 +0800
Subject: [PATCH 57/79] Not moving local object in return statement (#4815)

close pingcap/tiflash#4824
---
 dbms/src/Common/DynamicThreadPool.h | 2 +-
 dbms/src/Common/packTask.h | 2 +-
 dbms/src/Common/wrapInvocable.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Common/DynamicThreadPool.h b/dbms/src/Common/DynamicThreadPool.h
index 54f232ae8b0..efd3130ba92 100644
--- a/dbms/src/Common/DynamicThreadPool.h
+++ b/dbms/src/Common/DynamicThreadPool.h
@@ -56,7 +56,7 @@ class DynamicThreadPool
 auto task = packTask(propagate_memory_tracker, std::forward<Func>(func), std::forward<Args>(args)...);
 auto future = task.get_future();
 scheduleTask(std::make_unique>(std::move(task)));
- return std::move(future);
+ return future;
 }
 // wrap func into a lambda and users can't get the status of execution.
diff --git a/dbms/src/Common/packTask.h b/dbms/src/Common/packTask.h
index de3057f59ac..80cdbcf007b 100644
--- a/dbms/src/Common/packTask.h
+++ b/dbms/src/Common/packTask.h
@@ -31,6 +31,6 @@ inline auto packTask(bool propagate_memory_tracker, Func && func, Args &&... arg
 using PackagedTask = std::packaged_task;
 PackagedTask task{std::move(capture)};
- return std::move(task);
+ return task;
 }
 } // namespace DB
diff --git a/dbms/src/Common/wrapInvocable.h b/dbms/src/Common/wrapInvocable.h
index eca1d390538..d6cee519835 100644
--- a/dbms/src/Common/wrapInvocable.h
+++ b/dbms/src/Common/wrapInvocable.h
@@ -36,6 +36,6 @@ inline auto wrapInvocable(bool propagate_memory_tracker, Func && func, Args &&..
 return std::apply(std::move(func), std::move(args));
 };
- return std::move(capture);
+ return capture;
 }
 } // namespace DB
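All three one-line changes above fix the same pessimization: `return std::move(local);` disables copy elision (NRVO) and forces a move construction, which is why clang and gcc flag it with -Wpessimizing-move / -Wredundant-move. A minimal illustration (generic C++, not TiFlash code):

#include <future>

// `return task;` permits NRVO, and even for a move-only type the compiler
// performs an implicit move on a returned local. `return std::move(task);`
// rules out elision and always pays for the move.
std::packaged_task<int()> makeTask()
{
    std::packaged_task<int()> task([] { return 42; });
    return task; // preferred over: return std::move(task);
}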
From 5aa1d147736ad8ce271e74e5e7a8926939c8cb77 Mon Sep 17 00:00:00 2001
From: Calvin Neo
Date: Thu, 5 May 2022 19:44:56 +0800
Subject: [PATCH 58/79] update TiFlash proxy to disable raft-engine (#4822)

close pingcap/tiflash#4821
---
 contrib/tiflash-proxy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy
index cb1f8d04fb3..a03434d3758 160000
--- a/contrib/tiflash-proxy
+++ b/contrib/tiflash-proxy
@@ -1 +1 @@
-Subproject commit cb1f8d04fb31a49be60f4c67afb3d4aee58fa2a7
+Subproject commit a03434d3758bee4fa335ce87da5f772eebe8f9cc

From e10c6eded7de50fee020be7b883b4e56fe5990a3 Mon Sep 17 00:00:00 2001
From: SeaRise
Date: Thu, 5 May 2022 20:40:55 +0800
Subject: [PATCH 59/79] Migrate table scan related code to `DAGStorageInterpreter` (#4783)

ref pingcap/tiflash#4118
---
 .../Coprocessor/DAGQueryBlockInterpreter.cpp | 298 +-------------
 .../Coprocessor/DAGQueryBlockInterpreter.h | 10 -
 .../Coprocessor/DAGStorageInterpreter.cpp | 379 ++++++++++++++++--
 .../Flash/Coprocessor/DAGStorageInterpreter.h | 41 +-
 .../Flash/Coprocessor/InterpreterUtils.cpp | 14 +
 dbms/src/Flash/Coprocessor/InterpreterUtils.h | 8 +
 dbms/src/Flash/Coprocessor/PushDownFilter.cpp | 65 +++
 dbms/src/Flash/Coprocessor/PushDownFilter.h | 39 ++
 dbms/src/Flash/Coprocessor/RemoteRequest.cpp | 19 +-
 dbms/src/Flash/Coprocessor/RemoteRequest.h | 14 +-
 dbms/src/Flash/Coprocessor/TiDBTableScan.cpp | 6 +-
 dbms/src/Flash/Coprocessor/TiDBTableScan.h | 8 +-
 12 files changed, 529 insertions(+), 372 deletions(-)
 create mode 100644 dbms/src/Flash/Coprocessor/PushDownFilter.cpp
 create mode 100644 dbms/src/Flash/Coprocessor/PushDownFilter.h

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
index 561011e2d95..45a3c1e9471 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
@@ -28,7 +28,6 @@
 #include
 #include
#include -#include #include #include #include @@ -39,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +52,6 @@ namespace DB { namespace FailPoints { -extern const char pause_after_copr_streams_acquired[]; extern const char minimum_block_size_for_cross_join[]; } // namespace FailPoints @@ -88,23 +87,6 @@ struct AnalysisResult bool is_final_agg; }; -// add timezone cast for timestamp type, this is used to support session level timezone -bool addExtraCastsAfterTs( - DAGExpressionAnalyzer & analyzer, - const std::vector & need_cast_column, - ExpressionActionsChain & chain, - const TiDBTableScan & table_scan) -{ - bool has_need_cast_column = false; - for (auto b : need_cast_column) - { - has_need_cast_column |= (b != ExtraCastAfterTSMode::None); - } - if (!has_need_cast_column) - return false; - return analyzer.appendExtraCastsAfterTS(chain, need_cast_column, table_scan); -} - AnalysisResult analyzeExpressions( Context & context, DAGExpressionAnalyzer & analyzer, @@ -169,223 +151,16 @@ AnalysisResult analyzeExpressions( //todo need call prependProjectInput?? return res; } - -void setQuotaAndLimitsOnTableScan(Context & context, DAGPipeline & pipeline) -{ - const Settings & settings = context.getSettingsRef(); - - IProfilingBlockInputStream::LocalLimits limits; - limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; - limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); - limits.max_execution_time = settings.max_execution_time; - limits.timeout_overflow_mode = settings.timeout_overflow_mode; - - /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, - * because the initiating server has a summary of the execution of the request on all servers. - * - * But limits on data size to read and maximum execution time are reasonable to check both on initiator and - * additionally on each remote server, because these limits are checked per block of data processed, - * and remote servers may process way more blocks of data than are received by initiator. 
- */ - limits.min_execution_speed = settings.min_execution_speed; - limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; - - QuotaForIntervals & quota = context.getQuota(); - - pipeline.transform([&](auto & stream) { - if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) - { - p_stream->setLimits(limits); - p_stream->setQuota(quota); - } - }); -} - } // namespace -ExpressionActionsPtr generateProjectExpressionActions( - const BlockInputStreamPtr & stream, - const Context & context, - const NamesWithAliases & project_cols) -{ - auto columns = stream->getHeader(); - NamesAndTypesList input_column; - for (const auto & column : columns.getColumnsWithTypeAndName()) - { - input_column.emplace_back(column.name, column.type); - } - ExpressionActionsPtr project = std::make_shared(input_column, context.getSettingsRef()); - project->add(ExpressionAction::project(project_cols)); - return project; -} - void DAGQueryBlockInterpreter::handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline) { - bool has_region_to_read = false; - for (const auto physical_table_id : table_scan.getPhysicalTableIDs()) - { - const auto & table_regions_info = dagContext().getTableRegionsInfoByTableID(physical_table_id); - if (!table_regions_info.local_regions.empty() || !table_regions_info.remote_regions.empty()) - { - has_region_to_read = true; - break; - } - } - if (!has_region_to_read) - throw TiFlashException(fmt::format("Dag Request does not have region to read for table: {}", table_scan.getLogicalTableID()), Errors::Coprocessor::BadRequest); - // construct pushed down filter conditions. - std::vector conditions; - if (query_block.selection) - { - for (const auto & condition : query_block.selection->selection().conditions()) - conditions.push_back(&condition); - } + const auto push_down_filter = PushDownFilter::toPushDownFilter(query_block.selection); - DAGStorageInterpreter storage_interpreter(context, query_block, table_scan, conditions, max_streams); + DAGStorageInterpreter storage_interpreter(context, table_scan, push_down_filter, max_streams); storage_interpreter.execute(pipeline); analyzer = std::move(storage_interpreter.analyzer); - - - auto remote_requests = std::move(storage_interpreter.remote_requests); - auto null_stream_if_empty = std::move(storage_interpreter.null_stream_if_empty); - - // It is impossible to have no joined stream. - assert(pipeline.streams_with_non_joined_data.empty()); - // after executeRemoteQueryImpl, remote read stream will be appended in pipeline.streams. - size_t remote_read_streams_start_index = pipeline.streams.size(); - - // For those regions which are not presented in this tiflash node, we will try to fetch streams by key ranges from other tiflash nodes, only happens in batch cop / mpp mode. - if (!remote_requests.empty()) - executeRemoteQueryImpl(pipeline, remote_requests); - - /// record local and remote io input stream - auto & table_scan_io_input_streams = dagContext().getInBoundIOInputStreamsMap()[query_block.source_name]; - pipeline.transform([&](auto & stream) { table_scan_io_input_streams.push_back(stream); }); - - if (pipeline.streams.empty()) - { - pipeline.streams.emplace_back(null_stream_if_empty); - // reset remote_read_streams_start_index for null_stream_if_empty. 
- remote_read_streams_start_index = 1; - } - - /// Theoretically we could move addTableLock to DAGStorageInterpreter, but we don't wants to the table to be dropped - /// during the lifetime of this query, and sometimes if there is no local region, we will use the RemoteBlockInputStream - /// or even the null_stream to hold the lock, so I would like too keep the addTableLock in DAGQueryBlockInterpreter - pipeline.transform([&](auto & stream) { - // todo do not need to hold all locks in each stream, if the stream is reading from table a - // it only needs to hold the lock of table a - for (auto & lock : storage_interpreter.drop_locks) - stream->addTableLock(lock); - }); - - /// Set the limits and quota for reading data, the speed and time of the query. - setQuotaAndLimitsOnTableScan(context, pipeline); - FAIL_POINT_PAUSE(FailPoints::pause_after_copr_streams_acquired); - - /// handle timezone/duration cast for local and remote table scan. - executeCastAfterTableScan( - table_scan, - storage_interpreter.is_need_add_cast_column, - remote_read_streams_start_index, - pipeline); - recordProfileStreams(pipeline, query_block.source_name); - - /// handle pushed down filter for local and remote table scan. - if (query_block.selection) - { - executePushedDownFilter(conditions, remote_read_streams_start_index, pipeline); - recordProfileStreams(pipeline, query_block.selection_name); - } -} - -void DAGQueryBlockInterpreter::executePushedDownFilter( - const std::vector & conditions, - size_t remote_read_streams_start_index, - DAGPipeline & pipeline) -{ - ExpressionActionsChain chain; - analyzer->initChain(chain, analyzer->getCurrentInputColumns()); - String filter_column_name = analyzer->appendWhere(chain, conditions); - ExpressionActionsPtr before_where = chain.getLastActions(); - chain.addStep(); - - // remove useless tmp column and keep the schema of local streams and remote streams the same. - NamesWithAliases project_cols; - for (const auto & col : analyzer->getCurrentInputColumns()) - { - chain.getLastStep().required_output.push_back(col.name); - project_cols.emplace_back(col.name, col.name); - } - chain.getLastActions()->add(ExpressionAction::project(project_cols)); - ExpressionActionsPtr project_after_where = chain.getLastActions(); - chain.finalize(); - chain.clear(); - - assert(pipeline.streams_with_non_joined_data.empty()); - assert(remote_read_streams_start_index <= pipeline.streams.size()); - // for remote read, filter had been pushed down, don't need to execute again. - for (size_t i = 0; i < remote_read_streams_start_index; ++i) - { - auto & stream = pipeline.streams[i]; - stream = std::make_shared(stream, before_where, filter_column_name, log->identifier()); - // after filter, do project action to keep the schema of local streams and remote streams the same. 
- stream = std::make_shared(stream, project_after_where, log->identifier()); - } -} - -void DAGQueryBlockInterpreter::executeCastAfterTableScan( - const TiDBTableScan & table_scan, - const std::vector & is_need_add_cast_column, - size_t remote_read_streams_start_index, - DAGPipeline & pipeline) -{ - auto original_source_columns = analyzer->getCurrentInputColumns(); - - ExpressionActionsChain chain; - analyzer->initChain(chain, original_source_columns); - - // execute timezone cast or duration cast if needed for local table scan - if (addExtraCastsAfterTs(*analyzer, is_need_add_cast_column, chain, table_scan)) - { - ExpressionActionsPtr extra_cast = chain.getLastActions(); - chain.finalize(); - chain.clear(); - - // After `addExtraCastsAfterTs`, analyzer->getCurrentInputColumns() has been modified. - // For remote read, `timezone cast and duration cast` had been pushed down, don't need to execute cast expressions. - // To keep the schema of local read streams and remote read streams the same, do project action for remote read streams. - NamesWithAliases project_for_remote_read; - const auto & after_cast_source_columns = analyzer->getCurrentInputColumns(); - for (size_t i = 0; i < after_cast_source_columns.size(); ++i) - { - project_for_remote_read.emplace_back(original_source_columns[i].name, after_cast_source_columns[i].name); - } - assert(!project_for_remote_read.empty()); - assert(pipeline.streams_with_non_joined_data.empty()); - assert(remote_read_streams_start_index <= pipeline.streams.size()); - size_t i = 0; - // local streams - while (i < remote_read_streams_start_index) - { - auto & stream = pipeline.streams[i++]; - stream = std::make_shared(stream, extra_cast, log->identifier()); - } - // remote streams - if (i < pipeline.streams.size()) - { - ExpressionActionsPtr project_for_cop_read = generateProjectExpressionActions( - pipeline.streams[i], - context, - project_for_remote_read); - while (i < pipeline.streams.size()) - { - auto & stream = pipeline.streams[i++]; - stream = std::make_shared(stream, project_for_cop_read, log->identifier()); - } - } - } } void DAGQueryBlockInterpreter::prepareJoin( @@ -867,71 +642,6 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons pipeline.transform([&profile_streams](auto & stream) { profile_streams.push_back(stream); }); } -bool schemaMatch(const DAGSchema & left, const DAGSchema & right) -{ - if (left.size() != right.size()) - return false; - for (size_t i = 0; i < left.size(); i++) - { - const auto & left_ci = left[i]; - const auto & right_ci = right[i]; - if (left_ci.second.tp != right_ci.second.tp) - return false; - if (left_ci.second.flag != right_ci.second.flag) - return false; - } - return true; -} - -void DAGQueryBlockInterpreter::executeRemoteQueryImpl( - DAGPipeline & pipeline, - std::vector & remote_requests) -{ - assert(!remote_requests.empty()); - DAGSchema & schema = remote_requests[0].schema; -#ifndef NDEBUG - for (size_t i = 1; i < remote_requests.size(); i++) - { - if (!schemaMatch(schema, remote_requests[i].schema)) - throw Exception("Schema mismatch between different partitions for partition table"); - } -#endif - bool has_enforce_encode_type = remote_requests[0].dag_request.has_force_encode_type() && remote_requests[0].dag_request.force_encode_type(); - pingcap::kv::Cluster * cluster = context.getTMTContext().getKVCluster(); - std::vector all_tasks; - for (const auto & remote_request : remote_requests) - { - pingcap::coprocessor::RequestPtr req = std::make_shared(); - 
remote_request.dag_request.SerializeToString(&(req->data)); - req->tp = pingcap::coprocessor::ReqType::DAG; - req->start_ts = context.getSettingsRef().read_tso; - req->schema_version = context.getSettingsRef().schema_version; - - pingcap::kv::Backoffer bo(pingcap::kv::copBuildTaskMaxBackoff); - pingcap::kv::StoreType store_type = pingcap::kv::StoreType::TiFlash; - auto tasks = pingcap::coprocessor::buildCopTasks(bo, cluster, remote_request.key_ranges, req, store_type, &Poco::Logger::get("pingcap/coprocessor")); - all_tasks.insert(all_tasks.end(), tasks.begin(), tasks.end()); - } - - size_t concurrent_num = std::min(context.getSettingsRef().max_threads, all_tasks.size()); - size_t task_per_thread = all_tasks.size() / concurrent_num; - size_t rest_task = all_tasks.size() % concurrent_num; - for (size_t i = 0, task_start = 0; i < concurrent_num; i++) - { - size_t task_end = task_start + task_per_thread; - if (i < rest_task) - task_end++; - if (task_end == task_start) - continue; - std::vector tasks(all_tasks.begin() + task_start, all_tasks.begin() + task_end); - - auto coprocessor_reader = std::make_shared(schema, cluster, tasks, has_enforce_encode_type, 1); - BlockInputStreamPtr input = std::make_shared(coprocessor_reader, log->identifier(), query_block.source_name); - pipeline.streams.push_back(input); - task_start = task_end; - } -} - void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) { auto it = dagContext().getMPPExchangeReceiverMap().find(query_block.source_name); @@ -1041,7 +751,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) } else if (query_block.isTableScanSource()) { - TiDBTableScan table_scan(query_block.source, dagContext()); + TiDBTableScan table_scan(query_block.source, query_block.source_name, dagContext()); handleTableScan(table_scan, pipeline); dagContext().table_scan_executor_id = query_block.source_name; } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 8e6908dec80..84253afbc45 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -58,12 +58,6 @@ class DAGQueryBlockInterpreter #endif void executeImpl(DAGPipeline & pipeline); void handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); - void executeCastAfterTableScan( - const TiDBTableScan & table_scan, - const std::vector & is_need_add_cast_column, - size_t remote_read_streams_start_index, - DAGPipeline & pipeline); - void executePushedDownFilter(const std::vector & conditions, size_t remote_read_streams_start_index, DAGPipeline & pipeline); void handleJoin(const tipb::Join & join, DAGPipeline & pipeline, SubqueryForSet & right_query); void prepareJoin( const google::protobuf::RepeatedPtrField & keys, @@ -108,10 +102,6 @@ class DAGQueryBlockInterpreter void restorePipelineConcurrency(DAGPipeline & pipeline); - void executeRemoteQueryImpl( - DAGPipeline & pipeline, - std::vector & remote_requests); - DAGContext & dagContext() const { return *context.getDAGContext(); } Context & context; diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index 0a3e6396ece..f514293e7d6 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -15,9 +15,15 @@ #include #include #include +#include +#include +#include #include +#include +#include #include #include +#include #include 
#include #include @@ -33,6 +39,7 @@ extern const char region_exception_after_read_from_storage_some_error[]; extern const char region_exception_after_read_from_storage_all_error[]; extern const char pause_after_learner_read[]; extern const char force_remote_read_for_batch_cop[]; +extern const char pause_after_copr_streams_acquired[]; } // namespace FailPoints namespace @@ -73,7 +80,7 @@ MakeRegionQueryInfos( if (r.key_ranges.empty()) { throw TiFlashException( - "Income key ranges is empty for region: " + std::to_string(r.region_id), + fmt::format("Income key ranges is empty for region: {}", r.region_id), Errors::Coprocessor::BadRequest); } if (region_force_retry.count(id)) @@ -103,14 +110,16 @@ MakeRegionQueryInfos( if (!computeMappedTableID(*p.first, table_id_in_range) || table_id_in_range != physical_table_id) { throw TiFlashException( - "Income key ranges is illegal for region: " + std::to_string(r.region_id) - + ", table id in key range is " + std::to_string(table_id_in_range) + ", table id in region is " - + std::to_string(physical_table_id), + fmt::format( + "Income key ranges is illegal for region: {}, table id in key range is {}, table id in region is {}", + r.region_id, + table_id_in_range, + physical_table_id), Errors::Coprocessor::BadRequest); } if (p.first->compare(*info.range_in_table.first) < 0 || p.second->compare(*info.range_in_table.second) > 0) throw TiFlashException( - "Income key ranges is illegal for region: " + std::to_string(r.region_id), + fmt::format("Income key ranges is illegal for region: {}", r.region_id), Errors::Coprocessor::BadRequest); } info.required_handle_ranges = r.key_ranges; @@ -127,18 +136,78 @@ MakeRegionQueryInfos( return std::make_tuple(std::move(region_need_retry), status_res); } +bool hasRegionToRead(const DAGContext & dag_context, const TiDBTableScan & table_scan) +{ + bool has_region_to_read = false; + for (const auto physical_table_id : table_scan.getPhysicalTableIDs()) + { + const auto & table_regions_info = dag_context.getTableRegionsInfoByTableID(physical_table_id); + if (!table_regions_info.local_regions.empty() || !table_regions_info.remote_regions.empty()) + { + has_region_to_read = true; + break; + } + } + return has_region_to_read; +} + +void setQuotaAndLimitsOnTableScan(Context & context, DAGPipeline & pipeline) +{ + const Settings & settings = context.getSettingsRef(); + + IProfilingBlockInputStream::LocalLimits limits; + limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; + limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); + limits.max_execution_time = settings.max_execution_time; + limits.timeout_overflow_mode = settings.timeout_overflow_mode; + + /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, + * because the initiating server has a summary of the execution of the request on all servers. + * + * But limits on data size to read and maximum execution time are reasonable to check both on initiator and + * additionally on each remote server, because these limits are checked per block of data processed, + * and remote servers may process way more blocks of data than are received by initiator. 
+ */ + limits.min_execution_speed = settings.min_execution_speed; + limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + + QuotaForIntervals & quota = context.getQuota(); + + pipeline.transform([&](auto & stream) { + if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) + { + p_stream->setLimits(limits); + p_stream->setQuota(quota); + } + }); +} + +// add timezone cast for timestamp type, this is used to support session level timezone +bool addExtraCastsAfterTs( + DAGExpressionAnalyzer & analyzer, + const std::vector & need_cast_column, + ExpressionActionsChain & chain, + const TiDBTableScan & table_scan) +{ + bool has_need_cast_column = false; + for (auto b : need_cast_column) + { + has_need_cast_column |= (b != ExtraCastAfterTSMode::None); + } + if (!has_need_cast_column) + return false; + return analyzer.appendExtraCastsAfterTS(chain, need_cast_column, table_scan); +} } // namespace DAGStorageInterpreter::DAGStorageInterpreter( Context & context_, - const DAGQueryBlock & query_block_, const TiDBTableScan & table_scan_, - const std::vector & conditions_, + const PushDownFilter & push_down_filter_, size_t max_streams_) : context(context_) - , query_block(query_block_) , table_scan(table_scan_) - , conditions(conditions_) + , push_down_filter(push_down_filter_) , max_streams(max_streams_) , log(Logger::get("DAGStorageInterpreter", context.getDAGContext()->log ? context.getDAGContext()->log->identifier() : "")) , logical_table_id(table_scan.getLogicalTableID()) @@ -146,9 +215,80 @@ DAGStorageInterpreter::DAGStorageInterpreter( , tmt(context.getTMTContext()) , mvcc_query_info(new MvccQueryInfo(true, settings.read_tso)) { + if (unlikely(!hasRegionToRead(dagContext(), table_scan))) + { + throw TiFlashException( + fmt::format("Dag Request does not have region to read for table: {}", logical_table_id), + Errors::Coprocessor::BadRequest); + } } void DAGStorageInterpreter::execute(DAGPipeline & pipeline) +{ + prepare(); + + executeImpl(pipeline); +} + +void DAGStorageInterpreter::executeImpl(DAGPipeline & pipeline) +{ + if (!mvcc_query_info->regions_query_info.empty()) + doLocalRead(pipeline, settings.max_block_size); + + null_stream_if_empty = std::make_shared(storage_for_logical_table->getSampleBlockForColumns(required_columns)); + + // Should build these vars under protect of `table_structure_lock`. + buildRemoteRequests(); + + releaseAlterLocks(); + + // It is impossible to have no joined stream. + assert(pipeline.streams_with_non_joined_data.empty()); + // after executeRemoteQuery, remote read stream will be appended in pipeline.streams. + size_t remote_read_streams_start_index = pipeline.streams.size(); + + // For those regions which are not presented in this tiflash node, we will try to fetch streams by key ranges from other tiflash nodes, only happens in batch cop / mpp mode. + if (!remote_requests.empty()) + executeRemoteQuery(pipeline); + + /// record local and remote io input stream + auto & table_scan_io_input_streams = dagContext().getInBoundIOInputStreamsMap()[table_scan.getTableScanExecutorID()]; + pipeline.transform([&](auto & stream) { table_scan_io_input_streams.push_back(stream); }); + + if (pipeline.streams.empty()) + { + pipeline.streams.emplace_back(null_stream_if_empty); + // reset remote_read_streams_start_index for null_stream_if_empty. 
+ remote_read_streams_start_index = 1; + } + + /// We don't want the table to be dropped during the lifetime of this query, + /// and sometimes if there is no local region, we will use the RemoteBlockInputStream + /// or even the null_stream to hold the lock. + pipeline.transform([&](auto & stream) { + // todo do not need to hold all locks in each stream, if the stream is reading from table a + // it only needs to hold the lock of table a + for (auto & lock : drop_locks) + stream->addTableLock(lock); + }); + + /// Set the limits and quota for reading data, the speed and time of the query. + setQuotaAndLimitsOnTableScan(context, pipeline); + FAIL_POINT_PAUSE(FailPoints::pause_after_copr_streams_acquired); + + /// handle timezone/duration cast for local and remote table scan. + executeCastAfterTableScan(remote_read_streams_start_index, pipeline); + recordProfileStreams(pipeline, table_scan.getTableScanExecutorID()); + + /// handle pushed down filter for local and remote table scan. + if (push_down_filter.hasValue()) + { + executePushedDownFilter(remote_read_streams_start_index, pipeline); + recordProfileStreams(pipeline, push_down_filter.executor_id); + } +} + +void DAGStorageInterpreter::prepare() { const DAGContext & dag_context = *context.getDAGContext(); if (dag_context.isBatchCop() || dag_context.isMPPTask()) @@ -165,16 +305,159 @@ void DAGStorageInterpreter::execute(DAGPipeline & pipeline) analyzer = std::make_unique(std::move(source_columns), context); FAIL_POINT_PAUSE(FailPoints::pause_after_learner_read); +} - if (!mvcc_query_info->regions_query_info.empty()) - doLocalRead(pipeline, settings.max_block_size); +void DAGStorageInterpreter::executePushedDownFilter( + size_t remote_read_streams_start_index, + DAGPipeline & pipeline) +{ + ExpressionActionsChain chain; + analyzer->initChain(chain, analyzer->getCurrentInputColumns()); + String filter_column_name = analyzer->appendWhere(chain, push_down_filter.conditions); + ExpressionActionsPtr before_where = chain.getLastActions(); + chain.addStep(); + + // remove useless tmp column and keep the schema of local streams and remote streams the same. + NamesWithAliases project_cols; + for (const auto & col : analyzer->getCurrentInputColumns()) + { + chain.getLastStep().required_output.push_back(col.name); + project_cols.emplace_back(col.name, col.name); + } + chain.getLastActions()->add(ExpressionAction::project(project_cols)); + ExpressionActionsPtr project_after_where = chain.getLastActions(); + chain.finalize(); + chain.clear(); + + assert(pipeline.streams_with_non_joined_data.empty()); + assert(remote_read_streams_start_index <= pipeline.streams.size()); + // for remote read, filter had been pushed down, don't need to execute again. + for (size_t i = 0; i < remote_read_streams_start_index; ++i) + { + auto & stream = pipeline.streams[i]; + stream = std::make_shared(stream, before_where, filter_column_name, log->identifier()); + // after filter, do project action to keep the schema of local streams and remote streams the same. + stream = std::make_shared(stream, project_after_where, log->identifier()); + } +} - null_stream_if_empty = std::make_shared(storage_for_logical_table->getSampleBlockForColumns(required_columns)); +void DAGStorageInterpreter::executeCastAfterTableScan( + size_t remote_read_streams_start_index, + DAGPipeline & pipeline) +{ + auto original_source_columns = analyzer->getCurrentInputColumns(); - // Should build these vars under protect of `table_structure_lock`. 
- buildRemoteRequests(); + ExpressionActionsChain chain; + analyzer->initChain(chain, original_source_columns); - releaseAlterLocks(); + // execute timezone cast or duration cast if needed for local table scan + if (addExtraCastsAfterTs(*analyzer, is_need_add_cast_column, chain, table_scan)) + { + ExpressionActionsPtr extra_cast = chain.getLastActions(); + chain.finalize(); + chain.clear(); + + // After `addExtraCastsAfterTs`, analyzer->getCurrentInputColumns() has been modified. + // For remote read, `timezone cast and duration cast` had been pushed down, don't need to execute cast expressions. + // To keep the schema of local read streams and remote read streams the same, do project action for remote read streams. + NamesWithAliases project_for_remote_read; + const auto & after_cast_source_columns = analyzer->getCurrentInputColumns(); + for (size_t i = 0; i < after_cast_source_columns.size(); ++i) + { + project_for_remote_read.emplace_back(original_source_columns[i].name, after_cast_source_columns[i].name); + } + assert(!project_for_remote_read.empty()); + assert(pipeline.streams_with_non_joined_data.empty()); + assert(remote_read_streams_start_index <= pipeline.streams.size()); + size_t i = 0; + // local streams + while (i < remote_read_streams_start_index) + { + auto & stream = pipeline.streams[i++]; + stream = std::make_shared(stream, extra_cast, log->identifier()); + } + // remote streams + if (i < pipeline.streams.size()) + { + ExpressionActionsPtr project_for_cop_read = generateProjectExpressionActions( + pipeline.streams[i], + context, + project_for_remote_read); + while (i < pipeline.streams.size()) + { + auto & stream = pipeline.streams[i++]; + stream = std::make_shared(stream, project_for_cop_read, log->identifier()); + } + } + } +} + +void DAGStorageInterpreter::executeRemoteQuery(DAGPipeline & pipeline) +{ + assert(!remote_requests.empty()); + DAGSchema & schema = remote_requests[0].schema; +#ifndef NDEBUG + auto schema_match = [&schema](const DAGSchema & other) { + if (schema.size() != other.size()) + return false; + for (size_t i = 0; i < schema.size(); ++i) + { + if (schema[i].second.tp != other[i].second.tp || schema[i].second.flag != other[i].second.flag) + return false; + } + return true; + }; + for (size_t i = 1; i < remote_requests.size(); ++i) + { + if (!schema_match(remote_requests[i].schema)) + throw Exception("Schema mismatch between different partitions for partition table"); + } +#endif + bool has_enforce_encode_type = remote_requests[0].dag_request.has_force_encode_type() && remote_requests[0].dag_request.force_encode_type(); + pingcap::kv::Cluster * cluster = tmt.getKVCluster(); + std::vector all_tasks; + for (const auto & remote_request : remote_requests) + { + pingcap::coprocessor::RequestPtr req = std::make_shared(); + remote_request.dag_request.SerializeToString(&(req->data)); + req->tp = pingcap::coprocessor::ReqType::DAG; + req->start_ts = context.getSettingsRef().read_tso; + req->schema_version = context.getSettingsRef().schema_version; + + pingcap::kv::Backoffer bo(pingcap::kv::copBuildTaskMaxBackoff); + pingcap::kv::StoreType store_type = pingcap::kv::StoreType::TiFlash; + auto tasks = pingcap::coprocessor::buildCopTasks(bo, cluster, remote_request.key_ranges, req, store_type, &Poco::Logger::get("pingcap/coprocessor")); + all_tasks.insert(all_tasks.end(), tasks.begin(), tasks.end()); + } + + size_t concurrent_num = std::min(context.getSettingsRef().max_threads, all_tasks.size()); + size_t task_per_thread = all_tasks.size() / concurrent_num; + size_t 
rest_task = all_tasks.size() % concurrent_num;
+ for (size_t i = 0, task_start = 0; i < concurrent_num; ++i)
+ {
+ size_t task_end = task_start + task_per_thread;
+ if (i < rest_task)
+ task_end++;
+ if (task_end == task_start)
+ continue;
+ std::vector<pingcap::coprocessor::copTask> tasks(all_tasks.begin() + task_start, all_tasks.begin() + task_end);
+
+ auto coprocessor_reader = std::make_shared<CoprocessorReader>(schema, cluster, tasks, has_enforce_encode_type, 1);
+ BlockInputStreamPtr input = std::make_shared<CoprocessorBlockInputStream>(coprocessor_reader, log->identifier(), table_scan.getTableScanExecutorID());
+ pipeline.streams.push_back(input);
+ task_start = task_end;
+ }
+}
+
+DAGContext & DAGStorageInterpreter::dagContext() const
+{
+ return *context.getDAGContext();
+}
+
+void DAGStorageInterpreter::recordProfileStreams(DAGPipeline & pipeline, const String & key)
+{
+ auto & profile_streams = dagContext().getProfileStreamsMap()[key];
+ pipeline.transform([&profile_streams](auto & stream) { profile_streams.push_back(stream); });
 }
 LearnerReadSnapshot DAGStorageInterpreter::doCopLearnerRead()
@@ -252,7 +535,7 @@ LearnerReadSnapshot DAGStorageInterpreter::doBatchCopLearnerRead()
 }
 catch (DB::Exception & e)
 {
- e.addMessage("(while doing learner read for table, logical table_id: " + DB::toString(logical_table_id) + ")");
+ e.addMessage(fmt::format("(while doing learner read for table, logical table_id: {})", logical_table_id));
 throw;
 }
 }
@@ -266,7 +549,7 @@ std::unordered_map<TableID, SelectQueryInfo> DAGStorageInterpreter::generateSele
 /// to avoid null point exception
 query_info.query = makeDummyQuery();
 query_info.dag_query = std::make_unique<DAGQueryInfo>(
- conditions,
+ push_down_filter.conditions,
 analyzer->getPreparedSets(),
 analyzer->getCurrentInputColumns(),
 context.getTimezoneInfo());
@@ -418,11 +701,18 @@ void DAGStorageInterpreter::doLocalRead(DAGPipeline & pipeline, size_t max_block
 {
 // Throw an exception for TiDB / TiSpark to retry
 if (table_id == logical_table_id)
- e.addMessage("(while creating InputStreams from storage `" + storage->getDatabaseName() + "`.`" + storage->getTableName()
- + "`, table_id: " + DB::toString(table_id) + ")");
+ e.addMessage(fmt::format(
+ "(while creating InputStreams from storage `{}`.`{}`, table_id: {})",
+ storage->getDatabaseName(),
+ storage->getTableName(),
+ table_id));
 else
- e.addMessage("(while creating InputStreams from storage `" + storage->getDatabaseName() + "`.`" + storage->getTableName()
- + "`, table_id: " + DB::toString(table_id) + ", logical_table_id: " + DB::toString(logical_table_id) + ")");
+ e.addMessage(fmt::format(
+ "(while creating InputStreams from storage 
`{}`.`{}`, table_id: {}, logical_table_id: {})", + storage->getDatabaseName(), + storage->getTableName(), + table_id, + logical_table_id)); throw; } } @@ -450,7 +747,7 @@ std::unordered_map DAG auto logical_table_storage = tmt.getStorages().get(logical_table_id); if (!logical_table_storage) { - throw TiFlashException("Table " + std::to_string(logical_table_id) + " doesn't exist.", Errors::Table::NotExists); + throw TiFlashException(fmt::format("Table {} doesn't exist.", logical_table_id), Errors::Table::NotExists); } storages_with_lock[logical_table_id] = {logical_table_storage, logical_table_storage->lockStructureForShare(context.getCurrentQueryId())}; if (table_scan.isPartitionTableScan()) @@ -460,7 +757,7 @@ std::unordered_map DAG auto physical_table_storage = tmt.getStorages().get(physical_table_id); if (!physical_table_storage) { - throw TiFlashException("Table " + std::to_string(physical_table_id) + " doesn't exist.", Errors::Table::NotExists); + throw TiFlashException(fmt::format("Table {} doesn't exist.", physical_table_id), Errors::Table::NotExists); } storages_with_lock[physical_table_id] = {physical_table_storage, physical_table_storage->lockStructureForShare(context.getCurrentQueryId())}; } @@ -479,16 +776,20 @@ std::unordered_map DAG if (!table_store) { if (schema_synced) - throw TiFlashException("Table " + std::to_string(table_id) + " doesn't exist.", Errors::Table::NotExists); + throw TiFlashException(fmt::format("Table {} doesn't exist.", table_id), Errors::Table::NotExists); else return {{}, {}, {}, false}; } if (table_store->engineType() != ::TiDB::StorageEngine::TMT && table_store->engineType() != ::TiDB::StorageEngine::DT) { - throw TiFlashException("Specifying schema_version for non-managed storage: " + table_store->getName() - + ", table: " + table_store->getTableName() + ", id: " + DB::toString(table_id) + " is not allowed", - Errors::Coprocessor::Internal); + throw TiFlashException( + fmt::format( + "Specifying schema_version for non-managed storage: {}, table: {}, id: {} is not allowed", + table_store->getName(), + table_store->getTableName(), + table_id), + Errors::Coprocessor::Internal); } auto lock = table_store->lockStructureForShare(context.getCurrentQueryId()); @@ -502,9 +803,9 @@ std::unordered_map DAG auto storage_schema_version = table_store->getTableInfo().schema_version; // Not allow storage > query in any case, one example is time travel queries. if (storage_schema_version > query_schema_version) - throw TiFlashException("Table " + std::to_string(table_id) + " schema version " + std::to_string(storage_schema_version) - + " newer than query schema version " + std::to_string(query_schema_version), - Errors::Table::SchemaVersionError); + throw TiFlashException( + fmt::format("Table {} schema version {} newer than query schema version {}", table_id, storage_schema_version, query_schema_version), + Errors::Table::SchemaVersionError); // From now on we have storage <= query. // If schema was synced, it implies that global >= query, as mentioned above we have storage <= query, we are OK to serve. if (schema_synced) @@ -605,10 +906,9 @@ std::tuple> DAGStorageIn // todo handle alias column if (max_columns_to_read && table_scan.getColumnSize() > max_columns_to_read) { - throw TiFlashException("Limit for number of columns to read exceeded. 
" - "Requested: " - + toString(table_scan.getColumnSize()) + ", maximum: " + toString(max_columns_to_read), - Errors::BroadcastJoin::TooManyColumns); + throw TiFlashException( + fmt::format("Limit for number of columns to read exceeded. Requested: {}, maximum: {}", table_scan.getColumnSize(), max_columns_to_read), + Errors::BroadcastJoin::TooManyColumns); } Names required_columns_tmp; @@ -672,7 +972,6 @@ void DAGStorageInterpreter::buildRemoteRequests() retry_regions_map[region_id_to_table_id_map[r.get().region_id]].emplace_back(r); } - for (const auto physical_table_id : table_scan.getPhysicalTableIDs()) { const auto & retry_regions = retry_regions_map[physical_table_id]; @@ -687,7 +986,7 @@ void DAGStorageInterpreter::buildRemoteRequests() *context.getDAGContext(), table_scan, storages_with_structure_lock[physical_table_id].storage->getTableInfo(), - query_block.selection, + push_down_filter, log)); } } diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h index d6760d8daab..a1d88083468 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h @@ -17,8 +17,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -50,9 +49,8 @@ class DAGStorageInterpreter public: DAGStorageInterpreter( Context & context_, - const DAGQueryBlock & query_block_, const TiDBTableScan & table_scan, - const std::vector & conditions_, + const PushDownFilter & push_down_filter_, size_t max_streams_); DAGStorageInterpreter(DAGStorageInterpreter &&) = delete; @@ -63,12 +61,6 @@ class DAGStorageInterpreter /// Members will be transfered to DAGQueryBlockInterpreter after execute std::unique_ptr analyzer; - std::vector is_need_add_cast_column; - /// it shouldn't be hash map because duplicated region id may occur if merge regions to retry of dag. - RegionRetryList region_retry_from_local_region; - TableLockHolders drop_locks; - std::vector remote_requests; - BlockInputStreamPtr null_stream_if_empty; private: struct StorageWithStructureLock @@ -92,12 +84,37 @@ class DAGStorageInterpreter std::unordered_map generateSelectQueryInfos(); + DAGContext & dagContext() const; + + void recordProfileStreams(DAGPipeline & pipeline, const String & key); + + void executeRemoteQuery(DAGPipeline & pipeline); + + void executeCastAfterTableScan( + size_t remote_read_streams_start_index, + DAGPipeline & pipeline); + + void executePushedDownFilter( + size_t remote_read_streams_start_index, + DAGPipeline & pipeline); + + void prepare(); + + void executeImpl(DAGPipeline & pipeline); + +private: + std::vector is_need_add_cast_column; + /// it shouldn't be hash map because duplicated region id may occur if merge regions to retry of dag. 
+ RegionRetryList region_retry_from_local_region;
+ TableLockHolders drop_locks;
+ std::vector<RemoteRequest> remote_requests;
+ BlockInputStreamPtr null_stream_if_empty;
+
 /// passed from caller, doesn't change during DAGStorageInterpreter's lifetime
 Context & context;
- const DAGQueryBlock & query_block;
 const TiDBTableScan & table_scan;
- const std::vector<const tipb::Expr *> & conditions;
+ const PushDownFilter & push_down_filter;
 size_t max_streams;
 LoggerPtr log;
diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
index 25ce6aa9a06..69060071997 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 namespace DB
 {
@@ -79,4 +80,17 @@ void executeUnion(
 pipeline.streams.push_back(non_joined_data_stream);
 }
 }
+
+ExpressionActionsPtr generateProjectExpressionActions(
+ const BlockInputStreamPtr & stream,
+ const Context & context,
+ const NamesWithAliases & project_cols)
+{
+ NamesAndTypesList input_column;
+ for (const auto & column : stream->getHeader())
+ input_column.emplace_back(column.name, column.type);
+ ExpressionActionsPtr project = std::make_shared<ExpressionActions>(input_column, context.getSettingsRef());
+ project->add(ExpressionAction::project(project_cols));
+ return project;
+}
 } // namespace DB
diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.h b/dbms/src/Flash/Coprocessor/InterpreterUtils.h
index 932b8f404c5..91e6d483220 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterUtils.h
+++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.h
@@ -16,9 +16,12 @@
 #include
 #include
+#include
 namespace DB
 {
+class Context;
+
 void restoreConcurrency(
 DAGPipeline & pipeline,
 size_t concurrency,
@@ -35,4 +38,9 @@ void executeUnion(
 size_t max_streams,
 const LoggerPtr & log,
 bool ignore_block = false);
+
+ExpressionActionsPtr generateProjectExpressionActions(
+ const BlockInputStreamPtr & stream,
+ const Context & context,
+ const NamesWithAliases & project_cols);
 } // namespace DB
diff --git a/dbms/src/Flash/Coprocessor/PushDownFilter.cpp b/dbms/src/Flash/Coprocessor/PushDownFilter.cpp
new file mode 100644
index 00000000000..54638f899b3
--- /dev/null
+++ b/dbms/src/Flash/Coprocessor/PushDownFilter.cpp
@@ -0,0 +1,65 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
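// What the file below implements, in brief: PushDownFilter carries the
// selection conditions that were pushed down into the table scan, together
// with that selection's executor_id. On the local read path the conditions
// are applied via DAGStorageInterpreter::executePushedDownFilter; on the
// remote read path, constructSelectionForRemoteRead() rebuilds an equivalent
// tipb Selection executor on top of the TableScan of the retried request,
// so the remote node re-applies the same filter:
//
//     Selection(executor_id, conditions)
//         TableScan(...)
//
// Both paths therefore produce the same output schema and filtering.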
+ +#include +#include +#include + +namespace DB +{ +PushDownFilter::PushDownFilter( + const String & executor_id_, + const std::vector & conditions_) + : executor_id(executor_id_) + , conditions(conditions_) +{ + if (unlikely(conditions.empty() != executor_id.empty())) + { + throw TiFlashException( + "for PushDownFilter, conditions and executor_id should both be empty or neither should be empty", + Errors::Coprocessor::BadRequest); + } +} + +tipb::Executor * PushDownFilter::constructSelectionForRemoteRead(tipb::Executor * mutable_executor) const +{ + if (hasValue()) + { + mutable_executor->set_tp(tipb::ExecType::TypeSelection); + mutable_executor->set_executor_id(executor_id); + auto * new_selection = mutable_executor->mutable_selection(); + for (const auto & condition : conditions) + *new_selection->add_conditions() = *condition; + return new_selection->mutable_child(); + } + else + { + return mutable_executor; + } +} + +PushDownFilter PushDownFilter::toPushDownFilter(const tipb::Executor * filter_executor) +{ + if (!filter_executor || !filter_executor->has_selection()) + { + return {"", {}}; + } + + std::vector conditions; + for (const auto & condition : filter_executor->selection().conditions()) + conditions.push_back(&condition); + + return {filter_executor->executor_id(), conditions}; +} +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/PushDownFilter.h b/dbms/src/Flash/Coprocessor/PushDownFilter.h new file mode 100644 index 00000000000..0c461ef42e3 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/PushDownFilter.h @@ -0,0 +1,39 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + +#include + +namespace DB +{ +struct PushDownFilter +{ + static PushDownFilter toPushDownFilter(const tipb::Executor * filter_executor); + + PushDownFilter( + const String & executor_id_, + const std::vector & conditions_); + + bool hasValue() const { return !conditions.empty(); } + + tipb::Executor * constructSelectionForRemoteRead(tipb::Executor * mutable_executor) const; + + String executor_id; + std::vector conditions; +}; +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/RemoteRequest.cpp b/dbms/src/Flash/Coprocessor/RemoteRequest.cpp index 0ef4a9de4d6..086cdb43d20 100644 --- a/dbms/src/Flash/Coprocessor/RemoteRequest.cpp +++ b/dbms/src/Flash/Coprocessor/RemoteRequest.cpp @@ -18,7 +18,13 @@ namespace DB { -RemoteRequest RemoteRequest::build(const RegionRetryList & retry_regions, DAGContext & dag_context, const TiDBTableScan & table_scan, const TiDB::TableInfo & table_info, const tipb::Executor * selection, LoggerPtr & log) +RemoteRequest RemoteRequest::build( + const RegionRetryList & retry_regions, + DAGContext & dag_context, + const TiDBTableScan & table_scan, + const TiDB::TableInfo & table_info, + const PushDownFilter & push_down_filter, + const LoggerPtr & log) { auto print_retry_regions = [&retry_regions, &table_info] { FmtBuffer buffer; @@ -35,16 +41,7 @@ RemoteRequest RemoteRequest::build(const RegionRetryList & retry_regions, DAGCon DAGSchema schema; tipb::DAGRequest dag_req; - auto * executor = dag_req.mutable_root_executor(); - if (selection != nullptr) - { - executor->set_tp(tipb::ExecType::TypeSelection); - executor->set_executor_id(selection->executor_id()); - auto * new_selection = executor->mutable_selection(); - for (const auto & condition : selection->selection().conditions()) - *new_selection->add_conditions() = condition; - executor = new_selection->mutable_child(); - } + auto * executor = push_down_filter.constructSelectionForRemoteRead(dag_req.mutable_root_executor()); { tipb::Executor * ts_exec = executor; diff --git a/dbms/src/Flash/Coprocessor/RemoteRequest.h b/dbms/src/Flash/Coprocessor/RemoteRequest.h index 37662ce36e2..1e42e18a7bd 100644 --- a/dbms/src/Flash/Coprocessor/RemoteRequest.h +++ b/dbms/src/Flash/Coprocessor/RemoteRequest.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -34,7 +35,10 @@ using DAGSchema = std::vector; struct RemoteRequest { - RemoteRequest(tipb::DAGRequest && dag_request_, DAGSchema && schema_, std::vector && key_ranges_) + RemoteRequest( + tipb::DAGRequest && dag_request_, + DAGSchema && schema_, + std::vector && key_ranges_) : dag_request(std::move(dag_request_)) , schema(std::move(schema_)) , key_ranges(std::move(key_ranges_)) @@ -43,6 +47,12 @@ struct RemoteRequest DAGSchema schema; /// the sorted key ranges std::vector key_ranges; - static RemoteRequest build(const RegionRetryList & retry_regions, DAGContext & dag_context, const TiDBTableScan & table_scan, const TiDB::TableInfo & table_info, const tipb::Executor * selection, LoggerPtr & log); + static RemoteRequest build( + const RegionRetryList & retry_regions, + DAGContext & dag_context, + const TiDBTableScan & table_scan, + const TiDB::TableInfo & table_info, + const PushDownFilter & push_down_filter, + const LoggerPtr & log); }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp b/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp index cf19ac4efc1..7d7ad2f2b57 100644 --- a/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp +++ 
b/dbms/src/Flash/Coprocessor/TiDBTableScan.cpp @@ -16,8 +16,12 @@ namespace DB { -TiDBTableScan::TiDBTableScan(const tipb::Executor * table_scan_, const DAGContext & dag_context) +TiDBTableScan::TiDBTableScan( + const tipb::Executor * table_scan_, + const String & executor_id_, + const DAGContext & dag_context) : table_scan(table_scan_) + , executor_id(executor_id_) , is_partition_table_scan(table_scan->tp() == tipb::TypePartitionTableScan) , columns(is_partition_table_scan ? table_scan->partition_table_scan().columns() : table_scan->tbl_scan().columns()) { diff --git a/dbms/src/Flash/Coprocessor/TiDBTableScan.h b/dbms/src/Flash/Coprocessor/TiDBTableScan.h index 31c145709a6..3c7703de7bf 100644 --- a/dbms/src/Flash/Coprocessor/TiDBTableScan.h +++ b/dbms/src/Flash/Coprocessor/TiDBTableScan.h @@ -24,7 +24,10 @@ namespace DB class TiDBTableScan { public: - TiDBTableScan(const tipb::Executor * table_scan_, const DAGContext & dag_context); + TiDBTableScan( + const tipb::Executor * table_scan_, + const String & executor_id_, + const DAGContext & dag_context); bool isPartitionTableScan() const { return is_partition_table_scan; @@ -48,11 +51,12 @@ class TiDBTableScan } String getTableScanExecutorID() const { - return table_scan->executor_id(); + return executor_id; } private: const tipb::Executor * table_scan; + String executor_id; bool is_partition_table_scan; const google::protobuf::RepeatedPtrField & columns; /// logical_table_id is the table id for a TiDB' table, while if the From 4ad156b924c88233bb7d9da43f2f47d249a16cdb Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Fri, 6 May 2022 14:48:56 +0800 Subject: [PATCH 60/79] *: remove useless BatchCommandsHandler. (#4828) close pingcap/tiflash#4825 --- dbms/src/Flash/BatchCommandsHandler.cpp | 140 ------------------------ dbms/src/Flash/BatchCommandsHandler.h | 72 ------------ dbms/src/Flash/EstablishCall.cpp | 4 +- dbms/src/Flash/FlashService.cpp | 64 +---------- dbms/src/Flash/FlashService.h | 10 +- 5 files changed, 6 insertions(+), 284 deletions(-) delete mode 100644 dbms/src/Flash/BatchCommandsHandler.cpp delete mode 100644 dbms/src/Flash/BatchCommandsHandler.h diff --git a/dbms/src/Flash/BatchCommandsHandler.cpp b/dbms/src/Flash/BatchCommandsHandler.cpp deleted file mode 100644 index 262165157c9..00000000000 --- a/dbms/src/Flash/BatchCommandsHandler.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include - -#include - -namespace DB -{ -BatchCommandsContext::BatchCommandsContext( - Context & db_context_, - DBContextCreationFunc && db_context_creation_func_, - grpc::ServerContext & grpc_server_context_) - : db_context(db_context_) - , db_context_creation_func(std::move(db_context_creation_func_)) - , grpc_server_context(grpc_server_context_) -{} - -BatchCommandsHandler::BatchCommandsHandler(BatchCommandsContext & batch_commands_context_, const tikvpb::BatchCommandsRequest & request_, tikvpb::BatchCommandsResponse & response_) - : batch_commands_context(batch_commands_context_) - , request(request_) - , response(response_) - , log(&Poco::Logger::get("BatchCommandsHandler")) -{} - -ThreadPool::Job BatchCommandsHandler::handleCommandJob( - const tikvpb::BatchCommandsRequest::Request & req, - tikvpb::BatchCommandsResponse::Response & resp, - grpc::Status & ret) const -{ - return [&]() { - auto start_time = std::chrono::system_clock::now(); - SCOPE_EXIT({ - std::chrono::duration duration_sec = std::chrono::system_clock::now() - start_time; - GET_METRIC(tiflash_coprocessor_request_handle_seconds, type_batch).Observe(duration_sec.count()); - }); - - if (!req.has_coprocessor()) - { - ret = grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, ""); - return; - } - - GET_METRIC(tiflash_coprocessor_request_count, type_batch_cop).Increment(); - GET_METRIC(tiflash_coprocessor_handling_request_count, type_batch_cop).Increment(); - SCOPE_EXIT({ GET_METRIC(tiflash_coprocessor_handling_request_count, type_batch_cop).Decrement(); }); - - const auto & cop_req = req.coprocessor(); - auto * cop_resp = resp.mutable_coprocessor(); - - auto [context, status] = batch_commands_context.db_context_creation_func(&batch_commands_context.grpc_server_context); - if (!status.ok()) - { - ret = status; - return; - } - - CoprocessorContext cop_context(*context, cop_req.context(), batch_commands_context.grpc_server_context); - CoprocessorHandler cop_handler(cop_context, &cop_req, cop_resp); - - ret = cop_handler.execute(); - }; -} - -grpc::Status BatchCommandsHandler::execute() -{ - if (request.requests_size() == 0) - return grpc::Status::OK; - - // TODO: Fill transport_layer_load into BatchCommandsResponse. - - /// Shortcut for only one request by not going to thread pool. - if (request.requests_size() == 1) - { - LOG_FMT_DEBUG(log, "Handling the only batch command in place."); - - const auto & req = request.requests(0); - auto * resp = response.add_responses(); - response.add_request_ids(request.request_ids(0)); - auto ret = grpc::Status::OK; - handleCommandJob(req, *resp, ret)(); - return ret; - } - - /// Use thread pool to handle requests concurrently. - const Settings & settings = batch_commands_context.db_context.getSettingsRef(); - size_t max_threads = settings.batch_commands_threads ? static_cast(settings.batch_commands_threads) - : static_cast(settings.max_threads); - - LOG_FMT_DEBUG( - log, - "Handling {} batch commands using {} threads.", - request.requests_size(), - max_threads); - - ThreadPool thread_pool(max_threads); - - std::vector rets; - rets.reserve(request.requests_size()); - size_t i = 0; - - for (const auto & req : request.requests()) - { - auto * resp = response.add_responses(); - response.add_request_ids(request.request_ids(i++)); - rets.emplace_back(grpc::Status::OK); - - thread_pool.schedule(handleCommandJob(req, *resp, rets.back())); - } - - thread_pool.wait(); - - // Iterate all return values of each individual commands, returns the first non-OK one if any. 
- for (const auto & ret : rets) - { - if (!ret.ok()) - { - response.Clear(); - return ret; - } - } - - return grpc::Status::OK; -} - -} // namespace DB diff --git a/dbms/src/Flash/BatchCommandsHandler.h b/dbms/src/Flash/BatchCommandsHandler.h deleted file mode 100644 index 5f7384ae38b..00000000000 --- a/dbms/src/Flash/BatchCommandsHandler.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#ifdef __clang__ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif -#include -#include -#pragma GCC diagnostic pop - -namespace DB -{ -struct BatchCommandsContext -{ - /// Context for this batch commands. - Context & db_context; - - /// Context creation function for each individual command - they should be handled isolated, - /// given that context is being used to pass arguments regarding queries. - using DBContextCreationFunc = std::function(const grpc::ServerContext *)>; - DBContextCreationFunc db_context_creation_func; - - const grpc::ServerContext & grpc_server_context; - - BatchCommandsContext( - Context & db_context_, - DBContextCreationFunc && db_context_creation_func_, - grpc::ServerContext & grpc_server_context_); -}; - -class BatchCommandsHandler -{ -public: - BatchCommandsHandler(BatchCommandsContext & batch_commands_context_, const tikvpb::BatchCommandsRequest & request_, tikvpb::BatchCommandsResponse & response_); - - ~BatchCommandsHandler() = default; - - grpc::Status execute(); - -protected: - ThreadPool::Job handleCommandJob( - const tikvpb::BatchCommandsRequest::Request & req, - tikvpb::BatchCommandsResponse::Response & resp, - grpc::Status & ret) const; - -protected: - const BatchCommandsContext & batch_commands_context; - const tikvpb::BatchCommandsRequest & request; - tikvpb::BatchCommandsResponse & response; - - Poco::Logger * log; -}; - -} // namespace DB diff --git a/dbms/src/Flash/EstablishCall.cpp b/dbms/src/Flash/EstablishCall.cpp index cf9d2e2ed6f..8af81e30962 100644 --- a/dbms/src/Flash/EstablishCall.cpp +++ b/dbms/src/Flash/EstablishCall.cpp @@ -31,7 +31,7 @@ EstablishCallData::EstablishCallData(AsyncFlashService * service, grpc::ServerCo // As part of the initial CREATE state, we *request* that the system // start processing requests. In this request, "this" acts are // the tag uniquely identifying the request. - service->RequestEstablishMPPConnection(&ctx, &request, &responder, cq, notify_cq, this); + service->requestEstablishMPPConnection(&ctx, &request, &responder, cq, notify_cq, this); } EstablishCallData::~EstablishCallData() @@ -71,7 +71,7 @@ void EstablishCallData::initRpc() std::exception_ptr eptr = nullptr; try { - service->EstablishMPPConnectionSyncOrAsync(&ctx, &request, nullptr, this); + service->establishMPPConnectionSyncOrAsync(&ctx, &request, nullptr, this); } catch (...) 
{ diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index 167ee238249..4e2a2ae88bc 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -213,7 +212,7 @@ ::grpc::Status returnStatus(EstablishCallData * calldata, const grpc::Status & s return status; } -::grpc::Status FlashService::EstablishMPPConnectionSyncOrAsync(::grpc::ServerContext * grpc_context, +::grpc::Status FlashService::establishMPPConnectionSyncOrAsync(::grpc::ServerContext * grpc_context, const ::mpp::EstablishMPPConnectionRequest * request, ::grpc::ServerWriter<::mpp::MPPDataPacket> * sync_writer, EstablishCallData * calldata) @@ -345,67 +344,6 @@ ::grpc::Status FlashService::CancelMPPTask( return grpc::Status::OK; } -// This function is deprecated. -grpc::Status FlashService::BatchCommands( - grpc::ServerContext * grpc_context, - grpc::ServerReaderWriter<::tikvpb::BatchCommandsResponse, tikvpb::BatchCommandsRequest> * stream) -{ - CPUAffinityManager::getInstance().bindSelfGrpcThread(); - if (!security_config.checkGrpcContext(grpc_context)) - { - return grpc::Status(grpc::PERMISSION_DENIED, tls_err_msg); - } - - auto [context, status] = createDBContext(grpc_context); - if (!status.ok()) - { - return status; - } - - tikvpb::BatchCommandsRequest request; - while (stream->Read(&request)) - { - tikvpb::BatchCommandsResponse response; - GET_METRIC(tiflash_coprocessor_request_count, type_batch).Increment(); - GET_METRIC(tiflash_coprocessor_handling_request_count, type_batch).Increment(); - SCOPE_EXIT({ GET_METRIC(tiflash_coprocessor_handling_request_count, type_batch).Decrement(); }); - auto start_time = std::chrono::system_clock::now(); - SCOPE_EXIT({ - std::chrono::duration duration_sec = std::chrono::system_clock::now() - start_time; - GET_METRIC(tiflash_coprocessor_request_duration_seconds, type_batch).Observe(duration_sec.count()); - GET_METRIC(tiflash_coprocessor_response_bytes).Increment(response.ByteSizeLong()); - }); - - LOG_FMT_DEBUG(log, "Handling batch commands: {}", request.DebugString()); - - BatchCommandsContext batch_commands_context( - *context, - [this](const grpc::ServerContext * grpc_server_context) { return createDBContext(grpc_server_context); }, - *grpc_context); - BatchCommandsHandler batch_commands_handler(batch_commands_context, request, response); - auto ret = batch_commands_handler.execute(); - if (!ret.ok()) - { - LOG_FMT_DEBUG( - log, - "Handle batch commands request done: {}, {}", - ret.error_code(), - ret.error_message()); - return ret; - } - - if (!stream->Write(response)) - { - LOG_FMT_DEBUG(log, "Write response failed for unknown reason."); - return grpc::Status(grpc::StatusCode::UNKNOWN, "Write response failed for unknown reason."); - } - - LOG_FMT_DEBUG(log, "Handle batch commands request done: {}, {}", ret.error_code(), ret.error_message()); - } - - return grpc::Status::OK; -} - String getClientMetaVarWithDefault(const grpc::ServerContext * grpc_context, const String & name, const String & default_val) { if (auto it = grpc_context->client_metadata().find(name); it != grpc_context->client_metadata().end()) diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h index 582259259ff..916f0ef1296 100644 --- a/dbms/src/Flash/FlashService.h +++ b/dbms/src/Flash/FlashService.h @@ -49,9 +49,6 @@ class FlashService : public tikvpb::Tikv::Service const coprocessor::Request * request, coprocessor::Response * response) override; - grpc::Status 
BatchCommands(grpc::ServerContext * grpc_context, - grpc::ServerReaderWriter * stream) override; - ::grpc::Status BatchCoprocessor(::grpc::ServerContext * context, const ::coprocessor::BatchRequest * request, ::grpc::ServerWriter<::coprocessor::BatchResponse> * writer) override; @@ -66,11 +63,11 @@ class FlashService : public tikvpb::Tikv::Service const ::mpp::IsAliveRequest * request, ::mpp::IsAliveResponse * response) override; - ::grpc::Status EstablishMPPConnectionSyncOrAsync(::grpc::ServerContext * context, const ::mpp::EstablishMPPConnectionRequest * request, ::grpc::ServerWriter<::mpp::MPPDataPacket> * sync_writer, EstablishCallData * calldata); + ::grpc::Status establishMPPConnectionSyncOrAsync(::grpc::ServerContext * context, const ::mpp::EstablishMPPConnectionRequest * request, ::grpc::ServerWriter<::mpp::MPPDataPacket> * sync_writer, EstablishCallData * calldata); ::grpc::Status EstablishMPPConnection(::grpc::ServerContext * context, const ::mpp::EstablishMPPConnectionRequest * request, ::grpc::ServerWriter<::mpp::MPPDataPacket> * sync_writer) override { - return EstablishMPPConnectionSyncOrAsync(context, request, sync_writer, nullptr); + return establishMPPConnectionSyncOrAsync(context, request, sync_writer, nullptr); } ::grpc::Status CancelMPPTask(::grpc::ServerContext * context, const ::mpp::CancelTaskRequest * request, ::mpp::CancelTaskResponse * response) override; @@ -110,8 +107,7 @@ class AsyncFlashService final : public FlashService abort(); return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, ""); } - - void RequestEstablishMPPConnection(::grpc::ServerContext * context, ::mpp::EstablishMPPConnectionRequest * request, ::grpc::ServerAsyncWriter<::mpp::MPPDataPacket> * writer, ::grpc::CompletionQueue * new_call_cq, ::grpc::ServerCompletionQueue * notification_cq, void * tag) + void requestEstablishMPPConnection(::grpc::ServerContext * context, ::mpp::EstablishMPPConnectionRequest * request, ::grpc::ServerAsyncWriter<::mpp::MPPDataPacket> * writer, ::grpc::CompletionQueue * new_call_cq, ::grpc::ServerCompletionQueue * notification_cq, void * tag) { ::grpc::Service::RequestAsyncServerStreaming(EstablishMPPConnectionApiID, context, request, writer, new_call_cq, notification_cq, tag); } From 0462017271b4f62af9a54019b8f386e3b495582d Mon Sep 17 00:00:00 2001 From: lidezhu <47731263+lidezhu@users.noreply.github.com> Date: Fri, 6 May 2022 19:02:21 +0800 Subject: [PATCH 61/79] enable PageStorageV3 by default (#4831) * fix enable_ps_v3 default value * avoid abandon segment after drop it * Skip some tests Signed-off-by: JaySon-Huang * Revert "Skip some tests" This reverts commit 27010a6ec14f263a0f9a2c4755adedde31054bdd. 
* allocate table id for mock tests/created by ch-client

Signed-off-by: JaySon-Huang

Co-authored-by: JaySon-Huang
Co-authored-by: JaySon-Huang
---
 dbms/src/Debug/MockTiDB.h                     |  2 ++
 dbms/src/Server/StorageConfigParser.cpp       | 32 +++++++++----------
 dbms/src/Server/StorageConfigParser.h         |  2 +-
 .../Storages/DeltaMerge/DeltaMergeStore.cpp   |  5 ++-
 dbms/src/Storages/StorageDeltaMerge.cpp       |  8 +++++
 5 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/dbms/src/Debug/MockTiDB.h b/dbms/src/Debug/MockTiDB.h
index 982e4941548..cb09f9e305a 100644
--- a/dbms/src/Debug/MockTiDB.h
+++ b/dbms/src/Debug/MockTiDB.h
@@ -135,6 +135,8 @@ class MockTiDB : public ext::Singleton
 
     Int64 getVersion() { return version; }
 
+    TableID newTableID() { return table_id_allocator++; }
+
 private:
     TablePtr dropTableInternal(Context & context, const String & database_name, const String & table_name, bool drop_regions);
     TablePtr getTableByNameInternal(const String & database_name, const String & table_name);
diff --git a/dbms/src/Server/StorageConfigParser.cpp b/dbms/src/Server/StorageConfigParser.cpp
index 89bb49da33a..653a4eb947f 100644
--- a/dbms/src/Server/StorageConfigParser.cpp
+++ b/dbms/src/Server/StorageConfigParser.cpp
@@ -197,24 +197,24 @@ void TiFlashStorageConfig::parseMisc(const String & storage_section, Poco::Logge
         format_version = *version;
     }
 
-    if (auto lazily_init = table->get_qualified_as("lazily_init_store"); lazily_init)
-    {
-        lazily_init_store = (*lazily_init != 0);
-    }
-
-
-    if (table->contains("enable_ps_v3"))
-    {
-        if (auto enable_v3 = table->get_qualified_as("enable_ps_v3"); enable_v3)
+    auto get_bool_config_or_default = [&](const String & name, bool default_value) {
+        if (auto value = table->get_qualified_as(name); value)
         {
-            enable_ps_v3 = (*enable_v3 != 0);
+            return (*value != 0);
         }
-    }
-    else
-    {
-        // default open enable_ps_v3
-        enable_ps_v3 = true;
-    }
+        else if (auto value_b = table->get_qualified_as(name); value_b)
+        {
+            return *value_b;
+        }
+        else
+        {
+            return default_value;
+        }
+    };
+
+    lazily_init_store = get_bool_config_or_default("lazily_init_store", lazily_init_store);
+    // config for experimental feature, may remove later
+    enable_ps_v3 = get_bool_config_or_default("enable_ps_v3", enable_ps_v3);
 
     LOG_FMT_INFO(log, "format_version {} lazily_init_store {} enable_ps_v3 {}", format_version, lazily_init_store, enable_ps_v3);
 
diff --git a/dbms/src/Server/StorageConfigParser.h b/dbms/src/Server/StorageConfigParser.h
index 6f42b4a454b..65df7be3174 100644
--- a/dbms/src/Server/StorageConfigParser.h
+++ b/dbms/src/Server/StorageConfigParser.h
@@ -103,7 +103,7 @@ struct TiFlashStorageConfig
     UInt64 format_version = 0;
     bool lazily_init_store = true;
-    bool enable_ps_v3 = false;
+    bool enable_ps_v3 = true;
 
 public:
     TiFlashStorageConfig() = default;
diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
index 80b1d81f817..d7cd1f6fd95 100644
--- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
+++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
@@ -400,13 +400,16 @@ void DeltaMergeStore::drop()
             assert(previous_segment->nextSegmentId() == segment_id_to_drop);
             auto previous_lock = previous_segment->mustGetUpdateLock();
             auto new_previous_segment = previous_segment->dropNextSegment(wbs);
-            previous_segment->abandon(*dm_context);
+            // No need to abandon previous_segment, because its delta and stable are managed by new_previous_segment.
+            // Abandoning previous_segment here would actually abandon new_previous_segment as well.
             segments.emplace(new_previous_segment->getRowKeyRange().getEnd(), new_previous_segment);
             id_to_segment.emplace(previous_segment_id, new_previous_segment);
         }
         // The order to drop the meta and data of this segment doesn't matter,
         // Because there is no segment pointing to this segment,
         // so it won't be restored again even the drop process was interrupted by restart
+        segments.erase(segment_to_drop->getRowKeyRange().getEnd());
+        id_to_segment.erase(segment_id_to_drop);
         auto drop_lock = segment_to_drop->mustGetUpdateLock();
         segment_to_drop->abandon(*dm_context);
         segment_to_drop->drop(global_context.getFileProvider(), wbs);
diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp
index 1f496ade671..1c6853afeef 100644
--- a/dbms/src/Storages/StorageDeltaMerge.cpp
+++ b/dbms/src/Storages/StorageDeltaMerge.cpp
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -49,6 +50,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -95,6 +97,12 @@ StorageDeltaMerge::StorageDeltaMerge(
         is_common_handle = tidb_table_info.is_common_handle;
         pk_is_handle = tidb_table_info.pk_is_handle;
     }
+    else
+    {
+        const auto mock_table_id = MockTiDB::instance().newTableID();
+        tidb_table_info.id = mock_table_id;
+        LOG_FMT_WARNING(log, "Allocate table id for mock test [id={}]", mock_table_id);
+    }
 
     table_column_info = std::make_unique(db_name_, table_name_, primary_expr_ast_);
 
From 343e309733de335deae4637e14703c74e829f5be Mon Sep 17 00:00:00 2001
From: xufei
Date: Fri, 6 May 2022 20:06:56 +0800
Subject: [PATCH 62/79] fix ifnull bug (#4832)

close pingcap/tiflash#4829
---
 .../DAGExpressionAnalyzerHelper.cpp           | 10 +++----
 tests/fullstack-test/expr/ifnull.test         | 28 +++++++++++++++++++
 2 files changed, 33 insertions(+), 5 deletions(-)
 create mode 100644 tests/fullstack-test/expr/ifnull.test

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
index 5173636bb04..cabd88e0ba7 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
@@ -102,7 +102,7 @@ String DAGExpressionAnalyzerHelper::buildIfNullFunction(
     const ExpressionActionsPtr & actions)
 {
     // rewrite IFNULL function with multiIf
-    // ifNull(arg1, arg2) -> multiIf(isNull(arg1), arg2, arg1)
+    // ifNull(arg1, arg2) -> multiIf(isNull(arg1), arg2, assumeNotNull(arg1))
     // todo if arg1 is not nullable, then just return arg1 is ok
     const String & func_name = "multiIf";
     Names argument_names;
@@ -112,13 +112,13 @@ String DAGExpressionAnalyzerHelper::buildIfNullFunction(
     }
 
     String condition_arg_name = analyzer->getActions(expr.children(0), actions, false);
-    String tmp_else_arg_name = analyzer->getActions(expr.children(1), actions, false);
+    String else_arg_name = analyzer->getActions(expr.children(1), actions, false);
     String is_null_result = analyzer->applyFunction("isNull", {condition_arg_name}, actions, getCollatorFromExpr(expr));
-    String not_null_else_arg_name = analyzer->applyFunction("assumeNotNull", {tmp_else_arg_name}, actions, nullptr);
+    String not_null_condition_arg_name = analyzer->applyFunction("assumeNotNull", {condition_arg_name}, actions, nullptr);
 
     argument_names.push_back(std::move(is_null_result));
-    argument_names.push_back(std::move(not_null_else_arg_name));
-    argument_names.push_back(std::move(condition_arg_name));
+    argument_names.push_back(std::move(else_arg_name));
+    
argument_names.push_back(std::move(not_null_condition_arg_name)); return analyzer->applyFunction(func_name, argument_names, actions, getCollatorFromExpr(expr)); } diff --git a/tests/fullstack-test/expr/ifnull.test b/tests/fullstack-test/expr/ifnull.test new file mode 100644 index 00000000000..490d519421c --- /dev/null +++ b/tests/fullstack-test/expr/ifnull.test @@ -0,0 +1,28 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t; +mysql> create table test.t(a int, b int); +mysql> insert into test.t values(1, null),(null, 1); +mysql> alter table test.t set tiflash replica 1; +func> wait_table test t +mysql> set @@tidb_isolation_read_engines='tiflash'; select * from test.t where not ifnull(a > b, null); +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select a, b, a>b, ifnull(a > b, null), not ifnull(a > b, null) from test.t; ++------+------+------+---------------------+-------------------------+ +| a | b | a>b | ifnull(a > b, null) | not ifnull(a > b, null) | ++------+------+------+---------------------+-------------------------+ +| 1 | NULL | NULL | NULL | NULL | +| NULL | 1 | NULL | NULL | NULL | ++------+------+------+---------------------+-------------------------+ +mysql> drop table if exists test.t; From e813955612681f2c823302e51da3751373f4a5a5 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Fri, 6 May 2022 22:38:56 +0800 Subject: [PATCH 63/79] Fix a encryption problem in pagestorage v3. 
(#4177) ref pingcap/tiflash#3594 --- dbms/src/Storages/Page/V3/BlobFile.cpp | 5 +- .../Storages/Page/V3/LogFile/LogWriter.cpp | 2 +- dbms/src/Storages/Page/V3/WAL/WALReader.cpp | 8 +-- .../Page/V3/tests/gtest_page_storage.cpp | 67 +++++++++++++++++++ 4 files changed, 73 insertions(+), 9 deletions(-) diff --git a/dbms/src/Storages/Page/V3/BlobFile.cpp b/dbms/src/Storages/Page/V3/BlobFile.cpp index 27b1c60f5d3..dd45e981c38 100644 --- a/dbms/src/Storages/Page/V3/BlobFile.cpp +++ b/dbms/src/Storages/Page/V3/BlobFile.cpp @@ -34,14 +34,13 @@ BlobFile::BlobFile(String parent_path_, , delegator(std::move(delegator_)) , parent_path(std::move(parent_path_)) { - // TODO: support encryption file + Poco::File file_in_disk(getPath()); wrfile = file_provider->newWriteReadableFile( getPath(), getEncryptionPath(), false, - /*create_new_encryption_info_*/ false); + /*create_new_encryption_info_*/ !file_in_disk.exists()); - Poco::File file_in_disk(getPath()); file_size = file_in_disk.getSize(); { std::lock_guard lock(file_size_lock); diff --git a/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp b/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp index be14637415e..c9dda390569 100644 --- a/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp +++ b/dbms/src/Storages/Page/V3/LogFile/LogWriter.cpp @@ -43,7 +43,7 @@ LogWriter::LogWriter( path, EncryptionPath(path, ""), false, - /*create_new_encryption_info_*/ false); + /*create_new_encryption_info_*/ true); buffer = static_cast(alloc(buffer_size)); write_buffer = WriteBuffer(buffer, buffer_size); diff --git a/dbms/src/Storages/Page/V3/WAL/WALReader.cpp b/dbms/src/Storages/Page/V3/WAL/WALReader.cpp index 9b914d8800a..6b67126d9ad 100644 --- a/dbms/src/Storages/Page/V3/WAL/WALReader.cpp +++ b/dbms/src/Storages/Page/V3/WAL/WALReader.cpp @@ -199,17 +199,15 @@ bool WALStoreReader::openNextFile() } auto do_open = [this](const LogFilename & next_file) { - const auto & parent_path = next_file.parent_path; const auto log_num = next_file.log_num; - const auto level_num = next_file.level_num; - const auto filename = fmt::format("log_{}_{}", log_num, level_num); - const auto fullname = fmt::format("{}/{}", parent_path, filename); + const auto filename = next_file.filename(next_file.stage); + const auto fullname = next_file.fullname(next_file.stage); LOG_FMT_DEBUG(logger, "Open log file for reading [file={}]", fullname); auto read_buf = createReadBufferFromFileBaseByFileProvider( provider, fullname, - EncryptionPath{parent_path, filename}, + EncryptionPath{fullname, ""}, /*estimated_size*/ Format::BLOCK_SIZE, /*aio_threshold*/ 0, /*read_limiter*/ read_limiter, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 2b4978bd13b..fc3e25f1ba2 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
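+
+// A compact sketch of the invariant the encryption tests below rely on, taken
+// from the BlobFile fix above; `path` and `encryption_path` are illustrative
+// names, not part of this patch:
+//
+//     wrfile = file_provider->newWriteReadableFile(
+//         path,
+//         encryption_path,
+//         false,
+//         /*create_new_encryption_info_*/ !Poco::File(path).exists());
+//
+// i.e. fresh encryption info is created only when the blob file does not yet
+// exist on disk, so reopening an existing encrypted BlobFile reuses its
+// stored encryption info instead of overwriting it.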
+#include +#include +#include #include #include #include @@ -109,6 +112,70 @@ try } CATCH +TEST_F(PageStorageTest, WriteReadWithEncryption) +try +{ + const UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + KeyManagerPtr key_manager = std::make_shared(true); + const auto enc_file_provider = std::make_shared(key_manager, true); + auto delegator = std::make_shared(getTemporaryPath()); + auto page_storage_enc = std::make_shared("test.t", delegator, config, enc_file_provider); + page_storage_enc->restore(); + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(2, tag, buff, buf_sz); + page_storage_enc->write(std::move(batch)); + } + + // Make sure that we can't restore from no-enc pagestore. + // Because WALStore can't get any record from it. + + page_storage->restore(); + ASSERT_ANY_THROW(page_storage->read(1)); + + page_storage_enc = std::make_shared("test.t", delegator, config, enc_file_provider); + page_storage_enc->restore(); + + DB::Page page1 = page_storage_enc->read(1); + ASSERT_EQ(page1.data.size(), buf_sz); + ASSERT_EQ(page1.page_id, 1UL); + for (size_t i = 0; i < buf_sz; ++i) + { + EXPECT_EQ(*(page1.data.begin() + i), static_cast(i % 0xff)); + } + DB::Page page2 = page_storage_enc->read(2); + ASSERT_EQ(page2.data.size(), buf_sz); + ASSERT_EQ(page2.page_id, 2UL); + for (size_t i = 0; i < buf_sz; ++i) + { + EXPECT_EQ(*(page2.data.begin() + i), static_cast(i % 0xff)); + } + + char c_buff_read[buf_sz] = {0}; + + // Make sure in-disk data is encrypted. + + RandomAccessFilePtr file_read = std::make_shared(fmt::format("{}/{}{}", getTemporaryPath(), BlobFile::BLOB_PREFIX_NAME, 1), + -1, + nullptr); + file_read->pread(c_buff_read, buf_sz, 0); + ASSERT_NE(c_buff_read, c_buff); + file_read->pread(c_buff_read, buf_sz, buf_sz); + ASSERT_NE(c_buff_read, c_buff); +} +CATCH + + TEST_F(PageStorageTest, ReadNULL) try { From 04da47f1e848012cb465705894c8ecdd3d81a40b Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Fri, 6 May 2022 23:46:55 +0800 Subject: [PATCH 64/79] Add some tests to test PageStorageV3 IO limiter (#4094) ref pingcap/tiflash#3594 --- dbms/src/Encryption/RateLimiter.h | 2 +- .../Page/V3/tests/gtest_blob_store.cpp | 111 +++++++++ .../Page/V3/tests/gtest_page_storage.cpp | 211 ++++++++++++++++++ dbms/src/TestUtils/MockReadLimiter.h | 40 ++++ 4 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 dbms/src/TestUtils/MockReadLimiter.h diff --git a/dbms/src/Encryption/RateLimiter.h b/dbms/src/Encryption/RateLimiter.h index 7bca11e2499..7f2f19822f3 100644 --- a/dbms/src/Encryption/RateLimiter.h +++ b/dbms/src/Encryption/RateLimiter.h @@ -151,7 +151,7 @@ using WriteLimiterPtr = std::shared_ptr; // `get_io_stat_period_us` is the interval between calling getIOStatistic_. // // Other parameters are the same as WriteLimiter. -class ReadLimiter final : public WriteLimiter +class ReadLimiter : public WriteLimiter { public: ReadLimiter( diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index a141e21ee2a..c8facc39b80 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
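+
+// Rough arithmetic behind the rate checks in the limiter tests below,
+// assuming the limiter refills `rate_target` bytes per second: with
+// wb_nums = 5 and buff_size = 10 KiB, rate_target = buff_size - 1, so moving
+// 5 * 10240 = 51200 bytes at ~10239 bytes/sec takes about 5 seconds, and
+// total_bytes / elapsed stays close to rate_target. The assertions only
+// bound the ratio from above (<= 1.30) because real disk I/O adds jitter.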
 #include
+#include
 #include
 #include
 #include
@@ -24,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 
 namespace DB::PS::V3::tests
@@ -572,6 +574,115 @@ TEST_F(BlobStoreTest, testWriteRead)
     ASSERT_EQ(index, buff_nums);
 }
 
+TEST_F(BlobStoreTest, testWriteReadWithIOLimiter)
+{
+    const auto file_provider = DB::tests::TiFlashTestEnv::getContext().getFileProvider();
+
+    PageId page_id = 50;
+    size_t wb_nums = 5;
+    size_t buff_size = 10ul * 1024;
+    const size_t rate_target = buff_size - 1;
+
+    auto blob_store = BlobStore(getCurrentTestName(), file_provider, delegator, config);
+    char c_buff[wb_nums * buff_size];
+
+    WriteBatch wbs[wb_nums];
+    PageEntriesEdit edits[wb_nums];
+
+    for (size_t i = 0; i < wb_nums; ++i)
+    {
+        for (size_t j = 0; j < buff_size; ++j)
+        {
+            c_buff[j + i * buff_size] = static_cast((j & 0xff) + i);
+        }
+
+        ReadBufferPtr buff = std::make_shared(const_cast(c_buff + i * buff_size), buff_size);
+        wbs[i].putPage(page_id++, /* tag */ 0, buff, buff_size);
+    }
+
+    WriteLimiterPtr write_limiter = std::make_shared(rate_target, LimiterType::UNKNOW, 20);
+
+    AtomicStopwatch write_watch;
+    for (size_t i = 0; i < wb_nums; ++i)
+    {
+        edits[i] = blob_store.write(wbs[i], write_limiter);
+    }
+    auto write_elapsed = write_watch.elapsedSeconds();
+    auto write_actual_rate = write_limiter->getTotalBytesThrough() / write_elapsed;
+
+    // It must be lower than 1.30.
+    // But we do have some disk r/w overhead, so don't assert GE.
+    EXPECT_LE(write_actual_rate / rate_target, 1.30);
+
+    Int64 consumed = 0;
+    auto get_stat = [&consumed]() {
+        return consumed;
+    };
+
+    char c_buff_read[wb_nums * buff_size];
+    {
+        ReadLimiterPtr read_limiter = std::make_shared(get_stat,
+                                                       rate_target,
+                                                       LimiterType::UNKNOW);
+
+        AtomicStopwatch read_watch;
+        for (size_t i = 0; i < wb_nums; ++i)
+        {
+            for (const auto & record : edits[i].getRecords())
+            {
+                blob_store.read(record.entry.file_id,
+                                record.entry.offset,
+                                c_buff_read + i * buff_size,
+                                record.entry.size,
+                                read_limiter);
+            }
+        }
+
+        auto read_elapsed = read_watch.elapsedSeconds();
+        auto read_actual_rate = read_limiter->getTotalBytesThrough() / read_elapsed;
+        EXPECT_LE(read_actual_rate / rate_target, 1.30);
+    }
+
+    PageIDAndEntriesV3 entries = {};
+    for (size_t i = 0; i < wb_nums; ++i)
+    {
+        for (const auto & record : edits[i].getRecords())
+        {
+            entries.emplace_back(std::make_pair(record.page_id, record.entry));
+        }
+    }
+
+    {
+        ReadLimiterPtr read_limiter = std::make_shared(get_stat,
+                                                       rate_target,
+                                                       LimiterType::UNKNOW);
+
+        AtomicStopwatch read_watch;
+
+        // Test `PageMap` read
+        blob_store.read(entries, read_limiter);
+        auto read_elapsed = read_watch.elapsedSeconds();
+        auto read_actual_rate = read_limiter->getTotalBytesThrough() / read_elapsed;
+        EXPECT_LE(read_actual_rate / rate_target, 1.30);
+    }
+
+    {
+        ReadLimiterPtr read_limiter = std::make_shared(get_stat,
+                                                       rate_target,
+                                                       LimiterType::UNKNOW);
+
+        AtomicStopwatch read_watch;
+
+        // Test single `Page` read
+        for (auto & entry : entries)
+        {
+            blob_store.read(entry, read_limiter);
+        }
+        auto read_elapsed = read_watch.elapsedSeconds();
+        auto read_actual_rate = read_limiter->getTotalBytesThrough() / read_elapsed;
+        EXPECT_LE(read_actual_rate / rate_target, 1.30);
+    }
+}
 
 TEST_F(BlobStoreTest, testWriteReadWithFiled)
 try
diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp
index fc3e25f1ba2..7bbc882f62b 100644
--- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp
+++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp
@@ -12,9 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -27,6 +29,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -112,6 +115,214 @@
 }
 CATCH
 
+TEST_F(PageStorageTest, WriteReadWithIOLimiter)
+try
+{
+    // In this case, WALStore throughput is very low,
+    // because we only have 5 records to write.
+    size_t wb_nums = 5;
+    PageId page_id = 50;
+    size_t buff_size = 100ul * 1024;
+    const size_t rate_target = buff_size - 1;
+
+    char c_buff[wb_nums * buff_size];
+
+    WriteBatch wbs[wb_nums];
+    PageEntriesEdit edits[wb_nums];
+
+    for (size_t i = 0; i < wb_nums; ++i)
+    {
+        for (size_t j = 0; j < buff_size; ++j)
+        {
+            c_buff[j + i * buff_size] = static_cast((j & 0xff) + i);
+        }
+
+        ReadBufferPtr buff = std::make_shared(const_cast(c_buff + i * buff_size), buff_size);
+        wbs[i].putPage(page_id + i, /* tag */ 0, buff, buff_size);
+    }
+    WriteLimiterPtr write_limiter = std::make_shared(rate_target, LimiterType::UNKNOW, 20);
+
+    AtomicStopwatch write_watch;
+    for (size_t i = 0; i < wb_nums; ++i)
+    {
+        page_storage->write(std::move(wbs[i]), write_limiter);
+    }
+    auto write_elapsed = write_watch.elapsedSeconds();
+    auto write_actual_rate = write_limiter->getTotalBytesThrough() / write_elapsed;
+
+    // It must be lower than 1.30.
+    // But we do have some disk r/w overhead, so don't assert GE.
+    EXPECT_LE(write_actual_rate / rate_target, 1.30);
+
+    Int64 consumed = 0;
+    auto get_stat = [&consumed]() {
+        return consumed;
+    };
+
+    {
+        ReadLimiterPtr read_limiter = std::make_shared(get_stat,
+                                                       rate_target,
+                                                       LimiterType::UNKNOW);
+
+        AtomicStopwatch read_watch;
+        for (size_t i = 0; i < wb_nums; ++i)
+        {
+            page_storage->readImpl(TEST_NAMESPACE_ID, page_id + i, read_limiter, nullptr, true);
+        }
+
+        auto read_elapsed = read_watch.elapsedSeconds();
+        auto read_actual_rate = read_limiter->getTotalBytesThrough() / read_elapsed;
+        EXPECT_LE(read_actual_rate / rate_target, 1.30);
+    }
+
+    {
+        ReadLimiterPtr read_limiter = std::make_shared(get_stat,
+                                                       rate_target,
+                                                       LimiterType::UNKNOW);
+
+        std::vector page_ids;
+        for (size_t i = 0; i < wb_nums; ++i)
+        {
+            page_ids.emplace_back(page_id + i);
+        }
+
+        AtomicStopwatch read_watch;
+        page_storage->readImpl(TEST_NAMESPACE_ID, page_ids, read_limiter, nullptr, true);
+
+        auto read_elapsed = read_watch.elapsedSeconds();
+        auto read_actual_rate = read_limiter->getTotalBytesThrough() / read_elapsed;
+        EXPECT_LE(read_actual_rate / rate_target, 1.30);
+    }
+}
+CATCH
+
+TEST_F(PageStorageTest, GCWithReadLimiter)
+try
+{
+    // In this case, WALStore throughput is very low,
+    // because we only have 10 records to write.
+    const size_t buff_size = 10ul * 1024;
+    char c_buff[buff_size];
+
+    const size_t num_repeat = 5;
+
+    // put page [1,num_repeat]
+    for (size_t n = 1; n <= num_repeat; ++n)
+    {
+        WriteBatch batch;
+        memset(c_buff, n, buff_size);
+        ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff));
+        batch.putPage(n, 0, buff, buff_size);
+        page_storage->write(std::move(batch));
+    }
+
+    // put page [num_repeat + 1, num_repeat * 6]
+    for (size_t n = num_repeat + 1; n <= num_repeat * 6; ++n)
+    {
+        WriteBatch batch;
+        memset(c_buff, n, buff_size);
+        ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff));
+        batch.putPage(1, 0, buff, buff_size);
+        page_storage->write(std::move(batch));
+    }
+
+    const size_t rate_target = buff_size - 1;
+
+    Int64 consumed = 0;
+    auto get_stat = [&consumed]() {
+        return consumed;
+    };
+    ReadLimiterPtr read_limiter = std::make_shared(get_stat,
+                                                   rate_target,
+                                                   LimiterType::UNKNOW);
+
+    AtomicStopwatch read_watch;
+    page_storage->gc(/*not_skip*/ false, nullptr, read_limiter);
+
+    auto elapsed = read_watch.elapsedSeconds();
+    auto read_actual_rate = read_limiter->getTotalBytesThrough() / elapsed;
+    EXPECT_LE(read_actual_rate / rate_target, 1.30);
+}
+CATCH
+
+TEST_F(PageStorageTest, GCWithWriteLimiter)
+try
+{
+    // In this case, BlobStore throughput is very low,
+    // because we only need 1024 * 150 bytes for the new blob.
+    const size_t buff_size = 10;
+    char c_buff[buff_size];
+
+    const size_t num_repeat = 1024 * 300ul;
+
+    for (size_t n = 1; n <= num_repeat; ++n)
+    {
+        WriteBatch batch;
+        memset(c_buff, n, buff_size);
+        ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff));
+        batch.putPage(n <= num_repeat / 2 ? n : 1, 0, buff, buff_size);
+        page_storage->write(std::move(batch));
+    }
+
+    const size_t rate_target = DB::PAGE_META_ROLL_SIZE - 1;
+
+    WriteLimiterPtr write_limiter = std::make_shared(rate_target, LimiterType::UNKNOW, 20);
+
+    AtomicStopwatch write_watch;
+    page_storage->gc(/*not_skip*/ false, write_limiter, nullptr);
+
+    auto elapsed = write_watch.elapsedSeconds();
+    auto read_actual_rate = write_limiter->getTotalBytesThrough() / elapsed;
+
+    EXPECT_LE(read_actual_rate / rate_target, 1.30);
+}
+CATCH
+
+TEST_F(PageStorageTest, GCWithWriteLimiter2)
+try
+{
+    // In this case, BlobStore throughput is very low,
+    // because we only need to write a small amount of data to the new blob.
+    const size_t buff_size = 1024 * 300ul;
+    char c_buff[buff_size];
+
+    const size_t num_repeat = 8;
+
+    // put page [1,num_repeat]
+    for (size_t n = 1; n <= num_repeat; ++n)
+    {
+        WriteBatch batch;
+        memset(c_buff, n, buff_size);
+        ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff));
+        batch.putPage(n, 0, buff, buff_size);
+        page_storage->write(std::move(batch));
+    }
+
+    // put page [num_repeat + 1, num_repeat * 6]
+    for (size_t n = num_repeat + 1; n <= num_repeat * 6; ++n)
+    {
+        WriteBatch batch;
+        memset(c_buff, n, buff_size);
+        ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff));
+        batch.putPage(1, 0, buff, buff_size);
+        page_storage->write(std::move(batch));
+    }
+
+    // The rate target is somewhat meaningless here, because in GC, BlobStore
+    // will compact all data (<512M) in a single IO.
+    // But we can still make sure the throughput is correct.
+ const size_t rate_target = buff_size - 1; + + WriteLimiterPtr write_limiter = std::make_shared(rate_target, LimiterType::UNKNOW, 20); + + AtomicStopwatch write_watch; + page_storage->gc(/*not_skip*/ false, write_limiter, nullptr); + + auto elapsed = write_watch.elapsedSeconds(); + auto read_actual_rate = write_limiter->getTotalBytesThrough() / elapsed; + EXPECT_LE(read_actual_rate / rate_target, 1.30); +} +CATCH + TEST_F(PageStorageTest, WriteReadWithEncryption) try { diff --git a/dbms/src/TestUtils/MockReadLimiter.h b/dbms/src/TestUtils/MockReadLimiter.h new file mode 100644 index 00000000000..8acc96371e3 --- /dev/null +++ b/dbms/src/TestUtils/MockReadLimiter.h @@ -0,0 +1,40 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace DB +{ +class MockReadLimiter final : public ReadLimiter +{ +public: + MockReadLimiter( + std::function getIOStatistic_, + Int64 rate_limit_per_sec_, + LimiterType type_ = LimiterType::UNKNOW, + Int64 get_io_stat_period_us = 2000, + UInt64 refill_period_ms_ = 100) + : ReadLimiter(getIOStatistic_, rate_limit_per_sec_, type_, get_io_stat_period_us, refill_period_ms_) + { + } + +protected: + void consumeBytes(Int64 bytes) override + { + // Need soft limit here. + WriteLimiter::consumeBytes(bytes); + } +}; + +} // namespace DB \ No newline at end of file From 097f93da581dae61ad78058e0fe496b9b686a580 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Sat, 7 May 2022 12:06:32 +0800 Subject: [PATCH 65/79] BlobStore:Add a block_alignment_bytes to make sure write have been align (#4242) ref pingcap/tiflash#3594 --- dbms/src/Interpreters/Settings.h | 3 +- dbms/src/Server/tests/gtest_server_config.cpp | 10 ++++++ dbms/src/Storages/DeltaMerge/StoragePool.cpp | 3 +- dbms/src/Storages/Page/ConfigSettings.cpp | 1 + dbms/src/Storages/Page/PageStorage.h | 6 +++- dbms/src/Storages/Page/V3/BlobStore.cpp | 34 ++++++++++++++----- dbms/src/Storages/Page/V3/BlobStore.h | 14 ++++++++ dbms/src/Storages/Page/V3/PageEntry.h | 5 +++ dbms/src/Storages/Page/V3/PageStorageImpl.h | 1 + dbms/src/Storages/Page/V3/WAL/serialize.cpp | 2 ++ 10 files changed, 68 insertions(+), 11 deletions(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 37fb6879473..d08644bd83c 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -315,7 +315,8 @@ struct Settings \ M(SettingUInt64, dt_checksum_frame_size, DBMS_DEFAULT_BUFFER_SIZE, "Frame size for delta tree stable storage") \ \ - M(SettingDouble, dt_storage_blob_heavy_gc_valid_rate, 0.2, "Max valid rate of deciding a blob can be compact") \ + M(SettingDouble, dt_storage_blob_heavy_gc_valid_rate, 0.2, "Max valid rate of deciding a blob can be compact") \ + M(SettingDouble, dt_storage_blob_block_alignment_bytes, 0, "Blob IO alignment size") \ \ M(SettingChecksumAlgorithm, dt_checksum_algorithm, ChecksumAlgo::XXH3, "Checksum algorithm for delta tree stable storage") \ M(SettingCompressionMethod, dt_compression_method, 
CompressionMethod::LZ4, "The method of data compression when writing.") \ diff --git a/dbms/src/Server/tests/gtest_server_config.cpp b/dbms/src/Server/tests/gtest_server_config.cpp index 01f2424d939..69e1fe4cb6a 100644 --- a/dbms/src/Server/tests/gtest_server_config.cpp +++ b/dbms/src/Server/tests/gtest_server_config.cpp @@ -166,6 +166,7 @@ dt_segment_limit_rows = 1000005 dt_enable_rough_set_filter = 0 max_memory_usage = 102000 dt_storage_blob_heavy_gc_valid_rate = 0.2 +dt_storage_blob_block_alignment_bytes = 0 dt_storage_pool_data_gc_min_file_num = 8 dt_storage_pool_data_gc_min_legacy_num = 2 dt_storage_pool_data_gc_min_bytes = 256 @@ -223,6 +224,7 @@ dt_compression_level = 1 ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_legacy_num, 2); ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_bytes, 256); ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_blob_heavy_gc_valid_rate, 0.2); + ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_blob_block_alignment_bytes, 0); ASSERT_EQ(global_ctx.getSettingsRef().dt_segment_delta_small_column_file_size, 8388608); ASSERT_EQ(global_ctx.getSettingsRef().dt_segment_delta_small_column_file_rows, 2048); ASSERT_EQ(global_ctx.getSettingsRef().dt_segment_limit_size, 536870912); @@ -277,6 +279,7 @@ max_rows_in_set = 455 dt_segment_limit_rows = 1000005 dt_enable_rough_set_filter = 0 dt_storage_blob_heavy_gc_valid_rate = 0.3 +dt_storage_blob_block_alignment_bytes = 4096 max_memory_usage = 102000 dt_storage_pool_data_gc_min_file_num = 8 dt_storage_pool_data_gc_min_legacy_num = 2 @@ -299,6 +302,7 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_NE(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_NE(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); EXPECT_NE(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); + EXPECT_NE(cfg.blob_block_alignment_bytes, settings.dt_storage_blob_block_alignment_bytes); EXPECT_NE(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_NE(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); persister.gc(); @@ -310,6 +314,7 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_NE(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_NE(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); EXPECT_EQ(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); + EXPECT_EQ(cfg.blob_block_alignment_bytes, settings.dt_storage_blob_block_alignment_bytes); EXPECT_EQ(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_EQ(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); }; @@ -333,6 +338,7 @@ dt_page_gc_low_write_prob = 0.2 ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_bytes, 256); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_max_valid_rate, 0.5); ASSERT_DOUBLE_EQ(global_ctx.getSettingsRef().dt_storage_blob_heavy_gc_valid_rate, 0.3); + ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_blob_block_alignment_bytes, 4096); ASSERT_EQ(global_ctx.getSettingsRef().dt_open_file_max_idle_seconds, 20); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_page_gc_low_write_prob, 0.2); verify_persister_reload_config(persister); @@ -353,6 +359,7 @@ dt_segment_limit_rows = 1000005 dt_enable_rough_set_filter = 0 max_memory_usage = 102000 dt_storage_blob_heavy_gc_valid_rate = 0.3 +dt_storage_blob_block_alignment_bytes = 4096 dt_storage_pool_data_gc_min_file_num = 8 
dt_storage_pool_data_gc_min_legacy_num = 2 dt_storage_pool_data_gc_min_bytes = 256 @@ -374,6 +381,7 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_NE(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_NE(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); EXPECT_NE(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); + EXPECT_NE(cfg.blob_block_alignment_bytes, settings.dt_storage_blob_block_alignment_bytes); EXPECT_NE(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_NE(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); @@ -385,6 +393,7 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_EQ(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); EXPECT_DOUBLE_EQ(cfg.gc_max_valid_rate, settings.dt_storage_pool_data_gc_max_valid_rate); EXPECT_DOUBLE_EQ(cfg.blob_heavy_gc_valid_rate, settings.dt_storage_blob_heavy_gc_valid_rate); + EXPECT_EQ(cfg.blob_block_alignment_bytes, settings.dt_storage_blob_block_alignment_bytes); EXPECT_EQ(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_EQ(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); }; @@ -408,6 +417,7 @@ dt_page_gc_low_write_prob = 0.2 ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_min_bytes, 256); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_storage_pool_data_gc_max_valid_rate, 0.5); ASSERT_DOUBLE_EQ(global_ctx.getSettingsRef().dt_storage_blob_heavy_gc_valid_rate, 0.3); + ASSERT_EQ(global_ctx.getSettingsRef().dt_storage_blob_block_alignment_bytes, 4096); ASSERT_EQ(global_ctx.getSettingsRef().dt_open_file_max_idle_seconds, 20); ASSERT_FLOAT_EQ(global_ctx.getSettingsRef().dt_page_gc_low_write_prob, 0.2); verify_storage_pool_reload_config(storage_pool); diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index 8cc7dd93f48..bee6b174b55 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -43,7 +43,8 @@ PageStorage::Config extractConfig(const Settings & settings, StorageType subtype config.gc_min_bytes = settings.dt_storage_pool_##NAME##_gc_min_bytes; \ config.gc_min_legacy_num = settings.dt_storage_pool_##NAME##_gc_min_legacy_num; \ config.gc_max_valid_rate = settings.dt_storage_pool_##NAME##_gc_max_valid_rate; \ - config.blob_heavy_gc_valid_rate = settings.dt_storage_blob_heavy_gc_valid_rate; + config.blob_heavy_gc_valid_rate = settings.dt_storage_blob_heavy_gc_valid_rate; \ + config.blob_block_alignment_bytes = settings.dt_storage_blob_block_alignment_bytes; PageStorage::Config config = getConfigFromSettings(settings); diff --git a/dbms/src/Storages/Page/ConfigSettings.cpp b/dbms/src/Storages/Page/ConfigSettings.cpp index 5995a657b98..6962dc08baf 100644 --- a/dbms/src/Storages/Page/ConfigSettings.cpp +++ b/dbms/src/Storages/Page/ConfigSettings.cpp @@ -36,6 +36,7 @@ void mergeConfigFromSettings(const DB::Settings & settings, PageStorage::Config // V3 setting which export to global setting config.blob_heavy_gc_valid_rate = settings.dt_storage_blob_heavy_gc_valid_rate; + config.blob_block_alignment_bytes = settings.dt_storage_blob_block_alignment_bytes; } PageStorage::Config getConfigFromSettings(const DB::Settings & settings) diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 501a749aaa1..29fea2d30e3 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -119,6 +119,7 @@ 
class PageStorage : private boost::noncopyable SettingUInt64 blob_spacemap_type = 2; SettingUInt64 blob_cached_fd_size = BLOBSTORE_CACHED_FD_SIZE; SettingDouble blob_heavy_gc_valid_rate = 0.2; + SettingUInt64 blob_block_alignment_bytes = 0; SettingUInt64 wal_roll_size = PAGE_META_ROLL_SIZE; SettingUInt64 wal_recover_mode = 0; @@ -143,6 +144,8 @@ class PageStorage : private boost::noncopyable blob_spacemap_type = rhs.blob_spacemap_type; blob_cached_fd_size = rhs.blob_cached_fd_size; blob_heavy_gc_valid_rate = rhs.blob_heavy_gc_valid_rate; + blob_block_alignment_bytes = rhs.blob_block_alignment_bytes; + wal_roll_size = rhs.wal_roll_size; wal_recover_mode = rhs.wal_recover_mode; wal_max_persisted_log_files = rhs.wal_max_persisted_log_files; @@ -170,12 +173,13 @@ class PageStorage : private boost::noncopyable return fmt::format( "PageStorage::Config V3 {{" "blob_file_limit_size: {}, blob_spacemap_type: {}, " - "blob_cached_fd_size: {}, blob_heavy_gc_valid_rate: {:.3f}, " + "blob_cached_fd_size: {}, blob_heavy_gc_valid_rate: {:.3f}, blob_block_alignment_bytes: {}, " "wal_roll_size: {}, wal_recover_mode: {}, wal_max_persisted_log_files: {}}}", blob_file_limit_size.get(), blob_spacemap_type.get(), blob_cached_fd_size.get(), blob_heavy_gc_valid_rate.get(), + blob_block_alignment_bytes.get(), wal_roll_size.get(), wal_recover_mode.get(), wal_max_persisted_log_files.get()); diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 526c0d27e25..b95a4521af7 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -151,10 +151,21 @@ PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & wr free(buffer, all_page_data_size); }); char * buffer_pos = buffer; - auto [blob_id, offset_in_file] = getPosFromStats(all_page_data_size); + + // Calculate alignment space + size_t replenish_size = 0; + if (config.block_alignment_bytes != 0 && all_page_data_size % config.block_alignment_bytes != 0) + { + replenish_size = config.block_alignment_bytes - all_page_data_size % config.block_alignment_bytes; + } + + size_t actually_allocated_size = all_page_data_size + replenish_size; + + auto [blob_id, offset_in_file] = getPosFromStats(actually_allocated_size); size_t offset_in_allocated = 0; + for (auto & write : wb.getWrites()) { switch (write.type) @@ -172,6 +183,12 @@ PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & wr entry.offset = offset_in_file + offset_in_allocated; offset_in_allocated += write.size; + // The last put write + if (offset_in_allocated == all_page_data_size) + { + entry.padded_size = replenish_size; + } + digest.update(buffer_pos, write.size); entry.checksum = digest.checksum(); @@ -217,13 +234,14 @@ PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & wr if (buffer_pos != buffer + all_page_data_size) { - removePosFromStats(blob_id, offset_in_file, all_page_data_size); + removePosFromStats(blob_id, offset_in_file, actually_allocated_size); throw Exception( fmt::format( "write batch have a invalid total size, or something wrong in parse write batch " - "[expect_offset={}] [actual_offset={}]", + "[expect_offset={}] [actual_offset={}] [actually_allocated_size={}]", all_page_data_size, - (buffer_pos - buffer)), + (buffer_pos - buffer), + actually_allocated_size), ErrorCodes::LOGICAL_ERROR); } @@ -234,8 +252,8 @@ PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & wr } catch (DB::Exception & e) { - 
removePosFromStats(blob_id, offset_in_file, all_page_data_size); - LOG_FMT_ERROR(log, "[blob_id={}] [offset_in_file={}] [size={}] write failed.", blob_id, offset_in_file, all_page_data_size); + removePosFromStats(blob_id, offset_in_file, actually_allocated_size); + LOG_FMT_ERROR(log, "[blob_id={}] [offset_in_file={}] [size={}] [actually_allocated_size={}] write failed.", blob_id, offset_in_file, all_page_data_size, actually_allocated_size); throw e; } @@ -256,7 +274,7 @@ void BlobStore::remove(const PageEntriesV3 & del_entries) try { - removePosFromStats(entry.file_id, entry.offset, entry.size); + removePosFromStats(entry.file_id, entry.offset, entry.getTotalSize()); } catch (DB::Exception & e) { @@ -961,7 +979,7 @@ BlobStore::BlobStats::BlobStats(LoggerPtr log_, PSDiskDelegatorPtr delegator_, B void BlobStore::BlobStats::restoreByEntry(const PageEntryV3 & entry) { auto stat = blobIdToStat(entry.file_id); - stat->restoreSpaceMap(entry.offset, entry.size); + stat->restoreSpaceMap(entry.offset, entry.getTotalSize()); } std::pair BlobStore::BlobStats::getBlobIdFromName(String blob_name) diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index ce980c6edc3..5aebc0f128d 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -44,7 +44,21 @@ class BlobStore : private Allocator SettingUInt64 file_limit_size = BLOBFILE_LIMIT_SIZE; SettingUInt64 spacemap_type = SpaceMap::SpaceMapType::SMAP64_STD_MAP; SettingUInt64 cached_fd_size = BLOBSTORE_CACHED_FD_SIZE; + SettingUInt64 block_alignment_bytes = 0; SettingDouble heavy_gc_valid_rate = 0.2; + + String toString() + { + return fmt::format("BlobStore Config Info: " + "[file_limit_size={}],[spacemap_type={}]," + "[cached_fd_size={}],[block_alignment_bytes={}]," + "[heavy_gc_valid_rate={}]", + file_limit_size, + spacemap_type, + cached_fd_size, + block_alignment_bytes, + heavy_gc_valid_rate); + } }; class BlobStats diff --git a/dbms/src/Storages/Page/V3/PageEntry.h b/dbms/src/Storages/Page/V3/PageEntry.h index bbc3915647c..22379611972 100644 --- a/dbms/src/Storages/Page/V3/PageEntry.h +++ b/dbms/src/Storages/Page/V3/PageEntry.h @@ -32,6 +32,7 @@ struct PageEntryV3 public: BlobFileId file_id = 0; // The id of page data persisted in PageSize size = 0; // The size of page data + PageSize padded_size = 0; // The extra align size of page data UInt64 tag = 0; BlobFileOffset offset = 0; // The offset of page data in file UInt64 checksum = 0; // The checksum of whole page data @@ -40,6 +41,10 @@ struct PageEntryV3 PageFieldOffsetChecksums field_offsets{}; public: + PageSize getTotalSize() const + { + return size + padded_size; + } inline bool isValid() const { return file_id != INVALID_BLOBFILE_ID; } size_t getFieldSize(size_t index) const diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index 272cbf73a7d..e3df872b1e1 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -44,6 +44,7 @@ class PageStorageImpl : public DB::PageStorage blob_config.cached_fd_size = config.blob_cached_fd_size; blob_config.spacemap_type = config.blob_spacemap_type; blob_config.heavy_gc_valid_rate = config.blob_heavy_gc_valid_rate; + blob_config.block_alignment_bytes = config.blob_block_alignment_bytes; return blob_config; } diff --git a/dbms/src/Storages/Page/V3/WAL/serialize.cpp b/dbms/src/Storages/Page/V3/WAL/serialize.cpp index a6afbb357f6..45104b50cea 100644 --- 
a/dbms/src/Storages/Page/V3/WAL/serialize.cpp +++ b/dbms/src/Storages/Page/V3/WAL/serialize.cpp @@ -40,6 +40,7 @@ inline void serializeEntryTo(const PageEntryV3 & entry, WriteBuffer & buf) writeIntBinary(entry.file_id, buf); writeIntBinary(entry.offset, buf); writeIntBinary(entry.size, buf); + writeIntBinary(entry.padded_size, buf); writeIntBinary(entry.checksum, buf); writeIntBinary(entry.tag, buf); // fieldsOffset TODO: compression on `fieldsOffset` @@ -56,6 +57,7 @@ inline void deserializeEntryFrom(ReadBuffer & buf, PageEntryV3 & entry) readIntBinary(entry.file_id, buf); readIntBinary(entry.offset, buf); readIntBinary(entry.size, buf); + readIntBinary(entry.padded_size, buf); readIntBinary(entry.checksum, buf); readIntBinary(entry.tag, buf); // fieldsOffset From 5d461abe94f3b0462c330dec05057eb6c38e9044 Mon Sep 17 00:00:00 2001 From: JaySon Date: Sat, 7 May 2022 12:50:32 +0800 Subject: [PATCH 66/79] Refine some comments about learner read (#4784) ref pingcap/tiflash#4118 --- dbms/src/Common/FailPoint.cpp | 2 +- .../Coprocessor/DAGStorageInterpreter.cpp | 102 ++++++++++++------ .../Flash/Coprocessor/DAGStorageInterpreter.h | 23 ++-- dbms/src/Flash/Coprocessor/TiDBTableScan.h | 2 +- dbms/src/Functions/GeoUtils.h | 1 + dbms/src/Storages/Transaction/LearnerRead.h | 2 +- 6 files changed, 81 insertions(+), 51 deletions(-) diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index 6da54e74e69..8e8b6117def 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -83,7 +83,7 @@ std::unordered_map> FailPointHelper::f M(force_slow_page_storage_snapshot_release) #define APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) \ - M(pause_after_learner_read) \ + M(pause_with_alter_locks_acquired) \ M(hang_in_execution) \ M(pause_before_dt_background_delta_merge) \ M(pause_until_dt_background_delta_merge) \ diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index f514293e7d6..879a8435e0f 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -14,16 +14,20 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -31,13 +35,20 @@ #include #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#include +#pragma GCC diagnostic pop + + namespace DB { namespace FailPoints { extern const char region_exception_after_read_from_storage_some_error[]; extern const char region_exception_after_read_from_storage_all_error[]; -extern const char pause_after_learner_read[]; +extern const char pause_with_alter_locks_acquired[]; extern const char force_remote_read_for_batch_cop[]; extern const char pause_after_copr_streams_acquired[]; } // namespace FailPoints @@ -223,6 +234,9 @@ DAGStorageInterpreter::DAGStorageInterpreter( } } +// Apply learner read to ensure we can get strong consistent with TiKV Region +// leaders. If the local Regions do not match the requested Regions, then build +// request to retry fetching data from other nodes. 
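The comment above compresses the whole learner-read flow. As a condensed, compilable model of the ordering it insists on (all names below are made up for illustration; the actual calls in this patch are doLearnerRead, getAndLockStorages and releaseAlterLocks):

    #include <shared_mutex>

    struct StorageModel
    {
        std::shared_mutex alter_lock; // write-locked by DDL / Raft apply threads
    };

    void waitRaftAppliedIndex() {} // stands in for the learner-read wait

    void coherentRead(StorageModel & storage)
    {
        // 1. Learner read first, with NO lock held: the Raft threads being
        //    waited on may themselves need the write lock on alter_lock.
        waitRaftAppliedIndex();

        // 2. Only now take the read lock and build the input streams under it.
        std::shared_lock lock(storage.alter_lock);

        // 3. Release the alter lock before the streams are consumed (DeltaTree
        //    keeps their result consistent), keeping only a drop lock so the
        //    storage cannot be dropped while reading.
        lock.unlock();
    }
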
void DAGStorageInterpreter::execute(DAGPipeline & pipeline) { prepare(); @@ -233,23 +247,29 @@ void DAGStorageInterpreter::execute(DAGPipeline & pipeline) void DAGStorageInterpreter::executeImpl(DAGPipeline & pipeline) { if (!mvcc_query_info->regions_query_info.empty()) - doLocalRead(pipeline, settings.max_block_size); + buildLocalStreams(pipeline, settings.max_block_size); - null_stream_if_empty = std::make_shared(storage_for_logical_table->getSampleBlockForColumns(required_columns)); + // Should build `remote_requests` and `null_stream` under protect of `table_structure_lock`. + auto null_stream_if_empty = std::make_shared(storage_for_logical_table->getSampleBlockForColumns(required_columns)); - // Should build these vars under protect of `table_structure_lock`. - buildRemoteRequests(); + auto remote_requests = buildRemoteRequests(); - releaseAlterLocks(); + // A failpoint to test pause before alter lock released + FAIL_POINT_PAUSE(FailPoints::pause_with_alter_locks_acquired); + // Release alter locks + // The DeltaTree engine ensures that once input streams are created, the caller can get a consistent result + // from those streams even if DDL operations are applied. Release the alter lock so that reading does not + // block DDL operations, keep the drop lock so that the storage not to be dropped during reading. + const TableLockHolders drop_locks = releaseAlterLocks(); // It is impossible to have no joined stream. assert(pipeline.streams_with_non_joined_data.empty()); - // after executeRemoteQuery, remote read stream will be appended in pipeline.streams. + // after buildRemoteStreams, remote read stream will be appended in pipeline.streams. size_t remote_read_streams_start_index = pipeline.streams.size(); // For those regions which are not presented in this tiflash node, we will try to fetch streams by key ranges from other tiflash nodes, only happens in batch cop / mpp mode. if (!remote_requests.empty()) - executeRemoteQuery(pipeline); + buildRemoteStreams(std::move(remote_requests), pipeline); /// record local and remote io input stream auto & table_scan_io_input_streams = dagContext().getInBoundIOInputStreamsMap()[table_scan.getTableScanExecutorID()]; @@ -257,7 +277,7 @@ void DAGStorageInterpreter::executeImpl(DAGPipeline & pipeline) if (pipeline.streams.empty()) { - pipeline.streams.emplace_back(null_stream_if_empty); + pipeline.streams.emplace_back(std::move(null_stream_if_empty)); // reset remote_read_streams_start_index for null_stream_if_empty. remote_read_streams_start_index = 1; } @@ -268,7 +288,7 @@ void DAGStorageInterpreter::executeImpl(DAGPipeline & pipeline) pipeline.transform([&](auto & stream) { // todo do not need to hold all locks in each stream, if the stream is reading from table a // it only needs to hold the lock of table a - for (auto & lock : drop_locks) + for (const auto & lock : drop_locks) stream->addTableLock(lock); }); @@ -290,12 +310,29 @@ void DAGStorageInterpreter::executeImpl(DAGPipeline & pipeline) void DAGStorageInterpreter::prepare() { + // About why we do learner read before acquiring structure lock on Storage(s). + // Assume that: + // 1. Read threads do learner read and wait for the Raft applied index with holding a read lock + // on "alter lock" of an IStorage X + // 2. 
Raft threads try to decode data for Region in the same IStorage X, and find it need to + // apply DDL operations which acquire write lock on "alter locks" + // Under this situation, all Raft threads will be stuck by the read threads, but read threads + // wait for Raft threads to push forward the applied index. Deadlocks happens!! + // So we must do learner read without structure lock on IStorage. After learner read, acquire the + // structure lock of IStorage(s) (to avoid concurrent issues between read threads and DDL + // operations) and build the requested inputstreams. Once the inputstreams build, we should release + // the alter lock to avoid blocking DDL operations. + // TODO: If we can acquire a read-only view on the IStorage structure (both `ITableDeclaration` + // and `TiDB::TableInfo`) we may get this process more simplified. (tiflash/issues/1853) + + // Do learner read const DAGContext & dag_context = *context.getDAGContext(); if (dag_context.isBatchCop() || dag_context.isMPPTask()) learner_read_snapshot = doBatchCopLearnerRead(); else learner_read_snapshot = doCopLearnerRead(); + // Acquire read lock on `alter lock` and build the requested inputstreams storages_with_structure_lock = getAndLockStorages(settings.schema_version); assert(storages_with_structure_lock.find(logical_table_id) != storages_with_structure_lock.end()); storage_for_logical_table = storages_with_structure_lock[logical_table_id].storage; @@ -303,8 +340,6 @@ void DAGStorageInterpreter::prepare() std::tie(required_columns, source_columns, is_need_add_cast_column) = getColumnsForTableScan(settings.max_columns_to_read); analyzer = std::make_unique(std::move(source_columns), context); - - FAIL_POINT_PAUSE(FailPoints::pause_after_learner_read); } void DAGStorageInterpreter::executePushedDownFilter( @@ -392,7 +427,7 @@ void DAGStorageInterpreter::executeCastAfterTableScan( } } -void DAGStorageInterpreter::executeRemoteQuery(DAGPipeline & pipeline) +void DAGStorageInterpreter::buildRemoteStreams(std::vector && remote_requests, DAGPipeline & pipeline) { assert(!remote_requests.empty()); DAGSchema & schema = remote_requests[0].schema; @@ -464,8 +499,9 @@ LearnerReadSnapshot DAGStorageInterpreter::doCopLearnerRead() { if (table_scan.isPartitionTableScan()) { - throw Exception("Cop request does not support partition table scan"); + throw TiFlashException("Cop request does not support partition table scan", DB::Errors::Coprocessor::BadRequest); } + TablesRegionInfoMap regions_for_local_read; for (const auto physical_table_id : table_scan.getPhysicalTableIDs()) { @@ -481,7 +517,7 @@ LearnerReadSnapshot DAGStorageInterpreter::doCopLearnerRead() if (info_retry) throw RegionException({info_retry->begin()->get().region_id}, status); - return doLearnerRead(logical_table_id, *mvcc_query_info, max_streams, false, context, log); + return doLearnerRead(logical_table_id, *mvcc_query_info, max_streams, /*for_batch_cop=*/false, context, log); } /// Will assign region_retry_from_local_region @@ -517,7 +553,7 @@ LearnerReadSnapshot DAGStorageInterpreter::doBatchCopLearnerRead() } if (mvcc_query_info->regions_query_info.empty()) return {}; - return doLearnerRead(logical_table_id, *mvcc_query_info, max_streams, true, context, log); + return doLearnerRead(logical_table_id, *mvcc_query_info, max_streams, /*for_batch_cop=*/true, context, log); } catch (const LockException & e) { @@ -584,18 +620,18 @@ std::unordered_map DAGStorageInterpreter::generateSele return ret; } -void DAGStorageInterpreter::doLocalRead(DAGPipeline & pipeline, size_t 
max_block_size) +void DAGStorageInterpreter::buildLocalStreams(DAGPipeline & pipeline, size_t max_block_size) { const DAGContext & dag_context = *context.getDAGContext(); size_t total_local_region_num = mvcc_query_info->regions_query_info.size(); if (total_local_region_num == 0) return; - auto table_query_infos = generateSelectQueryInfos(); - for (auto & table_query_info : table_query_infos) + const auto table_query_infos = generateSelectQueryInfos(); + for (const auto & table_query_info : table_query_infos) { DAGPipeline current_pipeline; - TableID table_id = table_query_info.first; - SelectQueryInfo & query_info = table_query_info.second; + const TableID table_id = table_query_info.first; + const SelectQueryInfo & query_info = table_query_info.second; size_t region_num = query_info.mvcc_query_info->regions_query_info.size(); if (region_num == 0) continue; @@ -613,11 +649,11 @@ void DAGStorageInterpreter::doLocalRead(DAGPipeline & pipeline, size_t max_block { current_pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, current_max_streams); - // After getting streams from storage, we need to validate whether regions have changed or not after learner read. - // In case the versions of regions have changed, those `streams` may contain different data other than expected. - // Like after region merge/split. + // After getting streams from storage, we need to validate whether Regions have changed or not after learner read. + // (by calling `validateQueryInfo`). In case the key ranges of Regions have changed (Region merge/split), those `streams` + // may contain different data other than expected. - // Inject failpoint to throw RegionException + // Inject failpoint to throw RegionException for testing fiu_do_on(FailPoints::region_exception_after_read_from_storage_some_error, { const auto & regions_info = query_info.mvcc_query_info->regions_query_info; RegionException::UnavailableRegions region_ids; @@ -781,7 +817,7 @@ std::unordered_map DAG return {{}, {}, {}, false}; } - if (table_store->engineType() != ::TiDB::StorageEngine::TMT && table_store->engineType() != ::TiDB::StorageEngine::DT) + if (unlikely(table_store->engineType() != ::TiDB::StorageEngine::DT)) { throw TiFlashException( fmt::format( @@ -954,8 +990,10 @@ std::tuple> DAGStorageIn return {required_columns_tmp, source_columns_tmp, need_cast_column}; } -void DAGStorageInterpreter::buildRemoteRequests() +// Build remote requests from `region_retry_from_local_region` and `table_regions_info.remote_regions` +std::vector DAGStorageInterpreter::buildRemoteRequests() { + std::vector remote_requests; std::unordered_map region_id_to_table_id_map; std::unordered_map retry_regions_map; for (const auto physical_table_id : table_scan.getPhysicalTableIDs()) @@ -978,6 +1016,8 @@ void DAGStorageInterpreter::buildRemoteRequests() if (retry_regions.empty()) continue; + // Append the region into DAGContext to return them to the upper layer. + // The upper layer should refresh its cache about these regions. for (const auto & r : retry_regions) context.getDAGContext()->retry_regions.push_back(r.get()); @@ -989,17 +1029,17 @@ void DAGStorageInterpreter::buildRemoteRequests() push_down_filter, log)); } + return remote_requests; } -void DAGStorageInterpreter::releaseAlterLocks() +TableLockHolders DAGStorageInterpreter::releaseAlterLocks() { - // The DeltaTree engine ensures that once input streams are created, the caller can get a consistent result - // from those streams even if DDL operations are applied. 
Release the alter lock so that reading does not - // block DDL operations, keep the drop lock so that the storage not to be dropped during reading. + TableLockHolders drop_locks; for (auto storage_with_lock : storages_with_structure_lock) { drop_locks.emplace_back(std::get<1>(std::move(storage_with_lock.second.lock).release())); } + return drop_locks; } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h index a1d88083468..56de51385b2 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h @@ -14,13 +14,11 @@ #pragma once -#include #include #include #include #include #include -#include #include #include #include @@ -30,12 +28,6 @@ #include #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#include -#pragma GCC diagnostic pop - #include namespace DB @@ -43,7 +35,7 @@ namespace DB using TablesRegionInfoMap = std::unordered_map>; /// DAGStorageInterpreter encapsulates operations around storage during interprete stage. /// It's only intended to be used by DAGQueryBlockInterpreter. -/// After DAGStorageInterpreter::execute some of its members will be transfered to DAGQueryBlockInterpreter. +/// After DAGStorageInterpreter::execute some of its members will be transferred to DAGQueryBlockInterpreter. class DAGStorageInterpreter { public: @@ -58,7 +50,7 @@ class DAGStorageInterpreter void execute(DAGPipeline & pipeline); - /// Members will be transfered to DAGQueryBlockInterpreter after execute + /// Members will be transferred to DAGQueryBlockInterpreter after execute std::unique_ptr analyzer; @@ -72,15 +64,15 @@ class DAGStorageInterpreter LearnerReadSnapshot doBatchCopLearnerRead(); - void doLocalRead(DAGPipeline & pipeline, size_t max_block_size); + void buildLocalStreams(DAGPipeline & pipeline, size_t max_block_size); std::unordered_map getAndLockStorages(Int64 query_schema_version); std::tuple> getColumnsForTableScan(Int64 max_columns_to_read); - void buildRemoteRequests(); + std::vector buildRemoteRequests(); - void releaseAlterLocks(); + TableLockHolders releaseAlterLocks(); std::unordered_map generateSelectQueryInfos(); @@ -88,7 +80,7 @@ class DAGStorageInterpreter void recordProfileStreams(DAGPipeline & pipeline, const String & key); - void executeRemoteQuery(DAGPipeline & pipeline); + void buildRemoteStreams(std::vector && remote_requests, DAGPipeline & pipeline); void executeCastAfterTableScan( size_t remote_read_streams_start_index, @@ -106,9 +98,6 @@ class DAGStorageInterpreter std::vector is_need_add_cast_column; /// it shouldn't be hash map because duplicated region id may occur if merge regions to retry of dag. 
RegionRetryList region_retry_from_local_region; - TableLockHolders drop_locks; - std::vector remote_requests; - BlockInputStreamPtr null_stream_if_empty; /// passed from caller, doesn't change during DAGStorageInterpreter's lifetime diff --git a/dbms/src/Flash/Coprocessor/TiDBTableScan.h b/dbms/src/Flash/Coprocessor/TiDBTableScan.h index 3c7703de7bf..934ee2c7769 100644 --- a/dbms/src/Flash/Coprocessor/TiDBTableScan.h +++ b/dbms/src/Flash/Coprocessor/TiDBTableScan.h @@ -49,7 +49,7 @@ class TiDBTableScan { return physical_table_ids; } - String getTableScanExecutorID() const + const String & getTableScanExecutorID() const { return executor_id; } diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h index 6bf1f52fbf0..764e7aa5427 100644 --- a/dbms/src/Functions/GeoUtils.h +++ b/dbms/src/Functions/GeoUtils.h @@ -30,6 +30,7 @@ #endif #pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wunknown-warning-option" #pragma GCC diagnostic ignored "-Wunused-but-set-variable" #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-variable" diff --git a/dbms/src/Storages/Transaction/LearnerRead.h b/dbms/src/Storages/Transaction/LearnerRead.h index 91d027c6599..ab7da31935c 100644 --- a/dbms/src/Storages/Transaction/LearnerRead.h +++ b/dbms/src/Storages/Transaction/LearnerRead.h @@ -30,7 +30,7 @@ struct RegionLearnerReadSnapshot : RegionPtr UInt64 snapshot_event_flag{0}; RegionLearnerReadSnapshot() = default; - RegionLearnerReadSnapshot(const RegionPtr & region) + explicit RegionLearnerReadSnapshot(const RegionPtr & region) : RegionPtr(region) , snapshot_event_flag(region->getSnapshotEventFlag()) {} From 2b2750b7dfd7823be7c33e290492e1f8cffe1a76 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Sat, 7 May 2022 18:22:32 +0800 Subject: [PATCH 67/79] Fix getMaxId won't get the right max id before GC (#4838) ref pingcap/tiflash#3594 --- dbms/src/Storages/Page/V3/PageDirectory.cpp | 29 ++++++++++-- .../Page/V3/tests/gtest_page_directory.cpp | 47 +++++++++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index aef4e9e1922..a87b2310d63 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -889,10 +889,31 @@ PageId PageDirectory::getMaxId(NamespaceId ns_id) const // iter is not at the beginning and mvcc_table_directory is not empty, // so iter-- must be a valid iterator, and it's the largest page id which is smaller than the target page id. iter--; - if (iter->first.high == ns_id) - return iter->first.low; - else - return 0; + + do + { + // Can't find any entries in current ns_id + if (iter->first.high != ns_id) + { + break; + } + + // Find the last valid one + if (iter->second->getEntry(UINT64_MAX - 1) != std::nullopt) + { + return iter->first.low; + } + + // Current entry is deleted and there are no entries before it. 
+ if (iter == mvcc_table_directory.begin()) + { + break; + } + + iter--; + } while (true); + + return 0; } } diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index ad00c47c097..a308c11e3f5 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -2054,6 +2054,53 @@ try ASSERT_EQ(dir->getMaxId(medium), 320); ASSERT_EQ(dir->getMaxId(large), 2); } + + { + PageEntriesEdit edit; + edit.del(buildV3Id(medium, 320)); + dir->apply(std::move(edit)); + ASSERT_EQ(dir->getMaxId(medium), 300); + } + + { + PageEntriesEdit edit; + edit.del(buildV3Id(medium, 300)); + dir->apply(std::move(edit)); + ASSERT_EQ(dir->getMaxId(medium), 0); + } +} +CATCH + +TEST_F(PageDirectoryTest, GetMaxIdAfterDelete) +try +{ + PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + { + PageEntriesEdit edit; + edit.put(1, entry1); + edit.put(2, entry2); + dir->apply(std::move(edit)); + } + + ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 2); + + { + PageEntriesEdit edit; + edit.del(2); + dir->apply(std::move(edit)); + } + ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 1); + + { + PageEntriesEdit edit; + edit.del(1); + dir->apply(std::move(edit)); + } + ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); + + dir->gcInMemEntries(); + ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); } CATCH From ad24c1090e8aa9ada41607c01214c7fed704d146 Mon Sep 17 00:00:00 2001 From: jinhelin Date: Sat, 7 May 2022 19:20:33 +0800 Subject: [PATCH 68/79] Enable PageStorage V3 by default in dtworkload tool (#4817) close pingcap/tiflash#4816 --- .../DeltaMerge/tools/workload/MainEntry.cpp | 36 ++++++------------- .../DeltaMerge/tools/workload/Options.cpp | 12 +++++-- .../DeltaMerge/tools/workload/Options.h | 3 ++ .../tools/workload/TableGenerator.cpp | 12 +++---- .../tools/workload/TableGenerator.h | 2 +- 5 files changed, 31 insertions(+), 34 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp index 9730a44c5c9..e18a6ef30a2 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -37,11 +36,12 @@ void initWorkDirs(const std::vector & dirs) { for (const auto & dir : dirs) { - int ret = ::mkdir(dir.c_str(), 0777); - if (ret != 0 && errno != EEXIST) + Poco::File d(dir); + if (d.exists()) { - throw std::runtime_error(fmt::format("mkdir {} failed: {}", dir, strerror(errno))); + d.remove(true); } + d.createDirectories(); } } @@ -56,21 +56,6 @@ void init(WorkloadOptions & opts) opts.initFailpoints(); } -void finish() -{ - log_ofs.close(); -} - -void removeData(Poco::Logger * log, const std::vector & data_dirs) -{ - for (const auto & dir : data_dirs) - { - LOG_FMT_ERROR(log, "rm -rf {}", dir); - Poco::File d(dir); - d.remove(true); - } -} - void outputResultHeader() { std::cout << "Date,Table Schema,Workload,Init Seconds,Write Speed(rows count),Read Speed(rows count)" << std::endl; @@ -131,7 +116,6 @@ void run(WorkloadOptions & opts) { auto * log = &Poco::Logger::get("DTWorkload_main"); LOG_FMT_INFO(log, "{}", opts.toString()); - auto data_dirs = DB::tests::TiFlashTestEnv::getGlobalContext().getPathPool().listPaths(); std::vector stats; 
try { @@ -139,7 +123,7 @@ void run(WorkloadOptions & opts) auto handle_table = createHandleTable(opts); // Table Schema auto table_gen = TableGenerator::create(opts); - auto table_info = table_gen->get(); + auto table_info = table_gen->get(opts.table_id, opts.table_name); // In this for loop, destory DeltaMergeStore gracefully and recreate it. for (uint64_t i = 0; i < opts.verify_round; i++) { @@ -148,15 +132,14 @@ void run(WorkloadOptions & opts) stats.push_back(workload.getStat()); LOG_FMT_INFO(log, "No.{} Workload {} {}", i, opts.write_key_distribution, stats.back().toStrings()); } - removeData(log, data_dirs); } catch (...) { DB::tryLogCurrentException("exception thrown"); + std::abort(); // Finish testing if some error happened. } outputResult(log, stats, opts); - finish(); } void randomKill(WorkloadOptions & opts, pid_t pid) @@ -231,9 +214,11 @@ void dailyPerformanceTest(WorkloadOptions & opts) { outputResultHeader(); std::vector workloads{"uniform", "normal", "incremental"}; - for (const auto & w : workloads) + for (size_t i = 0; i < workloads.size(); i++) { - opts.write_key_distribution = w; + opts.write_key_distribution = workloads[i]; + opts.table_id = i; + opts.table_name = workloads[i]; ::run(opts); } } @@ -263,6 +248,7 @@ int DTWorkload::mainEntry(int argc, char ** argv) // Log file is created in the first directory of `opts.work_dirs` by default. // So create these work_dirs before logger initialization. + // Attention: This function will remove directory first if `work_dirs` exists. initWorkDirs(opts.work_dirs); // need to init logger before creating global context, // or the logging in global context won't be output to diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp index 21e997949c1..0d2b14d916b 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp @@ -45,7 +45,9 @@ std::string WorkloadOptions::toString(std::string seperator) const fmt::format("testing_type {}{}", testing_type, seperator) + // fmt::format("log_write_request {}{}", log_write_request, seperator) + // fmt::format("enable_ps_v3 {}{}", enable_ps_v3, seperator) + // - fmt::format("bg_thread_count {}{}", bg_thread_count, seperator); + fmt::format("bg_thread_count {}{}", bg_thread_count, seperator) + // + fmt::format("table_id {}{}", table_id, seperator) + // + fmt::format("table_name {}{}", table_name, seperator); } std::pair WorkloadOptions::parseOptions(int argc, char * argv[]) @@ -86,9 +88,12 @@ std::pair WorkloadOptions::parseOptions(int argc, char * argv // ("log_write_request", value()->default_value(false), "") // // - ("enable_ps_v3", value()->default_value(false), "") // + ("enable_ps_v3", value()->default_value(true), "") // // ("bg_thread_count", value()->default_value(4), "") // + // + ("table_name", value()->default_value(""), "") // + ("table_id", value()->default_value(-1), "") // ; boost::program_options::variables_map vm; @@ -155,6 +160,9 @@ std::pair WorkloadOptions::parseOptions(int argc, char * argv bg_thread_count = vm["bg_thread_count"].as(); + table_id = vm["table_id"].as(); + table_name = vm["table_name"].as(); + return {true, toString()}; } diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.h b/dbms/src/Storages/DeltaMerge/tools/workload/Options.h index bc4db947a4e..17c7a5ba61f 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Options.h +++ b/dbms/src/Storages/DeltaMerge/tools/workload/Options.h @@ -57,6 +57,9 @@ 
struct WorkloadOptions uint64_t bg_thread_count; + int64_t table_id; + std::string table_name; + std::string toString(std::string seperator = "\n") const; std::pair parseOptions(int argc, char * argv[]); void initFailpoints() const; diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp index 8baf986099c..012fe040c99 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp @@ -239,13 +239,13 @@ class RandomTableGenerator : public TableGenerator , rand_gen(std::random_device()()) {} - virtual TableInfo get() override + virtual TableInfo get(int64_t table_id, std::string table_name) override { TableInfo table_info; - table_info.table_id = rand_gen(); + table_info.table_id = table_id < 0 ? rand_gen() : table_id; + table_info.table_name = table_name.empty() ? fmt::format("t_{}", table_info.table_id) : table_name; table_info.db_name = "workload"; - table_info.table_name = fmt::format("random_table_{}", table_info.table_id); auto type = getPkType(); table_info.columns = TablePkType::getDefaultColumns(type); @@ -295,13 +295,13 @@ class RandomTableGenerator : public TableGenerator class ConstantTableGenerator : public TableGenerator { - virtual TableInfo get() override + virtual TableInfo get(int64_t table_id, std::string table_name) override { TableInfo table_info; - table_info.table_id = 0; + table_info.table_id = table_id < 0 ? 0 : table_id; + table_info.table_name = table_name.empty() ? "constant" : table_name; table_info.db_name = "workload"; - table_info.table_name = "constant"; table_info.columns = TablePkType::getDefaultColumns(); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h b/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h index 9e4c1abf8ec..aba5c1590b7 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h +++ b/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h @@ -36,7 +36,7 @@ class TableGenerator public: static std::unique_ptr create(const WorkloadOptions & opts); - virtual TableInfo get() = 0; + virtual TableInfo get(int64_t table_id, std::string table_name) = 0; virtual ~TableGenerator() {} }; From 7ffb94350db2f60634843d37427811f75fdeff8f Mon Sep 17 00:00:00 2001 From: Kira Yoshikage <46625329+ZHANGWENTAI@users.noreply.github.com> Date: Sat, 7 May 2022 20:02:32 +0800 Subject: [PATCH 69/79] Disallow copy and move (#4578) ref pingcap/tiflash#4411 --- dbms/src/Common/ActionBlocker.h | 4 +- dbms/src/Common/COWPtr.h | 4 +- dbms/src/Common/CPUAffinityManager.h | 6 +-- dbms/src/Common/CompactArray.h | 4 +- dbms/src/Common/DNSCache.h | 3 +- dbms/src/Common/ExecutableTask.h | 6 ++- dbms/src/Common/HashTable/FixedHashTable.h | 4 +- dbms/src/Common/HashTable/HashTable.h | 5 +- dbms/src/Common/HashTable/SmallTable.h | 5 +- dbms/src/Common/RWLock.cpp | 5 +- dbms/src/Common/TiFlashMetrics.h | 8 ++- dbms/src/Common/nocopyable.h | 27 ++++++++++ dbms/src/Common/tests/gtest_mpmc_queue.cpp | 7 ++- .../SummingSortedBlockInputStream.h | 23 ++++----- dbms/src/Debug/MockSSTReader.h | 5 +- dbms/src/Encryption/RateLimiter.h | 21 +++++--- ...ateWriteBufferFromFileBaseByFileProvider.h | 4 +- .../Flash/Coprocessor/DAGStorageInterpreter.h | 4 +- dbms/src/IO/DoubleConverter.h | 5 +- dbms/src/IO/ReadBufferAIO.h | 4 +- dbms/src/IO/WriteBufferAIO.h | 4 +- dbms/src/IO/WriteBufferFromString.h | 4 +- dbms/src/Interpreters/Aggregator.h | 10 +++- 
.../LogicalExpressionsOptimizer.h | 4 +- .../DeltaMerge/ColumnFile/ColumnFile.h | 3 +- .../src/Storages/DeltaMerge/DeltaMergeStore.h | 15 ++++-- dbms/src/Storages/DeltaMerge/Segment.h | 5 +- .../tools/workload/TableGenerator.cpp | 6 +-- dbms/src/Storages/Page/V1/PageEntries.h | 4 +- .../Page/V1/VersionSet/PageEntriesEdit.h | 4 +- dbms/src/Storages/Page/V1/mvcc/VersionSet.h | 7 ++- dbms/src/Storages/Page/V2/PageEntries.h | 4 +- .../Page/V2/VersionSet/PageEntriesEdit.h | 4 +- dbms/src/Storages/Page/V3/LogFile/LogReader.h | 4 +- dbms/src/Storages/Page/V3/LogFile/LogWriter.h | 4 +- dbms/src/Storages/Page/V3/PageDirectory.h | 9 ++-- dbms/src/Storages/Page/V3/PageEntriesEdit.h | 49 ++++++++++++++----- dbms/src/Storages/Page/V3/WAL/WALReader.h | 4 +- dbms/src/Storages/Page/stress/PSWorkload.h | 4 +- dbms/src/Storages/StorageDeltaMerge.h | 5 +- .../DecodingStorageSchemaSnapshot.h | 4 +- .../src/Storages/Transaction/FileEncryption.h | 18 +++---- dbms/src/Storages/Transaction/ProxyFFI.cpp | 3 +- dbms/src/Storages/Transaction/ProxyFFI.h | 4 +- .../src/Storages/Transaction/ProxyFFICommon.h | 12 +++-- .../Storages/Transaction/ReadIndexWorker.h | 3 +- dbms/src/Storages/Transaction/RegionTable.h | 3 +- dbms/src/Storages/Transaction/SSTReader.h | 4 +- .../Storages/Transaction/TableRowIDMinMax.h | 8 +-- dbms/src/Storages/Transaction/TiKVKeyValue.h | 4 +- 50 files changed, 228 insertions(+), 142 deletions(-) create mode 100644 dbms/src/Common/nocopyable.h diff --git a/dbms/src/Common/ActionBlocker.h b/dbms/src/Common/ActionBlocker.h index a1094c940ce..1e1efc21f14 100644 --- a/dbms/src/Common/ActionBlocker.h +++ b/dbms/src/Common/ActionBlocker.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include #include @@ -60,8 +61,7 @@ class ActionBlocker return *this; } - LockHolder(const LockHolder & other) = delete; - LockHolder & operator=(const LockHolder & other) = delete; + DISALLOW_COPY(LockHolder); ~LockHolder() { diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index 95f355e2f30..1f6bb8dacbb 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -14,6 +14,8 @@ #pragma once +#include + #include #include #include @@ -121,7 +123,7 @@ class COWPtr : public boost::intrusive_ref_counter public: /// Copy: not possible. - mutable_ptr(const mutable_ptr &) = delete; + DISALLOW_COPY(mutable_ptr); /// Move: ok. 
mutable_ptr(mutable_ptr &&) = default; diff --git a/dbms/src/Common/CPUAffinityManager.h b/dbms/src/Common/CPUAffinityManager.h index 5de62cf8368..33b8c34a66c 100644 --- a/dbms/src/Common/CPUAffinityManager.h +++ b/dbms/src/Common/CPUAffinityManager.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -126,9 +127,6 @@ class CPUAffinityManager CPUAffinityManager(); // Disable copy and move public: - CPUAffinityManager(const CPUAffinityManager &) = delete; - CPUAffinityManager & operator=(const CPUAffinityManager &) = delete; - CPUAffinityManager(CPUAffinityManager &&) = delete; - CPUAffinityManager & operator=(CPUAffinityManager &&) = delete; + DISALLOW_COPY_AND_MOVE(CPUAffinityManager); }; } // namespace DB diff --git a/dbms/src/Common/CompactArray.h b/dbms/src/Common/CompactArray.h index 3c6470fddf7..21d6a57f4b1 100644 --- a/dbms/src/Common/CompactArray.h +++ b/dbms/src/Common/CompactArray.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -105,8 +106,7 @@ class CompactArray::Reader final { } - Reader(const Reader &) = delete; - Reader & operator=(const Reader &) = delete; + DISALLOW_COPY(Reader); bool next() { diff --git a/dbms/src/Common/DNSCache.h b/dbms/src/Common/DNSCache.h index df11e7b8eb0..682b19f0bcf 100644 --- a/dbms/src/Common/DNSCache.h +++ b/dbms/src/Common/DNSCache.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include #include #include @@ -27,7 +28,7 @@ namespace DB class DNSCache : public ext::Singleton { public: - DNSCache(const DNSCache &) = delete; + DISALLOW_COPY(DNSCache); /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolve its IP Poco::Net::IPAddress resolveHost(const std::string & host); diff --git a/dbms/src/Common/ExecutableTask.h b/dbms/src/Common/ExecutableTask.h index ccdc317cc71..834a0b12133 100644 --- a/dbms/src/Common/ExecutableTask.h +++ b/dbms/src/Common/ExecutableTask.h @@ -14,6 +14,8 @@ #pragma once +#include + #include namespace DB @@ -24,8 +26,8 @@ class IExecutableTask IExecutableTask() = default; virtual ~IExecutableTask() = default; - IExecutableTask(const IExecutableTask & rhs) = delete; - IExecutableTask & operator=(const IExecutableTask & rhs) = delete; + + DISALLOW_COPY(IExecutableTask); IExecutableTask(IExecutableTask && other) = default; IExecutableTask & operator=(IExecutableTask && other) = default; diff --git a/dbms/src/Common/HashTable/FixedHashTable.h b/dbms/src/Common/HashTable/FixedHashTable.h index aca4b42da91..87ccb0992c9 100644 --- a/dbms/src/Common/HashTable/FixedHashTable.h +++ b/dbms/src/Common/HashTable/FixedHashTable.h @@ -15,6 +15,7 @@ #pragma once #include +#include namespace DB { @@ -258,8 +259,7 @@ class FixedHashTable : private boost::noncopyable : in(in_) {} - Reader(const Reader &) = delete; - Reader & operator=(const Reader &) = delete; + DISALLOW_COPY(Reader); bool next() { diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index c8530b52382..2c857b9bc1b 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include + #ifdef DBMS_HASH_MAP_DEBUG_RESIZES #include @@ -770,8 +772,7 @@ class HashTable : private boost::noncopyable { } - Reader(const Reader &) = delete; - Reader & operator=(const Reader &) = delete; + DISALLOW_COPY(Reader); bool next() { diff --git a/dbms/src/Common/HashTable/SmallTable.h b/dbms/src/Common/HashTable/SmallTable.h index 
758c7ebea4c..74d4731b40a 100644 --- a/dbms/src/Common/HashTable/SmallTable.h +++ b/dbms/src/Common/HashTable/SmallTable.h @@ -15,7 +15,7 @@ #pragma once #include - +#include namespace DB { @@ -95,8 +95,7 @@ class SmallTable : private boost::noncopyable { } - Reader(const Reader &) = delete; - Reader & operator=(const Reader &) = delete; + DISALLOW_COPY(Reader); bool next() { diff --git a/dbms/src/Common/RWLock.cpp b/dbms/src/Common/RWLock.cpp index 084e284ba7c..658477e2361 100644 --- a/dbms/src/Common/RWLock.cpp +++ b/dbms/src/Common/RWLock.cpp @@ -17,7 +17,7 @@ #include #include #include - +#include namespace ProfileEvents { @@ -59,8 +59,7 @@ class RWLock::LockHolderImpl GroupsContainer::iterator it_group; public: - LockHolderImpl(const LockHolderImpl & other) = delete; - LockHolderImpl & operator=(const LockHolderImpl & other) = delete; + DISALLOW_COPY(LockHolderImpl); /// Implicit memory allocation for query_id is done here LockHolderImpl(const String & query_id_, Type type) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index b67b263a617..b3ddbd7fe5c 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include #include @@ -24,6 +25,7 @@ #include + // to make GCC 11 happy #include @@ -341,11 +343,7 @@ class TiFlashMetrics } APPLY_FOR_METRICS(MAKE_METRIC_MEMBER_M, MAKE_METRIC_MEMBER_F) - TiFlashMetrics(const TiFlashMetrics &) = delete; - TiFlashMetrics & operator=(const TiFlashMetrics &) = delete; - - TiFlashMetrics(TiFlashMetrics &&) = delete; - TiFlashMetrics & operator=(TiFlashMetrics &&) = delete; + DISALLOW_COPY_AND_MOVE(TiFlashMetrics); friend class MetricsPrometheus; }; diff --git a/dbms/src/Common/nocopyable.h b/dbms/src/Common/nocopyable.h new file mode 100644 index 00000000000..7b198489830 --- /dev/null +++ b/dbms/src/Common/nocopyable.h @@ -0,0 +1,27 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#define DISALLOW_COPY(ClassName) \ + ClassName(const ClassName &) = delete; \ + ClassName & operator=(const ClassName &) = delete + +#define DISALLOW_MOVE(ClassName) \ + ClassName(ClassName &&) = delete; \ + ClassName & operator=(ClassName &&) = delete + +#define DISALLOW_COPY_AND_MOVE(ClassName) \ + DISALLOW_COPY(ClassName); \ + DISALLOW_MOVE(ClassName) diff --git a/dbms/src/Common/tests/gtest_mpmc_queue.cpp b/dbms/src/Common/tests/gtest_mpmc_queue.cpp index 52a4c5167e7..85ad1892067 100644 --- a/dbms/src/Common/tests/gtest_mpmc_queue.cpp +++ b/dbms/src/Common/tests/gtest_mpmc_queue.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
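A minimal usage sketch of the new macros in Common/nocopyable.h (SnapshotHolder and GlobalRegistry are made-up names). Note the macro bodies end without a semicolon, so the call site supplies it:

    #include <Common/nocopyable.h>

    // Copy disabled, move still available: only the copy operations are deleted.
    class SnapshotHolder
    {
    public:
        SnapshotHolder() = default;
        DISALLOW_COPY(SnapshotHolder);
        SnapshotHolder(SnapshotHolder &&) = default;
        SnapshotHolder & operator=(SnapshotHolder &&) = default;
    };

    // Singleton style, as CPUAffinityManager and TiFlashMetrics use it:
    // neither copyable nor movable.
    class GlobalRegistry
    {
    public:
        static GlobalRegistry & instance()
        {
            static GlobalRegistry registry; // member function may call the private ctor
            return registry;
        }
        DISALLOW_COPY_AND_MOVE(GlobalRegistry);

    private:
        GlobalRegistry() = default;
    };
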
#include +#include #include #include @@ -465,13 +466,15 @@ class MPMCQueueTest : public ::testing::Test struct ThrowInjectable { ThrowInjectable() = default; - ThrowInjectable(const ThrowInjectable &) = delete; + + DISALLOW_COPY(ThrowInjectable); + ThrowInjectable(ThrowInjectable && rhs) { throwOrMove(std::move(rhs)); } - ThrowInjectable & operator=(const ThrowInjectable &) = delete; + ThrowInjectable & operator=(ThrowInjectable && rhs) { if (this != &rhs) diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.h b/dbms/src/DataStreams/SummingSortedBlockInputStream.h index 3fe429f7fc6..deeb16c3215 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.h @@ -14,11 +14,12 @@ #pragma once -#include +#include +#include +#include #include +#include #include -#include -#include namespace DB @@ -26,7 +27,7 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; +extern const int LOGICAL_ERROR; } @@ -121,7 +122,7 @@ class SummingSortedBlockInputStream : public MergingSortedBlockInputStream AggregateDescription() = default; AggregateDescription(AggregateDescription &&) = default; - AggregateDescription(const AggregateDescription &) = delete; + DISALLOW_COPY(AggregateDescription); }; /// Stores numbers of key-columns and value-columns. @@ -134,14 +135,14 @@ class SummingSortedBlockInputStream : public MergingSortedBlockInputStream std::vector columns_to_aggregate; std::vector maps_to_sum; - RowRef current_key; /// The current primary key. - RowRef next_key; /// The primary key of the next row. + RowRef current_key; /// The current primary key. + RowRef next_key; /// The primary key of the next row. Row current_row; - bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. + bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. - bool output_is_non_empty = false; /// Have we given out at least one row as a result. - size_t merged_rows = 0; /// Number of rows merged into current result block + bool output_is_non_empty = false; /// Have we given out at least one row as a result. + size_t merged_rows = 0; /// Number of rows merged into current result block /** We support two different cursors - with Collation and without. * Templates are used instead of polymorphic SortCursor and calls to virtual functions. 
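One C++ subtlety behind the pattern used by AggregateDescription and ThrowInjectable above: deleting the copy operations makes them user-declared, which suppresses the implicitly generated move operations, so a move-only type has to default the moves back explicitly. A self-contained sketch (MoveOnly is a made-up name):

    #include <Common/nocopyable.h>
    #include <utility>

    struct MoveOnly
    {
        MoveOnly() = default;
        DISALLOW_COPY(MoveOnly); // user-declared deletions also disable implicit moves
        MoveOnly(MoveOnly &&) = default; // so the moves must be defaulted by hand
        MoveOnly & operator=(MoveOnly &&) = default;
    };

    int main()
    {
        MoveOnly a;
        MoveOnly b = std::move(a); // OK: move constructor is defaulted
        // MoveOnly c = b;         // error: copy constructor is deleted
        (void)b;
        return 0;
    }
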
@@ -159,4 +160,4 @@ class SummingSortedBlockInputStream : public MergingSortedBlockInputStream void addRow(SortCursor & cursor); }; -} +} // namespace DB diff --git a/dbms/src/Debug/MockSSTReader.h b/dbms/src/Debug/MockSSTReader.h index e916b915a27..99e166dc9ce 100644 --- a/dbms/src/Debug/MockSSTReader.h +++ b/dbms/src/Debug/MockSSTReader.h @@ -14,10 +14,12 @@ #pragma once +#include #include #include + namespace DB { @@ -35,8 +37,7 @@ struct MockSSTReader using Key = std::pair; struct Data : std::vector> { - Data(const Data &) = delete; - Data & operator=(const Data &) = delete; + DISALLOW_COPY(Data); Data(Data &&) = default; Data & operator=(Data &&) = default; Data() = default; diff --git a/dbms/src/Encryption/RateLimiter.h b/dbms/src/Encryption/RateLimiter.h index 7f2f19822f3..f44beeb8ed7 100644 --- a/dbms/src/Encryption/RateLimiter.h +++ b/dbms/src/Encryption/RateLimiter.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -179,7 +180,10 @@ class ReadLimiter : public WriteLimiter std::function getIOStatistic; Int64 last_stat_bytes; using TimePoint = std::chrono::time_point; - static TimePoint now() { return std::chrono::time_point_cast(std::chrono::system_clock::now()); } + static TimePoint now() + { + return std::chrono::time_point_cast(std::chrono::system_clock::now()); + } TimePoint last_stat_time; Poco::Logger * log; @@ -262,10 +266,7 @@ class IORateLimiter std::thread auto_tune_thread; // Noncopyable and nonmovable. - IORateLimiter(const IORateLimiter & limiter) = delete; - IORateLimiter & operator=(const IORateLimiter & limiter) = delete; - IORateLimiter(IORateLimiter && limiter) = delete; - IORateLimiter && operator=(IORateLimiter && limiter) = delete; + DISALLOW_COPY_AND_MOVE(IORateLimiter); }; class LimiterStat @@ -371,8 +372,14 @@ class IOLimitTuner { return writeLimiterCount() + readLimiterCount(); } - int writeLimiterCount() const { return (bg_write_stat != nullptr) + (fg_write_stat != nullptr); } - int readLimiterCount() const { return (bg_read_stat != nullptr) + (fg_read_stat != nullptr); } + int writeLimiterCount() const + { + return (bg_write_stat != nullptr) + (fg_write_stat != nullptr); + } + int readLimiterCount() const + { + return (bg_read_stat != nullptr) + (fg_read_stat != nullptr); + } // Background write and foreground write Int64 avgWriteBytesPerSec() const diff --git a/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.h b/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.h index e3fc6ceaa63..36f9e861282 100644 --- a/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.h +++ b/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -96,8 +97,7 @@ class WriteBufferByFileProviderBuilder , write_limiter(write_limiter) {} - WriteBufferByFileProviderBuilder(const WriteBufferByFileProviderBuilder &) = delete; - WriteBufferByFileProviderBuilder & operator=(const WriteBufferByFileProviderBuilder &) = delete; + DISALLOW_COPY(WriteBufferByFileProviderBuilder); std::unique_ptr build() diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h index 56de51385b2..d86274a1e22 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -45,8 +46,7 @@ class DAGStorageInterpreter const PushDownFilter & push_down_filter_, size_t max_streams_); - 
DAGStorageInterpreter(DAGStorageInterpreter &&) = delete; - DAGStorageInterpreter & operator=(DAGStorageInterpreter &&) = delete; + DISALLOW_MOVE(DAGStorageInterpreter); void execute(DAGPipeline & pipeline); diff --git a/dbms/src/IO/DoubleConverter.h b/dbms/src/IO/DoubleConverter.h index 76805ef9877..4a105db1d3a 100644 --- a/dbms/src/IO/DoubleConverter.h +++ b/dbms/src/IO/DoubleConverter.h @@ -14,9 +14,9 @@ #pragma once +#include #include - namespace DB { template @@ -53,8 +53,7 @@ class DoubleConverter return instance; } - DoubleConverter(const DoubleConverter &) = delete; - DoubleConverter & operator=(const DoubleConverter &) = delete; + DISALLOW_COPY(DoubleConverter); }; } // namespace DB diff --git a/dbms/src/IO/ReadBufferAIO.h b/dbms/src/IO/ReadBufferAIO.h index 89e73410fc1..56173a11543 100644 --- a/dbms/src/IO/ReadBufferAIO.h +++ b/dbms/src/IO/ReadBufferAIO.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -48,8 +49,7 @@ class ReadBufferAIO : public ReadBufferFromFileBase char * existing_memory_ = nullptr); ~ReadBufferAIO() override; - ReadBufferAIO(const ReadBufferAIO &) = delete; - ReadBufferAIO & operator=(const ReadBufferAIO &) = delete; + DISALLOW_COPY(ReadBufferAIO); void setMaxBytes(size_t max_bytes_read_); off_t getPositionInFile() override { return first_unread_pos_in_file - (working_buffer.end() - pos); } diff --git a/dbms/src/IO/WriteBufferAIO.h b/dbms/src/IO/WriteBufferAIO.h index be4b2c51e4a..0f8081290eb 100644 --- a/dbms/src/IO/WriteBufferAIO.h +++ b/dbms/src/IO/WriteBufferAIO.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -48,8 +49,7 @@ class WriteBufferAIO : public WriteBufferFromFileBase char * existing_memory_ = nullptr); ~WriteBufferAIO() override; - WriteBufferAIO(const WriteBufferAIO &) = delete; - WriteBufferAIO & operator=(const WriteBufferAIO &) = delete; + DISALLOW_COPY(WriteBufferAIO); off_t getPositionInFile() override; void sync() override; diff --git a/dbms/src/IO/WriteBufferFromString.h b/dbms/src/IO/WriteBufferFromString.h index e08a6049cec..7c3f533cb25 100644 --- a/dbms/src/IO/WriteBufferFromString.h +++ b/dbms/src/IO/WriteBufferFromString.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -60,8 +61,7 @@ class WriteBufferFromOwnString , WriteBufferFromString(value) {} - WriteBufferFromOwnString(WriteBufferFromOwnString && rhs) = delete; - WriteBufferFromOwnString & operator=(WriteBufferFromOwnString && rhs) = delete; + DISALLOW_MOVE(WriteBufferFromOwnString); StringRef stringRef() const { return isFinished() ? 
StringRef(value) : StringRef(value.data(), pos - value.data()); } diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index b3bb537dc2e..052e3dbdcbb 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -501,8 +501,14 @@ struct AggregatedDataVariants : private boost::noncopyable : aggregates_pools(1, std::make_shared()) , aggregates_pool(aggregates_pools.back().get()) {} - bool empty() const { return type == Type::EMPTY; } - void invalidate() { type = Type::EMPTY; } + bool empty() const + { + return type == Type::EMPTY; + } + void invalidate() + { + type = Type::EMPTY; + } ~AggregatedDataVariants(); diff --git a/dbms/src/Interpreters/LogicalExpressionsOptimizer.h b/dbms/src/Interpreters/LogicalExpressionsOptimizer.h index 8939a876615..ac15ae7e05a 100644 --- a/dbms/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/dbms/src/Interpreters/LogicalExpressionsOptimizer.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -46,8 +47,7 @@ class LogicalExpressionsOptimizer final */ void perform(); - LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete; - LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete; + DISALLOW_COPY(LogicalExpressionsOptimizer); private: /** The OR function with the expression. diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h index 14230016405..00731068858 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFile.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -130,7 +131,7 @@ class ColumnFileReader public: virtual ~ColumnFileReader() = default; ColumnFileReader() = default; - ColumnFileReader(const ColumnFileReader & o) = delete; + DISALLOW_COPY(ColumnFileReader); /// Read data from this reader and store the result into output_cols. /// Note that if "range" is specified, then the caller must guarantee that the rows between [rows_offset, rows_offset + rows_limit) are sorted. 
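Deleting copy on a polymorphic base such as ColumnFileReader also turns accidental object slicing into a compile-time error. An illustration with made-up names:

    #include <Common/nocopyable.h>
    #include <cstddef>

    struct ReaderBase
    {
        ReaderBase() = default;
        virtual ~ReaderBase() = default;
        DISALLOW_COPY(ReaderBase); // derived copies are implicitly deleted too
        virtual size_t read() = 0;
    };

    struct FileReader : ReaderBase
    {
        size_t offset = 0;
        size_t read() override { return offset++; }
    };

    size_t consume(ReaderBase & r)
    {
        // ReaderBase sliced = r; // would copy only the base subobject; now rejected
        return r.read();
    }
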
diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 4f831ddfe0e..de56a622978 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -417,7 +417,10 @@ class DeltaMergeStore : private boost::noncopyable DMContextPtr newDMContext(const Context & db_context, const DB::Settings & db_settings, const String & tracing_id = ""); - static bool pkIsHandle(const ColumnDefine & handle_define) { return handle_define.id != EXTRA_HANDLE_COLUMN_ID; } + static bool pkIsHandle(const ColumnDefine & handle_define) + { + return handle_define.id != EXTRA_HANDLE_COLUMN_ID; + } void waitForWrite(const DMContextPtr & context, const SegmentPtr & segment); void waitForDeleteRange(const DMContextPtr & context, const SegmentPtr & segment); @@ -437,8 +440,14 @@ class DeltaMergeStore : private boost::noncopyable bool handleBackgroundTask(bool heavy); // isSegmentValid should be protected by lock on `read_write_mutex` - inline bool isSegmentValid(std::shared_lock &, const SegmentPtr & segment) { return doIsSegmentValid(segment); } - inline bool isSegmentValid(std::unique_lock &, const SegmentPtr & segment) { return doIsSegmentValid(segment); } + inline bool isSegmentValid(std::shared_lock &, const SegmentPtr & segment) + { + return doIsSegmentValid(segment); + } + inline bool isSegmentValid(std::unique_lock &, const SegmentPtr & segment) + { + return doIsSegmentValid(segment); + } bool doIsSegmentValid(const SegmentPtr & segment); void restoreStableFiles(); diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index d1c2da92898..a6328d24128 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -105,9 +106,7 @@ class Segment : private boost::noncopyable StableValueSpacePtr other_stable; }; - Segment(const Segment &) = delete; - Segment & operator=(const Segment &) = delete; - Segment & operator=(Segment &&) = delete; + DISALLOW_COPY_AND_MOVE(Segment); Segment( UInt64 epoch_, diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp index 012fe040c99..cf52e808ab1 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
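The isSegmentValid overloads above follow a lock-as-witness idiom: the caller must hand over the std::shared_lock or std::unique_lock it already holds on read_write_mutex, so the locking precondition is enforced by the signature instead of a comment. A generic sketch (Registry is a made-up name):

    #include <mutex>
    #include <shared_mutex>

    class Registry
    {
    public:
        // The unused lock parameter is compile-time evidence the mutex is held.
        bool isValid(std::shared_lock<std::shared_mutex> &, int id) { return doIsValid(id); }
        bool isValid(std::unique_lock<std::shared_mutex> &, int id) { return doIsValid(id); }

        std::shared_mutex mutex;

    private:
        bool doIsValid(int id) { return id >= 0; }
    };

    bool check(Registry & reg, int id)
    {
        std::shared_lock lock(reg.mutex);
        return reg.isValid(lock, id);
    }
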
+#include #include #include #include @@ -44,10 +45,7 @@ std::vector TableInfo::toStrings() const class TablePkType { public: - TablePkType(const TablePkType &) = delete; - TablePkType(TablePkType &&) = delete; - TablePkType & operator=(const TablePkType &) = delete; - TablePkType && operator=(TablePkType &&) = delete; + DISALLOW_COPY_AND_MOVE(TablePkType); static TablePkType & instance() { diff --git a/dbms/src/Storages/Page/V1/PageEntries.h b/dbms/src/Storages/Page/V1/PageEntries.h index 6a5b737512f..6b22c362643 100644 --- a/dbms/src/Storages/Page/V1/PageEntries.h +++ b/dbms/src/Storages/Page/V1/PageEntries.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -198,8 +199,7 @@ class PageEntriesMixin public: // no copying allowed - PageEntriesMixin(const PageEntriesMixin &) = delete; - PageEntriesMixin & operator=(const PageEntriesMixin &) = delete; + DISALLOW_COPY(PageEntriesMixin); // only move allowed PageEntriesMixin(PageEntriesMixin && rhs) noexcept : PageEntriesMixin(true) diff --git a/dbms/src/Storages/Page/V1/VersionSet/PageEntriesEdit.h b/dbms/src/Storages/Page/V1/VersionSet/PageEntriesEdit.h index 684bac1753b..0af786a1add 100644 --- a/dbms/src/Storages/Page/V1/VersionSet/PageEntriesEdit.h +++ b/dbms/src/Storages/Page/V1/VersionSet/PageEntriesEdit.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -82,8 +83,7 @@ class PageEntriesEdit public: // No copying allowed - PageEntriesEdit(const PageEntriesEdit &) = delete; - PageEntriesEdit & operator=(const PageEntriesEdit &) = delete; + DISALLOW_COPY(PageEntriesEdit); // Only move allowed PageEntriesEdit(PageEntriesEdit && rhs) noexcept : PageEntriesEdit() diff --git a/dbms/src/Storages/Page/V1/mvcc/VersionSet.h b/dbms/src/Storages/Page/V1/mvcc/VersionSet.h index e41517a822e..1dd4660ee5d 100644 --- a/dbms/src/Storages/Page/V1/mvcc/VersionSet.h +++ b/dbms/src/Storages/Page/V1/mvcc/VersionSet.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -219,8 +220,7 @@ class VersionSet public: // No copying allowed. 
- Snapshot(const Snapshot &) = delete; - Snapshot & operator=(const Snapshot &) = delete; + DISALLOW_COPY(Snapshot); }; using SnapshotPtr = std::shared_ptr; @@ -257,8 +257,7 @@ class VersionSet public: // No copying allowed - VersionSet(const VersionSet &) = delete; - VersionSet & operator=(const VersionSet &) = delete; + DISALLOW_COPY(VersionSet); }; diff --git a/dbms/src/Storages/Page/V2/PageEntries.h b/dbms/src/Storages/Page/V2/PageEntries.h index 84f8428364f..c99e0dade6b 100644 --- a/dbms/src/Storages/Page/V2/PageEntries.h +++ b/dbms/src/Storages/Page/V2/PageEntries.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -274,8 +275,7 @@ class PageEntriesMixin public: // no copying allowed - PageEntriesMixin(const PageEntriesMixin &) = delete; - PageEntriesMixin & operator=(const PageEntriesMixin &) = delete; + DISALLOW_COPY(PageEntriesMixin); // only move allowed PageEntriesMixin(PageEntriesMixin && rhs) noexcept : PageEntriesMixin(true) diff --git a/dbms/src/Storages/Page/V2/VersionSet/PageEntriesEdit.h b/dbms/src/Storages/Page/V2/VersionSet/PageEntriesEdit.h index 609cce19518..814736476f7 100644 --- a/dbms/src/Storages/Page/V2/VersionSet/PageEntriesEdit.h +++ b/dbms/src/Storages/Page/V2/VersionSet/PageEntriesEdit.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -90,8 +91,7 @@ class PageEntriesEdit public: // No copying allowed - PageEntriesEdit(const PageEntriesEdit &) = delete; - PageEntriesEdit & operator=(const PageEntriesEdit &) = delete; + DISALLOW_COPY(PageEntriesEdit); // Only move allowed PageEntriesEdit(PageEntriesEdit && rhs) noexcept : PageEntriesEdit() diff --git a/dbms/src/Storages/Page/V3/LogFile/LogReader.h b/dbms/src/Storages/Page/V3/LogFile/LogReader.h index 6ba98da6eb3..617b2ae1629 100644 --- a/dbms/src/Storages/Page/V3/LogFile/LogReader.h +++ b/dbms/src/Storages/Page/V3/LogFile/LogReader.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -51,8 +52,7 @@ class LogReader Format::LogNumberType log_num_, WALRecoveryMode recovery_mode_); - LogReader(const LogReader &) = delete; - LogReader & operator=(const LogReader &) = delete; + DISALLOW_COPY(LogReader); virtual ~LogReader(); diff --git a/dbms/src/Storages/Page/V3/LogFile/LogWriter.h b/dbms/src/Storages/Page/V3/LogFile/LogWriter.h index 4599c6105fb..6cd8f3b46b4 100644 --- a/dbms/src/Storages/Page/V3/LogFile/LogWriter.h +++ b/dbms/src/Storages/Page/V3/LogFile/LogWriter.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -80,8 +81,7 @@ class LogWriter final : private Allocator bool recycle_log_files_, bool manual_flush_ = false); - LogWriter(const LogWriter &) = delete; - LogWriter & operator=(const LogWriter &) = delete; + DISALLOW_COPY(LogWriter); ~LogWriter(); diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index 8263f5ef5bb..7f56676e363 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -362,12 +363,8 @@ class PageDirectory return mvcc_table_directory.size(); } - // No copying - PageDirectory(const PageDirectory &) = delete; - PageDirectory & operator=(const PageDirectory &) = delete; - // No moving - PageDirectory(PageDirectory && rhs) = delete; - PageDirectory & operator=(PageDirectory && rhs) = delete; + // No copying and no moving + DISALLOW_COPY_AND_MOVE(PageDirectory); friend class PageDirectoryFactory; friend class PageStorageControl; diff --git 
a/dbms/src/Storages/Page/V3/PageEntriesEdit.h b/dbms/src/Storages/Page/V3/PageEntriesEdit.h index 84fa18bd5d8..1702b9e575f 100644 --- a/dbms/src/Storages/Page/V3/PageEntriesEdit.h +++ b/dbms/src/Storages/Page/V3/PageEntriesEdit.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -250,15 +251,42 @@ class PageEntriesEdit #ifndef NDEBUG // Just for tests, refactor them out later - void put(PageId page_id, const PageEntryV3 & entry) { put(buildV3Id(TEST_NAMESPACE_ID, page_id), entry); } - void putExternal(PageId page_id) { putExternal(buildV3Id(TEST_NAMESPACE_ID, page_id)); } - void upsertPage(PageId page_id, const PageVersionType & ver, const PageEntryV3 & entry) { upsertPage(buildV3Id(TEST_NAMESPACE_ID, page_id), ver, entry); } - void del(PageId page_id) { del(buildV3Id(TEST_NAMESPACE_ID, page_id)); } - void ref(PageId ref_id, PageId page_id) { ref(buildV3Id(TEST_NAMESPACE_ID, ref_id), buildV3Id(TEST_NAMESPACE_ID, page_id)); } - void varRef(PageId ref_id, const PageVersionType & ver, PageId ori_page_id) { varRef(buildV3Id(TEST_NAMESPACE_ID, ref_id), ver, buildV3Id(TEST_NAMESPACE_ID, ori_page_id)); } - void varExternal(PageId page_id, const PageVersionType & create_ver, Int64 being_ref_count) { varExternal(buildV3Id(TEST_NAMESPACE_ID, page_id), create_ver, being_ref_count); } - void varEntry(PageId page_id, const PageVersionType & ver, const PageEntryV3 & entry, Int64 being_ref_count) { varEntry(buildV3Id(TEST_NAMESPACE_ID, page_id), ver, entry, being_ref_count); } - void varDel(PageId page_id, const PageVersionType & delete_ver) { varDel(buildV3Id(TEST_NAMESPACE_ID, page_id), delete_ver); } + void put(PageId page_id, const PageEntryV3 & entry) + { + put(buildV3Id(TEST_NAMESPACE_ID, page_id), entry); + } + void putExternal(PageId page_id) + { + putExternal(buildV3Id(TEST_NAMESPACE_ID, page_id)); + } + void upsertPage(PageId page_id, const PageVersionType & ver, const PageEntryV3 & entry) + { + upsertPage(buildV3Id(TEST_NAMESPACE_ID, page_id), ver, entry); + } + void del(PageId page_id) + { + del(buildV3Id(TEST_NAMESPACE_ID, page_id)); + } + void ref(PageId ref_id, PageId page_id) + { + ref(buildV3Id(TEST_NAMESPACE_ID, ref_id), buildV3Id(TEST_NAMESPACE_ID, page_id)); + } + void varRef(PageId ref_id, const PageVersionType & ver, PageId ori_page_id) + { + varRef(buildV3Id(TEST_NAMESPACE_ID, ref_id), ver, buildV3Id(TEST_NAMESPACE_ID, ori_page_id)); + } + void varExternal(PageId page_id, const PageVersionType & create_ver, Int64 being_ref_count) + { + varExternal(buildV3Id(TEST_NAMESPACE_ID, page_id), create_ver, being_ref_count); + } + void varEntry(PageId page_id, const PageVersionType & ver, const PageEntryV3 & entry, Int64 being_ref_count) + { + varEntry(buildV3Id(TEST_NAMESPACE_ID, page_id), ver, entry, being_ref_count); + } + void varDel(PageId page_id, const PageVersionType & delete_ver) + { + varDel(buildV3Id(TEST_NAMESPACE_ID, page_id), delete_ver); + } #endif private: @@ -266,8 +294,7 @@ class PageEntriesEdit public: // No copying allowed - PageEntriesEdit(const PageEntriesEdit &) = delete; - PageEntriesEdit & operator=(const PageEntriesEdit &) = delete; + DISALLOW_COPY(PageEntriesEdit); // Only move allowed PageEntriesEdit(PageEntriesEdit && rhs) noexcept : PageEntriesEdit() diff --git a/dbms/src/Storages/Page/V3/WAL/WALReader.h b/dbms/src/Storages/Page/V3/WAL/WALReader.h index b12f2f35e9d..3443cc605c0 100644 --- a/dbms/src/Storages/Page/V3/WAL/WALReader.h +++ b/dbms/src/Storages/Page/V3/WAL/WALReader.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include 
#include @@ -91,8 +92,7 @@ class WALStoreReader WALRecoveryMode recovery_mode_, const ReadLimiterPtr & read_limiter_); - WALStoreReader(const WALStoreReader &) = delete; - WALStoreReader & operator=(const WALStoreReader &) = delete; + DISALLOW_COPY(WALStoreReader); private: bool openNextFile(); diff --git a/dbms/src/Storages/Page/stress/PSWorkload.h b/dbms/src/Storages/Page/stress/PSWorkload.h index 62c2c074045..cb099b4203a 100644 --- a/dbms/src/Storages/Page/stress/PSWorkload.h +++ b/dbms/src/Storages/Page/stress/PSWorkload.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include #include @@ -126,8 +127,7 @@ class StressWorkloadManger StressWorkloadManger() = default; public: - StressWorkloadManger(const StressWorkloadManger &) = delete; - StressWorkloadManger(StressWorkloadManger &&) = delete; + DISALLOW_COPY_AND_MOVE(StressWorkloadManger); static StressWorkloadManger & getInstance() { diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index 84ae387ecee..a6e61f3bebe 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -178,7 +178,10 @@ class StorageDeltaMerge DataTypePtr getPKTypeImpl() const override; DM::DeltaMergeStorePtr & getAndMaybeInitStore(); - bool storeInited() const { return store_inited.load(std::memory_order_acquire); } + bool storeInited() const + { + return store_inited.load(std::memory_order_acquire); + } void updateTableColumnInfo(); DM::ColumnDefines getStoreColumnDefines() const; bool dataDirExist(); diff --git a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h index f2fd58c86c5..6cedbe3f0c0 100644 --- a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h +++ b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -139,8 +140,7 @@ struct DecodingStorageSchemaSnapshot } } - DecodingStorageSchemaSnapshot(const DecodingStorageSchemaSnapshot &) = delete; - DecodingStorageSchemaSnapshot & operator=(const DecodingStorageSchemaSnapshot &) = delete; + DISALLOW_COPY(DecodingStorageSchemaSnapshot); DecodingStorageSchemaSnapshot(DecodingStorageSchemaSnapshot &&) = default; }; diff --git a/dbms/src/Storages/Transaction/FileEncryption.h b/dbms/src/Storages/Transaction/FileEncryption.h index 03ba1c594d9..85f3ca5ef6d 100644 --- a/dbms/src/Storages/Transaction/FileEncryption.h +++ b/dbms/src/Storages/Transaction/FileEncryption.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -49,15 +50,17 @@ struct FileEncryptionInfo : FileEncryptionInfoRaw } } - FileEncryptionInfo(const FileEncryptionInfoRaw & src) : FileEncryptionInfoRaw(src) {} + FileEncryptionInfo(const FileEncryptionInfoRaw & src) + : FileEncryptionInfoRaw(src) + {} FileEncryptionInfo(const FileEncryptionRes & res_, - const EncryptionMethod & method_, - RawCppStringPtr key_, - RawCppStringPtr iv_, - RawCppStringPtr error_msg_) + const EncryptionMethod & method_, + RawCppStringPtr key_, + RawCppStringPtr iv_, + RawCppStringPtr error_msg_) : FileEncryptionInfoRaw{res_, method_, key_, iv_, error_msg_} {} - FileEncryptionInfo(const FileEncryptionInfo &) = delete; + DISALLOW_COPY(FileEncryptionInfo); FileEncryptionInfo(FileEncryptionInfo && src) { std::memcpy(this, &src, sizeof(src)); @@ -76,6 +79,3 @@ struct FileEncryptionInfo : FileEncryptionInfoRaw #pragma GCC diagnostic pop } // namespace DB - - - diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp 
b/dbms/src/Storages/Transaction/ProxyFFI.cpp index 5b4ff4ec02e..58e7f5ad2e5 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -253,7 +254,7 @@ struct CppStrVec { updateView(); } - CppStrVec(const CppStrVec &) = delete; + DISALLOW_COPY(CppStrVec); void updateView(); CppStrVecView intoOuterView() const { return {view.data(), view.size()}; } }; diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h index 149113782e8..5d87af94f30 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.h +++ b/dbms/src/Storages/Transaction/ProxyFFI.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -67,8 +68,7 @@ struct RawRustPtrWrap; struct RawRustPtrWrap : RawRustPtr { - RawRustPtrWrap(const RawRustPtrWrap &) = delete; - RawRustPtrWrap & operator=(const RawRustPtrWrap &) = delete; + DISALLOW_COPY(RawRustPtrWrap); explicit RawRustPtrWrap(RawRustPtr inner); ~RawRustPtrWrap(); diff --git a/dbms/src/Storages/Transaction/ProxyFFICommon.h b/dbms/src/Storages/Transaction/ProxyFFICommon.h index db667d476a9..8d6fcad56d6 100644 --- a/dbms/src/Storages/Transaction/ProxyFFICommon.h +++ b/dbms/src/Storages/Transaction/ProxyFFICommon.h @@ -14,6 +14,8 @@ #pragma once +#include + #include namespace DB @@ -23,9 +25,13 @@ struct RawCppString : std::string using Base = std::string; using Base::Base; RawCppString() = delete; - RawCppString(Base && src) : Base(std::move(src)) {} - RawCppString(const Base & src) : Base(src) {} - RawCppString(const RawCppString &) = delete; + RawCppString(Base && src) + : Base(std::move(src)) + {} + RawCppString(const Base & src) + : Base(src) + {} + DISALLOW_COPY(RawCppString); template static RawCppString * New(Args &&... 
_args) diff --git a/dbms/src/Storages/Transaction/ReadIndexWorker.h b/dbms/src/Storages/Transaction/ReadIndexWorker.h index 48ec48aad8b..e26671a2303 100644 --- a/dbms/src/Storages/Transaction/ReadIndexWorker.h +++ b/dbms/src/Storages/Transaction/ReadIndexWorker.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -205,7 +206,7 @@ struct ReadIndexDataNode : MutexLockWrap , start_ts(start_ts_) {} - ReadIndexElement(const ReadIndexElement &) = delete; + DISALLOW_COPY(ReadIndexElement); void doTriggerCallbacks(); diff --git a/dbms/src/Storages/Transaction/RegionTable.h b/dbms/src/Storages/Transaction/RegionTable.h index c624fe1011c..b30a905541a 100644 --- a/dbms/src/Storages/Transaction/RegionTable.h +++ b/dbms/src/Storages/Transaction/RegionTable.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -217,7 +218,7 @@ struct RegionPreDecodeBlockData , schema_version(schema_version_) , data_list_read(std::move(data_list_read_)) {} - RegionPreDecodeBlockData(const RegionPreDecodeBlockData &) = delete; + DISALLOW_COPY(RegionPreDecodeBlockData); void toString(std::stringstream & ss) const { ss << " {"; diff --git a/dbms/src/Storages/Transaction/SSTReader.h b/dbms/src/Storages/Transaction/SSTReader.h index b2f2420e7b3..ee483bcfd2a 100644 --- a/dbms/src/Storages/Transaction/SSTReader.h +++ b/dbms/src/Storages/Transaction/SSTReader.h @@ -14,6 +14,7 @@ #pragma once +#include #include namespace DB @@ -26,8 +27,7 @@ struct SSTReader BaseBuffView value() const; void next(); - SSTReader(const SSTReader &) = delete; - SSTReader(SSTReader &&) = delete; + DISALLOW_COPY_AND_MOVE(SSTReader); SSTReader(const TiFlashRaftProxyHelper * proxy_helper_, SSTView view); ~SSTReader(); diff --git a/dbms/src/Storages/Transaction/TableRowIDMinMax.h b/dbms/src/Storages/Transaction/TableRowIDMinMax.h index fe1f7579863..25c6a1409e4 100644 --- a/dbms/src/Storages/Transaction/TableRowIDMinMax.h +++ b/dbms/src/Storages/Transaction/TableRowIDMinMax.h @@ -14,6 +14,7 @@ #pragma once +#include #include namespace DB @@ -23,13 +24,12 @@ namespace DB struct TableRowIDMinMax { TableRowIDMinMax(const TableID table_id) - : handle_min(RecordKVFormat::genRawKey(table_id, std::numeric_limits::min())), - handle_max(RecordKVFormat::genRawKey(table_id, std::numeric_limits::max())) + : handle_min(RecordKVFormat::genRawKey(table_id, std::numeric_limits::min())) + , handle_max(RecordKVFormat::genRawKey(table_id, std::numeric_limits::max())) {} /// Make this struct can't be copied or moved. 
- TableRowIDMinMax(const TableRowIDMinMax &) = delete; - TableRowIDMinMax(TableRowIDMinMax &&) = delete; + DISALLOW_COPY_AND_MOVE(TableRowIDMinMax); const DecodedTiKVKey handle_min; const DecodedTiKVKey handle_max; diff --git a/dbms/src/Storages/Transaction/TiKVKeyValue.h b/dbms/src/Storages/Transaction/TiKVKeyValue.h index 80cd8bb3bbd..45db99b03a4 100644 --- a/dbms/src/Storages/Transaction/TiKVKeyValue.h +++ b/dbms/src/Storages/Transaction/TiKVKeyValue.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -46,7 +47,7 @@ struct StringObject : std::string {} static StringObject copyFrom(const Base & str) { return StringObject(str); } - StringObject & operator=(const StringObject & a) = delete; + DISALLOW_COPY(StringObject); StringObject & operator=(StringObject && a) { if (this == &a) @@ -73,7 +74,6 @@ struct StringObject : std::string StringObject(const Base & str_) : Base(str_) {} - StringObject(const StringObject & obj) = delete; size_t size() const = delete; }; From a1110d052b6a8a149751c8950ca058d4ddda2300 Mon Sep 17 00:00:00 2001 From: lidezhu <47731263+lidezhu@users.noreply.github.com> Date: Sat, 7 May 2022 21:02:32 +0800 Subject: [PATCH 70/79] avoid using MAX_NAMESPACE_ID as kvstore's ns_id (#4839) ref pingcap/tiflash#3594 --- dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp | 2 +- dbms/src/Storages/Page/PageDefines.h | 2 ++ dbms/src/Storages/Transaction/RegionPersister.h | 3 +-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index d7cd1f6fd95..36063e2bd83 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -1729,7 +1729,7 @@ UInt64 DeltaMergeStore::onSyncGc(Int64 limit) } } if (!finish_gc_on_segment) - LOG_FMT_DEBUG( + LOG_FMT_TRACE( log, "GC is skipped Segment [{}] [range={}] [table={}]", segment_id, diff --git a/dbms/src/Storages/Page/PageDefines.h b/dbms/src/Storages/Page/PageDefines.h index 8cd0e4c325e..46789419fbd 100644 --- a/dbms/src/Storages/Page/PageDefines.h +++ b/dbms/src/Storages/Page/PageDefines.h @@ -48,6 +48,8 @@ static constexpr UInt64 MAX_PERSISTED_LOG_FILES = 4; using NamespaceId = UInt64; static constexpr NamespaceId MAX_NAMESPACE_ID = UINT64_MAX; +// KVStore stores its data individually, so the actual `ns_id` value doesn't matter (it just needs to differ from `MAX_NAMESPACE_ID`) +static constexpr NamespaceId KVSTORE_NAMESPACE_ID = 1000000UL; // just a random namespace id for test, the value doesn't matter static constexpr NamespaceId TEST_NAMESPACE_ID = 1000; diff --git a/dbms/src/Storages/Transaction/RegionPersister.h b/dbms/src/Storages/Transaction/RegionPersister.h index feb4353a0d0..9341ded6f76 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.h +++ b/dbms/src/Storages/Transaction/RegionPersister.h @@ -70,8 +70,7 @@ class RegionPersister final : private boost::noncopyable PageStoragePtr page_storage; std::shared_ptr stable_page_storage; - // RegionPersister stores it's data individually, so the `ns_id` value doesn't matter - NamespaceId ns_id = MAX_NAMESPACE_ID; + NamespaceId ns_id = KVSTORE_NAMESPACE_ID; const RegionManager & region_manager; std::mutex mutex; Poco::Logger * log; From d539daad547863450e5ca4eb3c6580eae28e8889 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Sun, 8 May 2022 01:24:32 +0800 Subject: [PATCH 71/79] Fix dump snapshot encryption info being wrong after rename.
(#4840) close pingcap/tiflash#4841 --- .../Storages/Page/V3/LogFile/LogReader.cpp | 2 +- dbms/src/Storages/Page/V3/WAL/WALReader.h | 8 ++- dbms/src/Storages/Page/V3/WALStore.cpp | 10 ++- .../Page/V3/tests/gtest_wal_store.cpp | 64 +++++++++++++------ 4 files changed, 60 insertions(+), 24 deletions(-) diff --git a/dbms/src/Storages/Page/V3/LogFile/LogReader.cpp b/dbms/src/Storages/Page/V3/LogFile/LogReader.cpp index bd3258b6e07..1a87f042a2e 100644 --- a/dbms/src/Storages/Page/V3/LogFile/LogReader.cpp +++ b/dbms/src/Storages/Page/V3/LogFile/LogReader.cpp @@ -470,7 +470,7 @@ UInt8 LogReader::readMore(size_t * drop_size) void LogReader::reportCorruption(size_t bytes, const String & reason) { - reportDrop(bytes, "Corruption: " + reason); + reportDrop(bytes, fmt::format("Corruption: {} [offset={}] [file={}]", reason, file->getPositionInFile(), file->getFileName())); } void LogReader::reportDrop(size_t bytes, const String & reason) diff --git a/dbms/src/Storages/Page/V3/WAL/WALReader.h b/dbms/src/Storages/Page/V3/WAL/WALReader.h index 3443cc605c0..e61a53da5de 100644 --- a/dbms/src/Storages/Page/V3/WAL/WALReader.h +++ b/dbms/src/Storages/Page/V3/WAL/WALReader.h @@ -21,6 +21,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int CORRUPTED_DATA; +} + class FileProvider; using FileProviderPtr = std::shared_ptr; @@ -29,10 +34,11 @@ namespace PS::V3 class ReportCollector : public LogReader::Reporter { public: - void corruption(size_t /*bytes*/, const String & /*msg*/) override + void corruption(size_t /*bytes*/, const String & msg) override { error_happened = true; // FIXME: store the reason of corruption + throw Exception(msg, ErrorCodes::CORRUPTED_DATA); } bool hasError() const diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 6585c6dfdfe..6759e80f416 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -201,9 +201,15 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire // Rename it to be a normal log file. const auto temp_fullname = log_filename.fullname(LogFileStage::Temporary); const auto normal_fullname = log_filename.fullname(LogFileStage::Normal); + LOG_FMT_INFO(logger, "Renaming log file to be normal [fullname={}]", temp_fullname); - auto f = Poco::File{temp_fullname}; - f.renameTo(normal_fullname); + // Use `renameFile` from FileProvider, which takes good care of the encryption path + provider->renameFile( + temp_fullname, + EncryptionPath(temp_fullname, ""), + normal_fullname, + EncryptionPath(normal_fullname, ""), + true); LOG_FMT_INFO(logger, "Rename log file to normal done [fullname={}]", normal_fullname); // #define ARCHIVE_COMPACTED_LOGS // keep for debug diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp index 008a311841c..23ee2e93f07 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License.
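// Aside on the WALStore::saveSnapshot hunk above, which is the heart of this fix:
// with encryption enabled, FileProvider tracks the key/IV of each file by path, so
// renaming the temporary checkpoint log with a bare Poco::File::renameTo leaves the
// encryption info registered under the stale temporary name and the renamed log can
// no longer be decrypted. A hypothetical sketch of the invariant follows; the names
// below are illustrative, not the real FileProvider API.
#include <cstdio>
#include <map>
#include <string>

// Toy registry standing in for per-path encryption metadata.
struct EncryptionRegistry
{
    std::map<std::string, std::string> key_iv_by_path;

    void move(const std::string & from, const std::string & to)
    {
        if (auto it = key_iv_by_path.find(from); it != key_iv_by_path.end())
        {
            key_iv_by_path[to] = it->second; // re-register under the new path
            key_iv_by_path.erase(it);
        }
    }
};

void renameEncryptedFile(EncryptionRegistry & registry, const std::string & from, const std::string & to)
{
    registry.move(from, to);               // keep encryption info in sync...
    std::rename(from.c_str(), to.c_str()); // ...with the on-disk rename
}

// The companion WALReader change makes such mistakes loud: a corrupted (here:
// undecryptable) record now throws CORRUPTED_DATA instead of only setting
// error_happened.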
+#include #include #include #include @@ -220,6 +221,7 @@ class WALStoreTest public: WALStoreTest() : multi_paths(GetParam()) + , log(Logger::get("WALStoreTest")) { } @@ -250,11 +252,11 @@ class WALStoreTest protected: PSDiskDelegatorPtr delegator; WALStore::Config config; + LoggerPtr log; }; TEST_P(WALStoreTest, FindCheckpointFile) { - LoggerPtr log = Logger::get("WALLognameTest"); auto path = getTemporaryPath(); { @@ -547,7 +549,7 @@ try ASSERT_NE(wal, nullptr); std::mt19937 rd; - std::uniform_int_distribution<> d(0, 20); + std::uniform_int_distribution<> d_20(0, 20); // Stage 2. insert many edits constexpr size_t num_edits_test = 100000; @@ -559,7 +561,7 @@ try { PageEntryV3 entry{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageEntriesEdit edit; - const size_t num_pages_put = d(rd); + const size_t num_pages_put = d_20(rd); for (size_t p = 0; p < num_pages_put; ++p) { page_id += 1; @@ -595,23 +597,45 @@ try LOG_FMT_INFO(&Poco::Logger::get("WALStoreTest"), "Done test for {} persist pages in {} edits", num_pages_read, num_edits_test); - // Stage 3. compact logs and verify - // wal->compactLogs(); - // wal.reset(); - - // // After logs compacted, they should be written as one edit. - // num_edits_read = 0; - // num_pages_read = 0; - // wal = WALStore::create( - // [&](PageEntriesEdit && edit) { - // num_pages_read += edit.size(); - // EXPECT_EQ(page_id, edit.size()) << fmt::format("at idx={}", num_edits_read); - // num_edits_read += 1; - // }, - // provider, - // delegator); - // EXPECT_EQ(num_edits_read, 1); - // EXPECT_EQ(num_pages_read, page_id); + // Test for save snapshot (with encryption) + auto enc_key_manager = std::make_shared(/*encryption_enabled_=*/true); + auto enc_provider = std::make_shared(enc_key_manager, true); + LogFilenameSet persisted_log_files = WALStoreReader::listAllFiles(delegator, log); + WALStore::FilesSnapshot file_snap{.current_writting_log_num = 100, // just a fake value + .persisted_log_files = persisted_log_files}; + + PageEntriesEdit snap_edit; + PageEntryV3 entry{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + std::uniform_int_distribution<> d_10000(0, 10000); + // just fill in some random entries + for (size_t i = 0; i < 70; ++i) + { + snap_edit.varEntry(d_10000(rd), PageVersionType(345, 22), entry, 1); + } + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), enc_provider, delegator, config); + bool done = wal->saveSnapshot(std::move(file_snap), std::move(snap_edit)); + ASSERT_TRUE(done); + wal.reset(); + reader.reset(); + + // After logs compacted, they should be written as one edit. + num_edits_read = 0; + num_pages_read = 0; + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), enc_provider, delegator, config); + while (reader->remained()) + { + auto [ok, edit] = reader->next(); + if (!ok) + { + reader->throwIfError(); + // else it just runs to the end of the file.
+ break; } num_pages_read += edit.size(); num_edits_read += 1; } EXPECT_EQ(num_edits_read, 1); EXPECT_EQ(num_pages_read, 70); } CATCH From 1e1bf1f1fd315edf88552b1092b606ee8686d58c Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Mon, 9 May 2022 15:26:33 +0800 Subject: [PATCH 72/79] Revert "Fix getMaxId won't get the right max id before GC" (#4845) ref pingcap/tiflash#3594 --- dbms/src/Storages/Page/V3/PageDirectory.cpp | 29 ++---------- .../Page/V3/tests/gtest_page_directory.cpp | 47 ------------------- 2 files changed, 4 insertions(+), 72 deletions(-) diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index a87b2310d63..aef4e9e1922 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -889,31 +889,10 @@ PageId PageDirectory::getMaxId(NamespaceId ns_id) const // iter is not at the beginning and mvcc_table_directory is not empty, // so iter-- must be a valid iterator, and it's the largest page id which is smaller than the target page id. iter--; - - do - { - // Can't find any entries in current ns_id - if (iter->first.high != ns_id) - { - break; - } - - // Find the last valid one - if (iter->second->getEntry(UINT64_MAX - 1) != std::nullopt) - { - return iter->first.low; - } - - // Current entry is deleted and there are no entries before it. - if (iter == mvcc_table_directory.begin()) - { - break; - } - - iter--; - } while (true); - - return 0; + if (iter->first.high == ns_id) + return iter->first.low; + else + return 0; } } diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index a308c11e3f5..ad00c47c097 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -2054,53 +2054,6 @@ try ASSERT_EQ(dir->getMaxId(medium), 320); ASSERT_EQ(dir->getMaxId(large), 2); } - - { - PageEntriesEdit edit; - edit.del(buildV3Id(medium, 320)); - dir->apply(std::move(edit)); - ASSERT_EQ(dir->getMaxId(medium), 300); - } - - { - PageEntriesEdit edit; - edit.del(buildV3Id(medium, 300)); - dir->apply(std::move(edit)); - ASSERT_EQ(dir->getMaxId(medium), 0); - } -} -CATCH - -TEST_F(PageDirectoryTest, GetMaxIdAfterDelete) -try -{ - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - { - PageEntriesEdit edit; - edit.put(1, entry1); - edit.put(2, entry2); - dir->apply(std::move(edit)); - } - - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 2); - - { - PageEntriesEdit edit; - edit.del(2); - dir->apply(std::move(edit)); - } - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 1); - - { - PageEntriesEdit edit; - edit.del(1); - dir->apply(std::move(edit)); - } - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); - - dir->gcInMemEntries(); - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); -} -CATCH From 8781b0fcb3394ebd7817fd1ddab713021076d242 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Mon, 9 May 2022 20:12:34 +0800 Subject: [PATCH 73/79] Add a metric to track expansion and reuse statistics.
(#4619) ref pingcap/tiflash#3594 --- dbms/src/Common/ProfileEvents.cpp | 2 + dbms/src/Storages/Page/V3/BlobStore.cpp | 6 +- dbms/src/Storages/Page/V3/spacemap/SpaceMap.h | 7 +- .../Page/V3/spacemap/SpaceMapRBTree.cpp | 33 +++-- .../Page/V3/spacemap/SpaceMapRBTree.h | 2 +- .../Page/V3/spacemap/SpaceMapSTDMap.h | 25 ++-- .../Storages/Page/V3/tests/gtest_free_map.cpp | 46 ++++++- metrics/grafana/tiflash_summary.json | 121 ++++++++++++++++++ 8 files changed, 214 insertions(+), 28 deletions(-) diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index 1b9b62dd2c6..0ec1ce438a6 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -110,6 +110,8 @@ \ M(PSMWritePages) \ M(PSMWriteIOCalls) \ + M(PSV3MBlobExpansion) \ + M(PSV3MBlobReused) \ M(PSMWriteBytes) \ M(PSMBackgroundWriteBytes) \ M(PSMReadPages) \ diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index b95a4521af7..2c568d8e85b 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -36,6 +36,8 @@ namespace ProfileEvents { extern const Event PSMWritePages; extern const Event PSMReadPages; +extern const Event PSV3MBlobExpansion; +extern const Event PSV3MBlobReused; } // namespace ProfileEvents namespace DB @@ -1243,8 +1245,10 @@ BlobFileOffset BlobStore::BlobStats::BlobStat::getPosFromStat(size_t buf_size, c { BlobFileOffset offset = 0; UInt64 max_cap = 0; + bool expansion = true; - std::tie(offset, max_cap) = smap->searchInsertOffset(buf_size); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(buf_size); + ProfileEvents::increment(expansion ? ProfileEvents::PSV3MBlobExpansion : ProfileEvents::PSV3MBlobReused); /** * Whatever `searchInsertOffset` success or failed, diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h index e4af33c5a81..4a0c035cd3f 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h @@ -86,10 +86,11 @@ class SpaceMap * It will mark that span to be used and also return a hint of the max capacity available in this SpaceMap. * * return value is : - * insert_offset : start offset for the inserted space - * max_cap : A hint of the largest available space this SpaceMap can hold. + * insert_offset: start offset for the inserted space + * max_cap: A hint of the largest available space this SpaceMap can hold. + * is_expansion: Whether it is an expansion span */ - virtual std::pair searchInsertOffset(size_t size) = 0; + virtual std::tuple searchInsertOffset(size_t size) = 0; /** * Get the offset of the last free block. `[margin_offset, +∞)` is not used at all. 
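searchInsertOffset now returns a third element, which getPosFromStat above feeds into the PSV3MBlobExpansion / PSV3MBlobReused counters: an allocation counts as an expansion only when it is carved from the free span touching the end of the managed range (writing there grows the blob file), and as a reuse when it fills an interior hole. A simplified, self-contained sketch of that rule over a plain std::map of free spans (start -> length), not the TiFlash SpaceMap classes:

#include <cstdint>
#include <map>
#include <tuple>

// Returns {offset, is_expansion}; offset == UINT64_MAX means no span fits.
std::tuple<uint64_t, bool> allocate(std::map<uint64_t, uint64_t> & free_map, uint64_t end, uint64_t size)
{
    // The tail span exists only if the last free range reaches `end`.
    uint64_t tail_start = UINT64_MAX;
    if (auto r_it = free_map.rbegin(); r_it != free_map.rend() && r_it->first + r_it->second == end)
        tail_start = r_it->first;

    for (auto it = free_map.begin(); it != free_map.end(); ++it)
    {
        const auto [start, count] = *it;
        if (count < size)
            continue; // first-fit scan
        const bool is_expansion = (start == tail_start);
        free_map.erase(it);
        if (count > size)
            free_map.emplace(start + size, count - size); // keep the remainder free
        return {start, is_expansion};
    }
    return {UINT64_MAX, false};
}

On the new Grafana panel this distinction reads as: a high reuse rate means writes are absorbed by holes left behind by GC, while a high expansion rate means the blob files keep growing.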
diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp index 3b4c6a28099..54275574060 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp @@ -304,7 +304,7 @@ static bool rb_remove_entry(UInt64 start, UInt64 count, struct RbPrivate * priva // Root node have not been init if (private_data->root.rb_node == nullptr) { - assert(false); + LOG_ERROR(log, "Current spacemap is invalid."); } while (*n) @@ -500,7 +500,7 @@ bool RBTreeSpaceMap::isMarkUnused(UInt64 offset, size_t length) if (length == 0 || rb_tree->root.rb_node == nullptr) { - assert(0); + LOG_ERROR(log, "Current spacemap is invalid."); } while (*n) @@ -543,12 +543,12 @@ bool RBTreeSpaceMap::isMarkUnused(UInt64 offset, size_t length) return retval; } -std::pair RBTreeSpaceMap::searchInsertOffset(size_t size) +std::tuple RBTreeSpaceMap::searchInsertOffset(size_t size) { - UInt64 offset = UINT64_MAX; + UInt64 offset = UINT64_MAX, last_offset = UINT64_MAX; UInt64 max_cap = 0; - struct rb_node * node = nullptr; - struct SmapRbEntry * entry; + struct rb_node *node = nullptr, *last_node = nullptr; + struct SmapRbEntry *entry, *last_entry; UInt64 scan_biggest_cap = 0; UInt64 scan_biggest_offset = 0; @@ -558,7 +558,18 @@ std::pair RBTreeSpaceMap::searchInsertOffset(size_t size) { LOG_ERROR(log, "Current spacemap is full."); biggest_cap = 0; - return std::make_pair(offset, biggest_cap); + return std::make_tuple(offset, biggest_cap, false); + } + + last_node = rb_tree_last(&rb_tree->root); + if (last_node != nullptr) + { + last_entry = node_to_entry(last_node); + last_offset = (last_entry->start + last_entry->count == end) ? last_entry->start : UINT64_MAX; + } + else + { + LOG_ERROR(log, "Current spacemap is invalid."); } for (; node != nullptr; node = rb_tree_next(node)) @@ -592,7 +603,7 @@ std::pair RBTreeSpaceMap::searchInsertOffset(size_t size) biggest_range = scan_biggest_offset; biggest_cap = scan_biggest_cap; - return std::make_pair(offset, biggest_cap); + return std::make_tuple(offset, biggest_cap, false); } // Update return start @@ -614,7 +625,7 @@ std::pair RBTreeSpaceMap::searchInsertOffset(size_t size) rb_node_remove(node, &rb_tree->root); rb_free_entry(rb_tree, entry); max_cap = biggest_cap; - return std::make_pair(offset, max_cap); + return std::make_tuple(offset, max_cap, offset == last_offset); } } else // must be entry->count > size @@ -637,7 +648,7 @@ std::pair RBTreeSpaceMap::searchInsertOffset(size_t size) else // It not champion, just return { max_cap = biggest_cap; - return std::make_pair(offset, max_cap); + return std::make_tuple(offset, max_cap, offset == last_offset); } } @@ -653,7 +664,7 @@ std::pair RBTreeSpaceMap::searchInsertOffset(size_t size) biggest_range = scan_biggest_offset; biggest_cap = scan_biggest_cap; max_cap = biggest_cap; - return std::make_pair(offset, max_cap); + return std::make_tuple(offset, max_cap, offset == last_offset); } UInt64 RBTreeSpaceMap::updateAccurateMaxCapacity() diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h index 8c53724be7d..0393fda081b 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h @@ -40,7 +40,7 @@ class RBTreeSpaceMap static std::shared_ptr create(UInt64, UInt64 end); - std::pair searchInsertOffset(size_t size) override; + std::tuple searchInsertOffset(size_t size) override; UInt64 
updateAccurateMaxCapacity() override; diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h index b691d0b1d81..92c08deb555 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h @@ -181,9 +181,9 @@ class STDMapSpaceMap return true; } - std::pair searchInsertOffset(size_t size) override + std::tuple searchInsertOffset(size_t size) override { - UInt64 offset = UINT64_MAX; + UInt64 offset = UINT64_MAX, last_offset = UINT64_MAX; UInt64 max_cap = 0; // The biggest free block capacity and its start offset UInt64 scan_biggest_cap = 0; @@ -193,9 +193,12 @@ class STDMapSpaceMap { LOG_FMT_ERROR(log, "Current space map is full"); hint_biggest_cap = 0; - return std::make_pair(offset, hint_biggest_cap); + return std::make_tuple(offset, hint_biggest_cap, false); } + auto r_it = free_map.rbegin(); + last_offset = (r_it->first + r_it->second == end) ? r_it->first : UINT64_MAX; + auto it = free_map.begin(); for (; it != free_map.end(); it++) { @@ -214,11 +217,17 @@ class STDMapSpaceMap // No enough space for insert if (it == free_map.end()) { - LOG_FMT_ERROR(log, "Not sure why can't found any place to insert. [size={}] [old biggest_offset={}] [old biggest_cap={}] [new biggest_offset={}] [new biggest_cap={}]", size, hint_biggest_offset, hint_biggest_cap, scan_biggest_offset, scan_biggest_cap); + LOG_FMT_ERROR(log, "Not sure why we can't find any place to insert. " "[size={}] [old biggest_offset={}] [old biggest_cap={}] [new biggest_offset={}] [new biggest_cap={}]", // size, hint_biggest_offset, hint_biggest_cap, scan_biggest_offset, scan_biggest_cap); hint_biggest_offset = scan_biggest_offset; hint_biggest_cap = scan_biggest_cap; - return std::make_pair(offset, hint_biggest_cap); + return std::make_tuple(offset, hint_biggest_cap, false); } // Update return start @@ -231,7 +240,7 @@ class STDMapSpaceMap { free_map.erase(it); max_cap = hint_biggest_cap; - return std::make_pair(offset, max_cap); + return std::make_tuple(offset, max_cap, last_offset == offset); } // It is champion, need to update `scan_biggest_cap`, `scan_biggest_offset` @@ -251,7 +260,7 @@ class STDMapSpaceMap if (k - size != hint_biggest_offset) { max_cap = hint_biggest_cap; - return std::make_pair(offset, max_cap); + return std::make_tuple(offset, max_cap, last_offset == offset); } // It is champion, need to update `scan_biggest_cap`, `scan_biggest_offset` @@ -274,7 +283,7 @@ class STDMapSpaceMap hint_biggest_offset = scan_biggest_offset; hint_biggest_cap = scan_biggest_cap; - return std::make_pair(offset, hint_biggest_cap); + return std::make_tuple(offset, hint_biggest_cap, last_offset == offset); } UInt64 updateAccurateMaxCapacity() override diff --git a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp index 85a94ec0ac3..f7120f000b2 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp @@ -283,14 +283,18 @@ TEST_P(SpaceMapTest, TestSearch) auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); UInt64 offset; UInt64 max_cap; + bool expansion = true; + Range ranges[] = {{.start = 0, .end = 100}}; ASSERT_TRUE(smap->check(genChecker(ranges, 1), 1)); ASSERT_TRUE(smap->markUsed(50, 10)); - std::tie(offset, max_cap) = smap->searchInsertOffset(20); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(20); + ASSERT_EQ(offset, 0); ASSERT_EQ(max_cap, 40);
ASSERT_EQ(expansion, false); Range ranges1[] = {{.start = 20, .end = 50}, @@ -304,9 +308,10 @@ TEST_P(SpaceMapTest, TestSearch) smap = SpaceMap::createSpaceMap(test_type, 0, 100); ASSERT_TRUE(smap->markUsed(50, 10)); - std::tie(offset, max_cap) = smap->searchInsertOffset(5); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(5); ASSERT_EQ(offset, 0); ASSERT_EQ(max_cap, 45); + ASSERT_EQ(expansion, false); Range ranges2[] = {{.start = 5, .end = 50}, @@ -317,9 +322,10 @@ TEST_P(SpaceMapTest, TestSearch) // Test margin smap = SpaceMap::createSpaceMap(test_type, 0, 100); ASSERT_TRUE(smap->markUsed(50, 10)); - std::tie(offset, max_cap) = smap->searchInsertOffset(50); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(50); ASSERT_EQ(offset, 0); ASSERT_EQ(max_cap, 40); + ASSERT_EQ(expansion, false); Range ranges3[] = {{.start = 60, .end = 100}}; @@ -328,9 +334,10 @@ TEST_P(SpaceMapTest, TestSearch) // Test invalid Size smap = SpaceMap::createSpaceMap(test_type, 0, 100); ASSERT_TRUE(smap->markUsed(50, 10)); - std::tie(offset, max_cap) = smap->searchInsertOffset(100); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(100); ASSERT_EQ(offset, UINT64_MAX); ASSERT_EQ(max_cap, 50); + ASSERT_EQ(expansion, false); // No changed Range ranges4[] = {{.start = 0, @@ -338,6 +345,37 @@ TEST_P(SpaceMapTest, TestSearch) {.start = 60, .end = 100}}; ASSERT_TRUE(smap->check(genChecker(ranges4, 2), 2)); + + // Test expansion + smap = SpaceMap::createSpaceMap(test_type, 0, 100); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(10); + ASSERT_EQ(offset, 0); + ASSERT_EQ(max_cap, 90); + ASSERT_EQ(expansion, true); + + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(10); + ASSERT_EQ(offset, 10); + ASSERT_EQ(max_cap, 80); + ASSERT_EQ(expansion, true); +} + +TEST_P(SpaceMapTest, TestSearchIsExpansion) +{ + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + UInt64 offset; + UInt64 max_cap; + bool expansion = true; + + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(20); + ASSERT_EQ(offset, 0); + ASSERT_EQ(max_cap, 80); + ASSERT_EQ(expansion, true); + + ASSERT_TRUE(smap->markUsed(90, 10)); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(20); + ASSERT_EQ(expansion, false); + std::tie(offset, max_cap, expansion) = smap->searchInsertOffset(20); + ASSERT_EQ(expansion, false); } diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 364216a28db..a1c75e7c04e 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -5157,6 +5157,127 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The states of BlobStore (an internal component of storage engine)", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 69 + }, + "hiddenSeries": false, + "id": 85, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": 
"BlobAllocated", + "yaxis": 1 + }, + { + "alias": "BlobReusedRate", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_PSV3MBlobExpansion{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]) + rate(tiflash_system_profile_event_PSV3MBlobReused{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "BlobAllocated", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_PSV3MBlobReused{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]) / (rate(tiflash_system_profile_event_PSV3MBlobExpansion{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]) + rate(tiflash_system_profile_event_PSV3MBlobReused{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "BlobReusedRate", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "The BlobStore Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, From 0e2b7f7cb1b4bec6df21a7b8469ca8929c54e4c3 Mon Sep 17 00:00:00 2001 From: hehechen Date: Tue, 10 May 2022 11:26:34 +0800 Subject: [PATCH 74/79] MinMax Index Supports Nullable DataType (#4792) close pingcap/tiflash#4787 --- dbms/src/DataTypes/IDataType.h | 1 - .../Storages/DeltaMerge/File/DMFileWriter.cpp | 5 +- .../Storages/DeltaMerge/Index/MinMaxIndex.cpp | 229 ++++++++++++++++-- .../Storages/DeltaMerge/Index/MinMaxIndex.h | 5 +- .../tests/gtest_dm_minmax_index.cpp | 82 ++++++- 5 files changed, 284 insertions(+), 38 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 120d0b1ba30..71fda0615e4 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -471,7 +471,6 @@ class IDataType : private boost::noncopyable virtual bool isEnum() const { return false; }; virtual bool isNullable() const { return false; } - /** Is this type can represent only NULL value? (It also implies isNullable) */ virtual bool onlyNull() const { return false; } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp index 3bff05ef19f..424e3f1b0c1 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp @@ -72,10 +72,9 @@ DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, for (auto & cd : write_columns) { // TODO: currently we only generate index for Integers, Date, DateTime types, and this should be configurable by user. 
- // TODO: If column type is nullable, we won't generate index for it /// for handle column always generate index - bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || cd.type->isInteger() || cd.type->isDateOrDateTime(); - + auto type = removeNullable(cd.type); + bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || type->isInteger() || type->isDateOrDateTime(); if (options.flags.isSingleFile()) { if (do_index) diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 21c53647cfc..ef42a036e1a 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -61,7 +61,6 @@ inline std::pair minmax(const IColumn & column, const ColumnVect void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * del_mark) { - const IColumn * column_ptr = &column; auto size = column.size(); bool has_null = false; if (column.isColumnNullable()) { @@ -70,7 +69,6 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de const auto & nullable_column = static_cast(column); const auto & null_mark_data = nullable_column.getNullMapColumn().getData(); - column_ptr = &nullable_column.getNestedColumn(); for (size_t i = 0; i < size; ++i) { @@ -82,14 +80,13 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de } } - const IColumn & updated_column = *column_ptr; - auto [min_index, max_index] = details::minmax(updated_column, del_mark, 0, updated_column.size()); + auto [min_index, max_index] = details::minmax(column, del_mark, 0, column.size()); if (min_index != NONE_EXIST) { has_null_marks->push_back(has_null); has_value_marks->push_back(1); - minmaxes->insertFrom(updated_column, min_index); - minmaxes->insertFrom(updated_column, max_index); + minmaxes->insertFrom(column, min_index); + minmaxes->insertFrom(column, max_index); } else { @@ -158,6 +155,64 @@ std::pair MinMaxIndex::getUInt64MinMax(size_t pack_index) return {minmaxes->get64(pack_index * 2), minmaxes->get64(pack_index * 2 + 1)}; } +RSResult MinMaxIndex::checkNullableEqual(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const ColumnNullable & column_nullable = static_cast(*minmaxes); + + const auto * raw_type = type.get(); + + // If minmaxes_data has null values, the value of minmaxes_data[i] is meaningless and may be just some random value. + // But in checkEqual, we have checked has_null_marks and ensured that there are no null values in the MinMax indexes.
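// Aside, for orientation: the accessors above rely on a fixed layout, where
// `minmaxes` holds two rows per pack, the minimum at index 2 * pack_index and the
// maximum at 2 * pack_index + 1, plus one has-null mark per pack. A simplified
// sketch of that layout and the rough check it enables (plain std::vector, not
// the TiFlash column types):
#include <cstddef>
#include <vector>

template <typename T>
struct MinMaxSketch
{
    std::vector<T> minmaxes;    // size == 2 * pack_count: [min0, max0, min1, max1, ...]
    std::vector<bool> has_null; // one mark per pack

    T packMin(size_t pack) const { return minmaxes[pack * 2]; }
    T packMax(size_t pack) const { return minmaxes[pack * 2 + 1]; }

    // Rough check: may this pack contain a row equal to `value`?
    bool mayEqual(size_t pack, T value) const
    {
        if (has_null[pack])
            return true; // min/max may be meaningless, cannot rule the pack out
        return packMin(pack) <= value && value <= packMax(pack);
    }
};

// This mirrors why checkEqual can safely read the nested column of a nullable
// `minmaxes`: the has_null early-out has already returned Some for any pack whose
// stored min/max might be garbage.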
+#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkEqual(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + return RoughCheck::checkEqual(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const DataTypePtr & type) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -166,6 +221,10 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D return RSResult::None; const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableEqual(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ @@ -178,14 +237,14 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D #undef DISPATCH if (typeid_cast(raw_type)) { - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkEqual(value, type, min, max); } if (typeid_cast(raw_type)) { - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkEqual(value, type, min, max); @@ -194,16 +253,16 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D { // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. // Check `struct MyTimeBase` for more details. 
- auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkEqual(value, type, min, max); } if (typeid_cast(raw_type)) { - auto * string_column = checkAndGetColumn(minmaxes.get()); - auto & chars = string_column->getChars(); - auto & offsets = string_column->getOffsets(); + const auto * string_column = checkAndGetColumn(minmaxes.get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); size_t pos = pack_index * 2; size_t prev_offset = pos == 0 ? 0 : offsets[pos - 1]; // todo use StringRef instead of String @@ -215,6 +274,62 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D } return RSResult::Some; } + +RSResult MinMaxIndex::checkNullableGreater(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const ColumnNullable & column_nullable = static_cast(*minmaxes); + const auto * raw_type = type.get(); + +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkGreater(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + return RoughCheck::checkGreater(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const DataTypePtr & type, int /*nan_direction_hint*/) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -223,6 +338,10 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const return RSResult::None; const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableGreater(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ @@ -235,14 +354,14 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const #undef DISPATCH if (typeid_cast(raw_type)) { - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkGreater(value, type, min, max); } if (typeid_cast(raw_type)) { - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkGreater(value, type, min, max); @@ -251,16 +370,16 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const { // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. // Check `struct MyTimeBase` for more details. - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkGreater(value, type, min, max); } if (typeid_cast(raw_type)) { - auto * string_column = checkAndGetColumn(minmaxes.get()); - auto & chars = string_column->getChars(); - auto & offsets = string_column->getOffsets(); + const auto * string_column = checkAndGetColumn(minmaxes.get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); size_t pos = pack_index * 2; size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; // todo use StringRef instead of String @@ -272,6 +391,62 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const } return RSResult::Some; } + +RSResult MinMaxIndex::checkNullableGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const ColumnNullable & column_nullable = static_cast(*minmaxes); + + const auto * raw_type = type.get(); +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkGreaterEqual(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type, int /*nan_direction_hint*/) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -280,6 +455,10 @@ RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, return RSResult::None; const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableGreaterEqual(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ @@ -292,14 +471,14 @@ RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, #undef DISPATCH if (typeid_cast(raw_type)) { - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkGreaterEqual(value, type, min, max); } if (typeid_cast(raw_type)) { - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkGreaterEqual(value, type, min, max); @@ -308,16 +487,16 @@ RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, { // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. // Check `struct MyTimeBase` for more details. - auto & minmaxes_data = toColumnVectorData(minmaxes); + const auto & minmaxes_data = toColumnVectorData(minmaxes); auto min = minmaxes_data[pack_index * 2]; auto max = minmaxes_data[pack_index * 2 + 1]; return RoughCheck::checkGreaterEqual(value, type, min, max); } if (typeid_cast(raw_type)) { - auto * string_column = checkAndGetColumn(minmaxes.get()); - auto & chars = string_column->getChars(); - auto & offsets = string_column->getOffsets(); + const auto * string_column = checkAndGetColumn(minmaxes.get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); size_t pos = pack_index * 2; size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; // todo use StringRef instead of String @@ -330,7 +509,7 @@ RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, return RSResult::Some; } -String MinMaxIndex::toString() const +String MinMaxIndex::toString() { return ""; } diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h index 34e69b056ce..73284333c73 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h @@ -80,7 +80,10 @@ class MinMaxIndex RSResult checkGreater(size_t pack_index, const Field & value, const DataTypePtr & type, int nan_direction); RSResult checkGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type, int nan_direction); - String toString() const; + static String toString(); + RSResult checkNullableEqual(size_t pack_index, const Field & value, const DataTypePtr & type); + RSResult checkNullableGreater(size_t pack_index, const Field & value, const DataTypePtr & type); + RSResult checkNullableGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type); }; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 31fd99faf01..460d42828d5 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -214,14 +214,6 @@ try ASSERT_EQ(true, checkMatch(case_name, *context, "MyDateTime", "2020-09-27", createLessEqual(attr("MyDateTime"), parseMyDateTime("2020-09-27"), 0))); ASSERT_EQ(false, checkMatch(case_name, *context, "MyDateTime", "2020-09-27", createLessEqual(attr("MyDateTime"), parseMyDateTime("2020-09-26"), 0))); - /// Currently we don't do filtering for null values. i.e. if a pack contains any null values, then the pack will pass the filter. 
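The null-handling contract that the assertions below exercise is worth restating: a pack whose null mark is set can never be skipped by a rough check, so every predicate over it degrades to RSResult::Some. A minimal, self-contained sketch of that rule, assuming the min/max layout used above (minmaxes_data[pack_index * 2] holds the pack minimum, minmaxes_data[pack_index * 2 + 1] the maximum; the names here are illustrative, the real comparisons live in RoughCheck and dispatch per DataType as shown in the hunks above):

#include <cassert>
#include <cstdint>

enum class RSResult { Some, None, All };

// One rough "greater than" check over a single pack.
RSResult checkGreaterSketch(bool pack_has_null, int64_t min, int64_t max, int64_t value)
{
    if (pack_has_null)
        return RSResult::Some; // NULL is unknown under comparison, the pack must pass the filter
    if (min > value)
        return RSResult::All; // every row in the pack is greater than value
    if (max <= value)
        return RSResult::None; // no row in the pack can be greater than value
    return RSResult::Some; // the pack may contain matching rows, read it
}

int main()
{
    assert(checkGreaterSketch(false, 0, 100, 100) == RSResult::None); // pack can be skipped
    assert(checkGreaterSketch(true, 0, 100, 100) == RSResult::Some); // NULLs force a read
    return 0;
}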
- ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); - ASSERT_EQ(false, checkDelMatch(case_name, *context, "Int64", "100", createEqual(attr("Int64"), Field((Int64)100)))); ASSERT_EQ(true, checkPkMatch(case_name, *context, "Int64", "100", createEqual(pkAttr(), Field((Int64)100)), true)); ASSERT_EQ(true, checkPkMatch(case_name, *context, "Int64", "100", createGreater(pkAttr(), Field((Int64)99), 0), true)); @@ -236,6 +228,80 @@ try } CATCH +TEST_F(DMMinMaxIndexTest, NullableToNullable) +try +{ + const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + // clang-format off + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createEqual(attr("Nullable(Int64)"), Field((Int64)100)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createIn(attr("Nullable(Int64)"), {Field((Int64)100)}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreater(attr("Nullable(Int64)"), Field((Int64)99), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLess(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLessEqual(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createEqual(attr("Nullable(Date)"), Field((String) "2020-09-27")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", 
createEqual(attr("Nullable(Date)"), Field((String) "2020-09-28")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createIn(attr("Nullable(Date)"), {Field((String) "2020-09-27")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createIn(attr("Nullable(Date)"), {Field((String) "2020-09-28")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreater(attr("Nullable(Date)"), Field((String) "2020-09-26"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreater(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreaterEqual(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreaterEqual(attr("Nullable(Date)"), Field((String) "2020-09-28"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLess(attr("Nullable(Date)"), Field((String) "2020-09-28"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLess(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLessEqual(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLessEqual(attr("Nullable(Date)"), Field((String) "2020-09-26"), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createIn(attr("Nullable(DateTime)"), {Field((String) "2020-01-01 05:00:01")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createIn(attr("Nullable(DateTime)"), {Field((String) "2020-01-01 05:00:02")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreater(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:00"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreater(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreaterEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreaterEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLess(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLess(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", 
createLessEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLessEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:00"), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createIn(attr("Nullable(MyDateTime)"), {parseMyDateTime("2020-09-27")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createIn(attr("Nullable(MyDateTime)"), {parseMyDateTime("2020-09-28")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreater(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-26"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreater(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreaterEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreaterEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLess(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLess(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLessEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLessEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-26"), 0))); + + // has null + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), 
Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); +} +CATCH + TEST_F(DMMinMaxIndexTest, Logical) try { From f601de130d9f74141f654c2f63813d0aae1830d4 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Tue, 10 May 2022 17:58:34 +0800 Subject: [PATCH 75/79] Interpreter: Run interpreter test without exchange executors (#4788) ref pingcap/tiflash#4609 --- .../MockTableScanBlockInputStream.cpp | 4 +- .../MockTableScanBlockInputStream.h | 2 +- dbms/src/Flash/Coprocessor/DAGContext.h | 27 ++ .../Coprocessor/DAGQueryBlockInterpreter.cpp | 28 +- .../Coprocessor/DAGQueryBlockInterpreter.h | 2 + .../Flash/Coprocessor/GenSchemaAndColumn.cpp | 56 ++++ .../Flash/Coprocessor/GenSchemaAndColumn.h | 26 ++ dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 5 +- dbms/src/Flash/Coprocessor/TiDBTableScan.h | 2 - dbms/src/Flash/tests/gtest_interpreter.cpp | 257 ++++++++++++++++++ dbms/src/TestUtils/InterpreterTestUtils.cpp | 35 ++- dbms/src/TestUtils/InterpreterTestUtils.h | 15 +- dbms/src/TestUtils/mockExecutor.cpp | 2 + .../TestUtils/tests/gtest_mock_executors.cpp | 13 +- .../tests/gtest_window_functions.cpp | 2 +- 15 files changed, 445 insertions(+), 31 deletions(-) rename dbms/src/{TestUtils => DataStreams}/MockTableScanBlockInputStream.cpp (95%) rename dbms/src/{TestUtils => DataStreams}/MockTableScanBlockInputStream.h (95%) create mode 100644 dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp create mode 100644 dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h create mode 100644 dbms/src/Flash/tests/gtest_interpreter.cpp diff --git a/dbms/src/TestUtils/MockTableScanBlockInputStream.cpp b/dbms/src/DataStreams/MockTableScanBlockInputStream.cpp similarity index 95% rename from dbms/src/TestUtils/MockTableScanBlockInputStream.cpp rename to dbms/src/DataStreams/MockTableScanBlockInputStream.cpp index 316c7487a63..0405e8082db 100644 --- a/dbms/src/TestUtils/MockTableScanBlockInputStream.cpp +++ b/dbms/src/DataStreams/MockTableScanBlockInputStream.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
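The contract of the stream being moved here is small enough to state in isolation. Below is a toy, dependency-free analogue (not the TiFlash API: the real MockTableScanBlockInputStream works over ColumnsWithTypeAndName and returns Blocks): pre-built rows are handed back in max_block_size chunks, and an empty result plays the role of the empty Block that ends a stream.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

class MockScan
{
public:
    MockScan(std::vector<int> rows_, size_t max_block_size_)
        : rows(std::move(rows_))
        , max_block_size(max_block_size_)
    {}

    // Returns the next chunk of at most max_block_size rows; empty means end-of-stream.
    std::vector<int> read()
    {
        const size_t n = std::min(max_block_size, rows.size() - pos);
        std::vector<int> chunk(rows.begin() + pos, rows.begin() + pos + n);
        pos += n;
        return chunk;
    }

private:
    std::vector<int> rows;
    size_t max_block_size;
    size_t pos = 0;
};

int main()
{
    MockScan scan({1, 2, 3, 4, 5}, 2);
    for (auto chunk = scan.read(); !chunk.empty(); chunk = scan.read())
        std::cout << chunk.size() << " rows\n"; // prints 2, 2, 1
    return 0;
}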
-#include +#include namespace DB { @@ -32,7 +32,7 @@ MockTableScanBlockInputStream::MockTableScanBlockInputStream(ColumnsWithTypeAndN } } -ColumnPtr MockTableScanBlockInputStream::makeColumn(ColumnWithTypeAndName elem) +ColumnPtr MockTableScanBlockInputStream::makeColumn(ColumnWithTypeAndName elem) const { auto column = elem.type->createColumn(); size_t row_count = 0; diff --git a/dbms/src/TestUtils/MockTableScanBlockInputStream.h b/dbms/src/DataStreams/MockTableScanBlockInputStream.h similarity index 95% rename from dbms/src/TestUtils/MockTableScanBlockInputStream.h rename to dbms/src/DataStreams/MockTableScanBlockInputStream.h index d148d7f3ac1..624afc195ee 100644 --- a/dbms/src/TestUtils/MockTableScanBlockInputStream.h +++ b/dbms/src/DataStreams/MockTableScanBlockInputStream.h @@ -34,7 +34,7 @@ class MockTableScanBlockInputStream : public IProfilingBlockInputStream protected: Block readImpl() override; - ColumnPtr makeColumn(ColumnWithTypeAndName elem); + ColumnPtr makeColumn(ColumnWithTypeAndName elem) const; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 18ad73ec207..d031ff103ff 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -167,8 +167,30 @@ class DAGContext , max_recorded_error_count(max_error_count_) , warnings(max_recorded_error_count) , warning_count(0) + , is_test(true) {} + // for tests need to run query tasks. + explicit DAGContext(const tipb::DAGRequest & dag_request_, String log_identifier, size_t concurrency) + : dag_request(&dag_request_) + , initialize_concurrency(concurrency) + , is_mpp_task(false) + , is_root_mpp_task(false) + , tunnel_set(nullptr) + , log(Logger::get(log_identifier)) + , flags(dag_request->flags()) + , sql_mode(dag_request->sql_mode()) + , max_recorded_error_count(getMaxErrorCount(*dag_request)) + , warnings(max_recorded_error_count) + , warning_count(0) + , is_test(true) + { + assert(dag_request->has_root_executor() || dag_request->executors_size() > 0); + return_executor_id = dag_request->root_executor().has_executor_id() || dag_request->executors(0).has_executor_id(); + + initOutputInfo(); + } + void attachBlockIO(const BlockIO & io_); std::unordered_map & getProfileStreamsMap(); @@ -275,6 +297,8 @@ class DAGContext return sql_mode & f; } + bool isTest() const { return is_test; } + void cancelAllExchangeReceiver(); void initExchangeReceiverIfMPP(Context & context, size_t max_streams); @@ -287,6 +311,7 @@ class DAGContext const tipb::DAGRequest * dag_request; Int64 compile_time_ns = 0; size_t final_concurrency = 1; + size_t initialize_concurrency = 1; bool has_read_wait_index = false; Clock::time_point read_wait_index_start_timestamp{Clock::duration::zero()}; Clock::time_point read_wait_index_end_timestamp{Clock::duration::zero()}; @@ -345,6 +370,8 @@ class DAGContext /// vector of SubqueriesForSets(such as join build subquery). /// The order of the vector is also the order of the subquery. std::vector subqueries; + + bool is_test = false; /// switch for test, do not use it in production. 
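This second constructor is the piece that lets a test drive a DAGRequest end to end without a gRPC task. The intended call pattern, mirrored from the executeInterpreter helper added later in this patch (a sketch, not a new API: `request` is assumed to come from DAGRequestBuilder::build, `context` is a Context, and error handling is elided):

// Inside a gtest body:
DAGContext dag_context(*request, /*log_identifier=*/"interpreter_test", /*concurrency=*/10);
context.setDAGContext(&dag_context);
DAGQuerySource dag(context);
auto res = executeQuery(dag, context, /*internal=*/false, QueryProcessingStage::Complete);
// res.in is the root of the BlockInputStream tree that the expected strings assert on.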
}; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 45a3c1e9471..c11c5bd75a2 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +48,7 @@ #include #include #include +#include #include namespace DB @@ -96,7 +99,8 @@ AnalysisResult analyzeExpressions( AnalysisResult res; ExpressionActionsChain chain; // selection on table scan had been executed in handleTableScan - if (query_block.selection && !query_block.isTableScanSource()) + // In test mode, filter is not pushed down to table scan + if (query_block.selection && (!query_block.isTableScanSource() || context.getDAGContext()->isTest())) { std::vector where_conditions; for (const auto & c : query_block.selection->selection().conditions()) @@ -153,6 +157,19 @@ AnalysisResult analyzeExpressions( } } // namespace +// for tests, we need to mock tableScan blockInputStream as the source stream. +void DAGQueryBlockInterpreter::handleMockTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline) +{ + auto names_and_types = genNamesAndTypes(table_scan); + auto columns_with_type_and_name = getColumnWithTypeAndName(names_and_types); + analyzer = std::make_unique(std::move(names_and_types), context); + for (size_t i = 0; i < max_streams; ++i) + { + auto mock_table_scan_stream = std::make_shared(columns_with_type_and_name, context.getSettingsRef().max_block_size); + pipeline.streams.emplace_back(mock_table_scan_stream); + } +} + void DAGQueryBlockInterpreter::handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline) { const auto push_down_filter = PushDownFilter::toPushDownFilter(query_block.selection); @@ -752,7 +769,10 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) else if (query_block.isTableScanSource()) { TiDBTableScan table_scan(query_block.source, query_block.source_name, dagContext()); - handleTableScan(table_scan, pipeline); + if (dagContext().isTest()) + handleMockTableScan(table_scan, pipeline); + else + handleTableScan(table_scan, pipeline); dagContext().table_scan_executor_id = query_block.source_name; } else if (query_block.source->tp() == tipb::ExecType::TypeWindow) @@ -799,14 +819,12 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) // execute aggregation executeAggregation(pipeline, res.before_aggregation, res.aggregation_keys, res.aggregation_collators, res.aggregate_descriptions, res.is_final_agg); } - if (res.before_having) { // execute having executeWhere(pipeline, res.before_having, res.having_column_name); recordProfileStreams(pipeline, query_block.having_name); } - if (res.before_order_and_select) { executeExpression(pipeline, res.before_order_and_select); @@ -821,14 +839,12 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) // execute final project action executeProject(pipeline, final_project); - // execute limit if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::TypeLimit) { executeLimit(pipeline); recordProfileStreams(pipeline, query_block.limit_or_topn_name); } - restorePipelineConcurrency(pipeline); // execute exchange_sender diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 
84253afbc45..f5a8d2b5ce5 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,7 @@ class DAGQueryBlockInterpreter private: #endif void executeImpl(DAGPipeline & pipeline); + void handleMockTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); void handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); void handleJoin(const tipb::Join & join, DAGPipeline & pipeline, SubqueryForSet & right_query); void prepareJoin( diff --git a/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp new file mode 100644 index 00000000000..e7964021709 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp @@ -0,0 +1,56 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include + +namespace DB +{ +NamesAndTypes genNamesAndTypes(const TiDBTableScan & table_scan) +{ + NamesAndTypes names_and_types; + names_and_types.reserve(table_scan.getColumnSize()); + for (Int32 i = 0; i < table_scan.getColumnSize(); ++i) + { + TiDB::ColumnInfo column_info; + const auto & ci = table_scan.getColumns()[i]; + column_info.tp = static_cast(ci.tp()); + column_info.id = ci.column_id(); + + switch (column_info.id) + { + case TiDBPkColumnID: + // TODO: need to check if the type of pk_handle_columns matches the type that used in delta merge tree. + names_and_types.emplace_back(MutableSupport::tidb_pk_column_name, getDataTypeByColumnInfoForComputingLayer(column_info)); + break; + case ExtraTableIDColumnID: + names_and_types.emplace_back(MutableSupport::extra_table_id_column_name, MutableSupport::extra_table_id_column_type); + break; + default: + names_and_types.emplace_back(fmt::format("mock_table_scan_{}", i), getDataTypeByColumnInfoForComputingLayer(column_info)); + } + } + return names_and_types; +} + +ColumnsWithTypeAndName getColumnWithTypeAndName(const NamesAndTypes & names_and_types) +{ + std::vector column_with_type_and_names; + column_with_type_and_names.reserve(names_and_types.size()); + for (const auto & col : names_and_types) + { + column_with_type_and_names.push_back(DB::ColumnWithTypeAndName(col.type, col.name)); + } + return column_with_type_and_names; +} +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h new file mode 100644 index 00000000000..617f69de925 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h @@ -0,0 +1,26 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +NamesAndTypes genNamesAndTypes(const TiDBTableScan & table_scan); +ColumnsWithTypeAndName getColumnWithTypeAndName(const NamesAndTypes & names_and_types); +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 6b118f1dd40..b7c75c06e67 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -28,8 +28,11 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) const Settings & settings = context.getSettingsRef(); if (dagContext().isBatchCop() || dagContext().isMPPTask()) max_streams = settings.max_threads; + else if (dagContext().isTest()) + max_streams = dagContext().initialize_concurrency; else max_streams = 1; + if (max_streams > 1) { max_streams *= settings.max_streams_to_max_threads_ratio; @@ -79,7 +82,6 @@ BlockIO InterpreterDAG::execute() BlockInputStreams streams = executeQueryBlock(*dag.getRootQueryBlock()); DAGPipeline pipeline; pipeline.streams = streams; - /// add union to run in parallel if needed if (dagContext().isMPPTask()) /// MPPTask do not need the returned blocks. @@ -95,7 +97,6 @@ BlockIO InterpreterDAG::execute() SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode), dagContext().log->identifier()); } - BlockIO res; res.in = pipeline.firstStream(); return res; diff --git a/dbms/src/Flash/Coprocessor/TiDBTableScan.h b/dbms/src/Flash/Coprocessor/TiDBTableScan.h index 934ee2c7769..6ac07d326f6 100644 --- a/dbms/src/Flash/Coprocessor/TiDBTableScan.h +++ b/dbms/src/Flash/Coprocessor/TiDBTableScan.h @@ -16,8 +16,6 @@ #include -#include - namespace DB { /// TiDBTableScan is a wrap to hide the difference of `TableScan` and `PartitionTableScan` diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp new file mode 100644 index 00000000000..961ab525be8 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -0,0 +1,257 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
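One prerequisite for reading the expected trees in the new test file: the `x 10` fan-out in them comes from the InterpreterDAG.cpp hunk above, where a test run takes its stream count from initialize_concurrency instead of max_threads. Restated as a standalone function (names are illustrative, and max_streams_to_max_threads_ratio is simplified to an integer here):

#include <cstddef>

struct SettingsSketch
{
    size_t max_threads = 8;
    size_t max_streams_to_max_threads_ratio = 1;
};

size_t decideMaxStreams(bool is_batch_cop_or_mpp, bool is_test, size_t initialize_concurrency, const SettingsSketch & settings)
{
    size_t max_streams = 1;
    if (is_batch_cop_or_mpp)
        max_streams = settings.max_threads;
    else if (is_test)
        max_streams = initialize_concurrency; // fixed by the test, keeps expected trees stable
    if (max_streams > 1)
        max_streams *= settings.max_streams_to_max_threads_ratio;
    return max_streams;
}

int main()
{
    SettingsSketch settings;
    // ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10) ends up here with concurrency = 10.
    return decideMaxStreams(false, true, 10, settings) == 10 ? 0 : 1;
}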
+ +#include +#include + +namespace DB +{ +namespace tests +{ +class InterpreterExecuteTest : public DB::tests::InterpreterTest +{ +public: + void initializeContext() override + { + InterpreterTest::initializeContext(); + + context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}); + context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); + context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + } +}; + +TEST_F(InterpreterExecuteTest, SingleQueryBlock) +try +{ + auto request = context.scan("test_db", "test_table_1") + .filter(eq(col("s2"), col("s3"))) + .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) + .filter(eq(col("s2"), col("s3"))) + .topN("s2", false, 10) + .build(context); + { + String expected = R"( +Union + SharedQuery x 10 + Expression + MergeSorting + Union + PartialSorting x 10 + Expression + Filter + SharedQuery + ParallelAggregating + Expression x 10 + Filter + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .filter(eq(col("s2"), col("s3"))) + .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) + .filter(eq(col("s2"), col("s3"))) + .limit(10) + .build(context); + + { + String expected = R"( +Union + SharedQuery x 10 + Limit + Union + Limit x 10 + Expression + Expression + Filter + SharedQuery + ParallelAggregating + Expression x 10 + Filter + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } +} +CATCH + +TEST_F(InterpreterExecuteTest, MultipleQueryBlockWithSource) +try +{ + auto request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .project({"s1", "s2"}) + .project("s1") + .build(context); + { + String expected = R"( +Union + Expression x 10 + Expression + Expression + Expression + Expression + Expression + Expression + Expression + Expression + Expression + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .topN({{"s1", true}, {"s2", false}}, 10) + .project({"s1", "s2"}) + .build(context); + { + String expected = R"( +Union + Expression x 10 + Expression + Expression + SharedQuery + Expression + MergeSorting + Union + PartialSorting x 10 + Expression + Expression + Expression + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .topN({{"s1", true}, {"s2", false}}, 10) + .project({"s1", "s2"}) + .aggregation({Max(col("s1"))}, {col("s1"), col("s2")}) + .project({"max(s1)", "s1", "s2"}) + .build(context); + { + String expected = R"( +Union + Expression x 10 + Expression + Expression + Expression + SharedQuery + ParallelAggregating + Expression x 10 + Expression + Expression + SharedQuery + Expression + MergeSorting + Union + PartialSorting x 10 + Expression + Expression + Expression + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .topN({{"s1", true}, {"s2", false}}, 10) + .project({"s1", "s2"}) + .aggregation({Max(col("s1"))}, 
{col("s1"), col("s2")}) + .project({"max(s1)", "s1", "s2"}) + .filter(eq(col("s1"), col("s2"))) + .project({"max(s1)", "s1"}) + .limit(10) + .build(context); + { + String expected = R"( +Union + SharedQuery x 10 + Limit + Union + Limit x 10 + Expression + Expression + Expression + Expression + Expression + Filter + Expression + Expression + Expression + SharedQuery + ParallelAggregating + Expression x 10 + Expression + Expression + SharedQuery + Expression + MergeSorting + Union + PartialSorting x 10 + Expression + Expression + Expression + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + // Join Source. + DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + DAGRequestBuilder table3 = context.scan("test_db", "r_table"); + DAGRequestBuilder table4 = context.scan("test_db", "l_table"); + + request = table1.join( + table2.join( + table3.join(table4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .build(context); + { + String expected = R"( +CreatingSets + Union + HashJoinBuildBlockInputStream x 10 + Expression + Expression + MockTableScan + Union x 2 + HashJoinBuildBlockInputStream x 10 + Expression + Expression + Expression + HashJoinProbe + Expression + MockTableScan + Union + Expression x 10 + Expression + HashJoinProbe + Expression + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } +} +CATCH + + +} // namespace tests +} // namespace DB \ No newline at end of file diff --git a/dbms/src/TestUtils/InterpreterTestUtils.cpp b/dbms/src/TestUtils/InterpreterTestUtils.cpp index 737978a8bc4..2cc096d4095 100644 --- a/dbms/src/TestUtils/InterpreterTestUtils.cpp +++ b/dbms/src/TestUtils/InterpreterTestUtils.cpp @@ -13,24 +13,26 @@ // limitations under the License. #include +#include +#include #include #include - namespace DB::tests { -DAGContext & MockExecutorTest::getDAGContext() +DAGContext & InterpreterTest::getDAGContext() { assert(dag_context_ptr != nullptr); return *dag_context_ptr; } -void MockExecutorTest::initializeContext() +void InterpreterTest::initializeContext() { dag_context_ptr = std::make_unique(1024); context = MockDAGRequestContext(TiFlashTestEnv::getContext()); + dag_context_ptr->log = Logger::get("interpreterTest"); } -void MockExecutorTest::SetUpTestCase() +void InterpreterTest::SetUpTestCase() { try { @@ -43,8 +45,29 @@ void MockExecutorTest::SetUpTestCase() } } -void MockExecutorTest::dagRequestEqual(String & expected_string, const std::shared_ptr & actual) +void InterpreterTest::initializeClientInfo() +{ + context.context.setCurrentQueryId("test"); + ClientInfo & client_info = context.context.getClientInfo(); + client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; + client_info.interface = ClientInfo::Interface::GRPC; +} + +void InterpreterTest::executeInterpreter(const String & expected_string, const std::shared_ptr & request, size_t concurrency) { - ASSERT_EQ(Poco::trimInPlace(expected_string), Poco::trim(ExecutorSerializer().serialize(actual.get()))); + DAGContext dag_context(*request, "interpreter_test", concurrency); + context.context.setDAGContext(&dag_context); + // Currently, don't care about regions information in interpreter tests. 
+ DAGQuerySource dag(context.context); + auto res = executeQuery(dag, context.context, false, QueryProcessingStage::Complete); + FmtBuffer fb; + res.in->dumpTree(fb); + ASSERT_EQ(Poco::trim(expected_string), Poco::trim(fb.toString())); } + +void InterpreterTest::dagRequestEqual(const String & expected_string, const std::shared_ptr & actual) +{ + ASSERT_EQ(Poco::trim(expected_string), Poco::trim(ExecutorSerializer().serialize(actual.get()))); +} + } // namespace DB::tests diff --git a/dbms/src/TestUtils/InterpreterTestUtils.h b/dbms/src/TestUtils/InterpreterTestUtils.h index 074c65da6f0..28d44d3a5f2 100644 --- a/dbms/src/TestUtils/InterpreterTestUtils.h +++ b/dbms/src/TestUtils/InterpreterTestUtils.h @@ -26,30 +26,37 @@ #include namespace DB::tests { -class MockExecutorTest : public ::testing::Test +void executeInterpreter(const std::shared_ptr & request, Context & context); +class InterpreterTest : public ::testing::Test { protected: void SetUp() override { initializeContext(); + initializeClientInfo(); } public: - MockExecutorTest() + InterpreterTest() : context(TiFlashTestEnv::getContext()) {} static void SetUpTestCase(); virtual void initializeContext(); + void initializeClientInfo(); + DAGContext & getDAGContext(); - static void dagRequestEqual(String & expected_string, const std::shared_ptr & actual); + static void dagRequestEqual(const String & expected_string, const std::shared_ptr & actual); + + void executeInterpreter(const String & expected_string, const std::shared_ptr & request, size_t concurrency); protected: MockDAGRequestContext context; std::unique_ptr dag_context_ptr; }; -#define ASSERT_DAGREQUEST_EQAUL(str, request) dagRequestEqual(str, request); +#define ASSERT_DAGREQUEST_EQAUL(str, request) dagRequestEqual((str), (request)); +#define ASSERT_BLOCKINPUTSTREAM_EQAUL(str, request, concurrency) executeInterpreter((str), (request), (concurrency)) } // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index c862c7deec8..3313aae6a93 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -88,11 +88,13 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String assert(!columns.empty()); TableInfo table_info; table_info.name = db + "." + table; + int i = 0; for (const auto & column : columns) { TiDB::ColumnInfo ret; ret.tp = column.second; ret.name = column.first; + ret.id = i++; table_info.columns.push_back(std::move(ret)); } String empty_alias; diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 5c7d77c399a..6dbf791669f 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -12,22 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
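Both assertion helpers above reduce to comparing Poco-trimmed strings, which is why the expected trees in gtest_interpreter.cpp can be raw-string literals starting with a newline. A dependency-free illustration of that comparison (trim here stands in for Poco::trim):

#include <cassert>
#include <string>

std::string trim(const std::string & s)
{
    const char * ws = " \t\r\n";
    const auto begin = s.find_first_not_of(ws);
    if (begin == std::string::npos)
        return "";
    const auto end = s.find_last_not_of(ws);
    return s.substr(begin, end - begin + 1);
}

int main()
{
    const std::string expected = R"(
Union
  MockTableScan)";
    const std::string actual = "Union\n  MockTableScan\n";
    assert(trim(expected) == trim(actual)); // leading/trailing whitespace never fails the test
    return 0;
}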
-#include #include #include -#include namespace DB { namespace tests { -class MockDAGRequestTest : public DB::tests::MockExecutorTest +class MockDAGRequestTest : public DB::tests::InterpreterTest { public: void initializeContext() override { - dag_context_ptr = std::make_unique(1024); - context = MockDAGRequestContext(TiFlashTestEnv::getContext()); + InterpreterTest::initializeContext(); context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeLong}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); @@ -45,6 +42,8 @@ try String expected = "table_scan_0 | {<0, String>, <1, String>}\n"; ASSERT_DAGREQUEST_EQAUL(expected, request); } + + request = context.scan("test_db", "test_table_1").build(context); { String expected = "table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; @@ -63,10 +62,10 @@ try ASSERT_DAGREQUEST_EQAUL(expected, request); } request = context.scan("test_db", "test_table_1") - .filter(And(eq(col("s1"), col("s2")), lt(col("s2"), lt(col("s1"), col("s2"))))) + .filter(And(eq(col("s1"), col("s2")), lt(col("s2"), col("s2")))) // type in lt must be same .build(context); { - String expected = "selection_1 | equals(<0, Long>, <1, String>) and less(<1, String>, less(<0, Long>, <1, String>))}\n" + String expected = "selection_1 | equals(<0, Long>, <1, String>) and less(<1, String>, <1, String>)}\n" " table_scan_0 | {<0, Long>, <1, String>, <2, String>}\n"; ASSERT_DAGREQUEST_EQAUL(expected, request); } diff --git a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp index f94a20c1a65..e4205f6f938 100644 --- a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp +++ b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp @@ -14,10 +14,10 @@ #include #include +#include #include #include #include -#include #include #include From cf0dd8605ae207c49a23a71808e7e06b6e5ad2b1 Mon Sep 17 00:00:00 2001 From: lidezhu <47731263+lidezhu@users.noreply.github.com> Date: Wed, 11 May 2022 12:06:36 +0800 Subject: [PATCH 76/79] clear storage data before drop its metadata (#4673) ref pingcap/tiflash#3594 --- dbms/src/Common/FailPoint.cpp | 4 +- dbms/src/Debug/DBGInvoker.cpp | 1 + dbms/src/Debug/dbgFuncMisc.cpp | 12 ++ dbms/src/Debug/dbgFuncMisc.h | 7 + .../src/Interpreters/InterpreterDropQuery.cpp | 4 + dbms/src/Server/StorageConfigParser.cpp | 1 - .../Storages/DeltaMerge/DeltaMergeStore.cpp | 61 +++++-- .../src/Storages/DeltaMerge/DeltaMergeStore.h | 7 + dbms/src/Storages/DeltaMerge/Segment.cpp | 7 +- dbms/src/Storages/DeltaMerge/Segment.h | 2 +- .../tests/gtest_dm_storage_delta_merge.cpp | 152 ++++++++++++++++++ dbms/src/Storages/IStorage.h | 40 +++-- dbms/src/Storages/StorageDeltaMerge.cpp | 15 +- dbms/src/Storages/StorageDeltaMerge.h | 2 + .../raft/schema/partition_table_restart.test | 2 + 15 files changed, 286 insertions(+), 31 deletions(-) diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index 8e8b6117def..2c641858e76 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -63,7 +63,9 @@ std::unordered_map> FailPointHelper::f M(force_legacy_or_checkpoint_page_file_exists) \ M(exception_in_creating_set_input_stream) \ M(exception_when_read_from_log) \ - M(exception_mpp_hash_build) + M(exception_mpp_hash_build) \ + M(exception_before_drop_segment) \ + M(exception_after_drop_segment) #define APPLY_FOR_FAILPOINTS(M) \ 
M(force_set_page_file_write_errno) \ diff --git a/dbms/src/Debug/DBGInvoker.cpp b/dbms/src/Debug/DBGInvoker.cpp index 6ae0f9ebd53..3f633c08e67 100644 --- a/dbms/src/Debug/DBGInvoker.cpp +++ b/dbms/src/Debug/DBGInvoker.cpp @@ -121,6 +121,7 @@ DBGInvoker::DBGInvoker() regSchemalessFunc("search_log_for_key", dbgFuncSearchLogForKey); regSchemalessFunc("tidb_dag", dbgFuncTiDBQueryFromNaturalDag); + regSchemalessFunc("gc_global_storage_pool", dbgFuncTriggerGlobalPageStorageGC); regSchemalessFunc("read_index_stress_test", ReadIndexStressTest::dbgFuncStressTest); } diff --git a/dbms/src/Debug/dbgFuncMisc.cpp b/dbms/src/Debug/dbgFuncMisc.cpp index 8563aaf7433..b9f62317189 100644 --- a/dbms/src/Debug/dbgFuncMisc.cpp +++ b/dbms/src/Debug/dbgFuncMisc.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -97,4 +98,15 @@ void dbgFuncSearchLogForKey(Context & context, const ASTs & args, DBGInvoker::Pr else output("Invalid"); } + +void dbgFuncTriggerGlobalPageStorageGC(Context & context, const ASTs & /*args*/, DBGInvoker::Printer /*output*/) +{ + auto global_storage_pool = context.getGlobalStoragePool(); + if (global_storage_pool) + { + global_storage_pool->meta()->gc(); + global_storage_pool->log()->gc(); + global_storage_pool->data()->gc(); + } +} } // namespace DB diff --git a/dbms/src/Debug/dbgFuncMisc.h b/dbms/src/Debug/dbgFuncMisc.h index c863a6cd8fc..c256e6b41c0 100644 --- a/dbms/src/Debug/dbgFuncMisc.h +++ b/dbms/src/Debug/dbgFuncMisc.h @@ -26,4 +26,11 @@ class Context; // ./storage-client.sh "DBGInvoke search_log_for_key(key)" void dbgFuncSearchLogForKey(Context & context, const ASTs & args, DBGInvoker::Printer output); +// Trigger the gc process of global storage pool. Used to remove obsolete entries left by the previous dropped table +// Note that it is ok for now since we don't store external/ref on GlobalStoragePool.meta, or it may run into same +// problem as https://github.com/pingcap/tiflash/pull/4850 +// Usage: +// ./storage-client.sh "DBGInvoke trigger_global_storage_pool_gc()" +void dbgFuncTriggerGlobalPageStorageGC(Context & context, const ASTs & args, DBGInvoker::Printer output); + } // namespace DB diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index a6060064f74..fe1391cab50 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -159,6 +159,10 @@ BlockIO InterpreterDropQuery::execute() } else { + /// Clear storage data first, and if tiflash crash in the middle of `clearData`, + /// this table can still be restored, and can call `clearData` again. 
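Since the two comment lines above carry the whole crash-safety argument of this patch, here is the same ordering written out as a compilable sketch (the stub types stand in for IStorage and IDatabase; step 3 is an assumption about the surrounding drop flow, not part of this hunk):

#include <string>

struct StorageStub
{
    void clearData() {} // drops segment data but keeps the storage restorable
    void drop() {} // removes everything; the storage can no longer be restored
};
struct DatabaseStub
{
    void removeTable(const std::string & /*name*/) {} // drops the table metadata
};

// Crash before (1): nothing changed. Crash between (1) and (2): metadata still exists,
// so the table is restored on restart and clearData() simply runs again. Crash after (2):
// the metadata is gone and any leftover data is unreachable garbage.
void dropTableSketch(StorageStub & table, DatabaseStub & database, const std::string & name)
{
    table.clearData(); // (1) clear data first
    database.removeTable(name); // (2) only then drop metadata
    table.drop(); // (3) assumed final on-disk cleanup
}

int main()
{
    StorageStub table;
    DatabaseStub db;
    dropTableSketch(table, db, "test_db.test_table");
    return 0;
}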
+ table.first->clearData(); + /// Delete table metdata and table itself from memory database->removeTable(context, current_table_name); diff --git a/dbms/src/Server/StorageConfigParser.cpp b/dbms/src/Server/StorageConfigParser.cpp index 653a4eb947f..270390ac1fa 100644 --- a/dbms/src/Server/StorageConfigParser.cpp +++ b/dbms/src/Server/StorageConfigParser.cpp @@ -216,7 +216,6 @@ void TiFlashStorageConfig::parseMisc(const String & storage_section, Poco::Logge // config for experimental feature, may remove later enable_ps_v3 = get_bool_config_or_default("enable_ps_v3", enable_ps_v3); - LOG_FMT_INFO(log, "format_version {} lazily_init_store {} enable_ps_v3 {}", format_version, lazily_init_store, enable_ps_v3); } diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 36063e2bd83..b27c94305aa 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -95,6 +95,8 @@ extern const char force_set_segment_ingest_packs_fail[]; extern const char segment_merge_after_ingest_packs[]; extern const char random_exception_after_dt_write_done[]; extern const char force_slow_page_storage_snapshot_release[]; +extern const char exception_before_drop_segment[]; +extern const char exception_after_drop_segment[]; } // namespace FailPoints namespace DM @@ -369,12 +371,8 @@ void DeltaMergeStore::rename(String /*new_path*/, bool clean_rename, String new_ db_name.swap(new_database_name); } -void DeltaMergeStore::drop() +void DeltaMergeStore::dropAllSegments(bool keep_first_segment) { - // Remove all background task first - shutdown(); - - LOG_FMT_INFO(log, "Drop DeltaMerge removing data from filesystem [{}.{}]", db_name, table_name); auto dm_context = newDMContext(global_context, global_context.getSettingsRef()); { std::unique_lock lock(read_write_mutex); @@ -390,31 +388,76 @@ void DeltaMergeStore::drop() while (!segment_ids.empty()) { auto segment_id_to_drop = segment_ids.top(); + if (keep_first_segment && (segment_id_to_drop == DELTA_MERGE_FIRST_SEGMENT_ID)) + { + // This must be the last segment to drop + assert(segment_ids.size() == 1); + break; + } auto segment_to_drop = id_to_segment[segment_id_to_drop]; segment_ids.pop(); + SegmentPtr previous_segment; + SegmentPtr new_previous_segment; if (!segment_ids.empty()) { // This is not the last segment, so we need to set previous segment's next_segment_id to 0 to indicate that this segment has been dropped auto previous_segment_id = segment_ids.top(); - auto previous_segment = id_to_segment[previous_segment_id]; + previous_segment = id_to_segment[previous_segment_id]; assert(previous_segment->nextSegmentId() == segment_id_to_drop); auto previous_lock = previous_segment->mustGetUpdateLock(); - auto new_previous_segment = previous_segment->dropNextSegment(wbs); + + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_before_drop_segment); // No need to abandon previous_segment, because it's delta and stable is managed by the new_previous_segment. 
// Abandon previous_segment will actually abandon new_previous_segment - segments.emplace(new_previous_segment->getRowKeyRange().getEnd(), new_previous_segment); - id_to_segment.emplace(previous_segment_id, new_previous_segment); + // + // And we need to use the previous_segment to manage the dropped segment's range, + // because if tiflash crash in the middle of the drop table process, and when restoring this table at restart, + // there are some possibilities that this table will trigger some background tasks, + // and in these background tasks, it may check that all ranges of this table should be managed by some segment. + new_previous_segment = previous_segment->dropNextSegment(wbs, segment_to_drop->getRowKeyRange()); + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_after_drop_segment); } // The order to drop the meta and data of this segment doesn't matter, // Because there is no segment pointing to this segment, // so it won't be restored again even the drop process was interrupted by restart segments.erase(segment_to_drop->getRowKeyRange().getEnd()); id_to_segment.erase(segment_id_to_drop); + if (previous_segment) + { + assert(new_previous_segment); + assert(previous_segment->segmentId() == new_previous_segment->segmentId()); + segments.erase(previous_segment->getRowKeyRange().getEnd()); + segments.emplace(new_previous_segment->getRowKeyRange().getEnd(), new_previous_segment); + id_to_segment.erase(previous_segment->segmentId()); + id_to_segment.emplace(new_previous_segment->segmentId(), new_previous_segment); + } auto drop_lock = segment_to_drop->mustGetUpdateLock(); segment_to_drop->abandon(*dm_context); segment_to_drop->drop(global_context.getFileProvider(), wbs); } } +} + +void DeltaMergeStore::clearData() +{ + // Remove all background task first + shutdown(); + LOG_FMT_INFO(log, "Clear DeltaMerge segments data [{}.{}]", db_name, table_name); + // We don't drop the first segment in clearData, because if we drop it and tiflash crashes before drop the table's metadata, + // when restart the table will try to restore the first segment but failed to do it which cause tiflash crash again. + // The reason this happens is that even we delete all data in a PageStorage instance, + // the call to PageStorage::getMaxId is still not 0 so tiflash treat it as an old table and will try to restore it's first segment. + dropAllSegments(true); + LOG_FMT_INFO(log, "Clear DeltaMerge segments data done [{}.{}]", db_name, table_name); +} + +void DeltaMergeStore::drop() +{ + // Remove all background task first + shutdown(); + + LOG_FMT_INFO(log, "Drop DeltaMerge removing data from filesystem [{}.{}]", db_name, table_name); + dropAllSegments(false); storage_pool->drop(); // Drop data in storage path pool diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index de56a622978..9b09b6f37c5 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -311,6 +311,8 @@ class DeltaMergeStore : private boost::noncopyable void rename(String new_path, bool clean_rename, String new_database_name, String new_table_name); + void clearData(); + void drop(); // Stop all background tasks. 
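Two invariants from the hunks above deserve a compact restatement. First, when a segment is dropped its left neighbour absorbs the dropped rowkey range, so after a mid-drop crash every range of the table is still owned by some segment. Second, clearData() keeps DELTA_MERGE_FIRST_SEGMENT_ID, because once PageStorage::getMaxId is non-zero a restore unconditionally looks for the first segment. The range merge done in Segment::dropNextSegment boils down to the following (simplified handle type, names illustrative):

#include <cassert>
#include <cstdint>

struct RangeSketch
{
    int64_t start;
    int64_t end; // exclusive
};

// Mirrors the merge in Segment::dropNextSegment: the surviving segment takes over
// [prev.start, dropped.end) so the table range stays fully covered.
RangeSketch absorbNext(const RangeSketch & prev, const RangeSketch & dropped)
{
    assert(prev.end == dropped.start); // the same precondition the patch asserts
    return RangeSketch{prev.start, dropped.end};
}

int main()
{
    const RangeSketch merged = absorbNext({0, 100}, {100, 200});
    assert(merged.start == 0 && merged.end == 200);
    return 0;
}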
@@ -457,8 +459,13 @@ class DeltaMergeStore : private boost::noncopyable size_t expected_tasks_count = 1, const SegmentIdSet & read_segments = {}); +private: + void dropAllSegments(bool keep_first_segment); + #ifndef DBMS_PUBLIC_GTEST private: +#else +public: #endif Context & global_context; diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index ac192ff6082..eccc06a8257 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -1262,10 +1262,13 @@ SegmentPtr Segment::applyMerge(DMContext & dm_context, // return merged; } -SegmentPtr Segment::dropNextSegment(WriteBatches & wbs) +SegmentPtr Segment::dropNextSegment(WriteBatches & wbs, const RowKeyRange & next_segment_range) { + assert(rowkey_range.end == next_segment_range.start); + // merge the rowkey range of the next segment to this segment + auto new_rowkey_range = RowKeyRange(rowkey_range.start, next_segment_range.end, rowkey_range.is_common_handle, rowkey_range.rowkey_column_size); auto new_segment = std::make_shared(epoch + 1, // - rowkey_range, + new_rowkey_range, segment_id, 0, delta, diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index a6328d24128..0d048011e18 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -231,7 +231,7 @@ class Segment : private boost::noncopyable WriteBatches & wbs, const StableValueSpacePtr & new_stable) const; - SegmentPtr dropNextSegment(WriteBatches & wbs); + SegmentPtr dropNextSegment(WriteBatches & wbs, const RowKeyRange & next_segment_range); /// Flush delta's cache packs. bool flushCache(DMContext & dm_context); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp index 75eed3ab964..a26471cfe01 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp @@ -44,6 +44,11 @@ namespace DB { +namespace FailPoints +{ +extern const char exception_before_drop_segment[]; +extern const char exception_after_drop_segment[]; +} // namespace FailPoints namespace DM { namespace tests @@ -699,6 +704,153 @@ try } CATCH +TEST(StorageDeltaMergeTest, RestoreAfterClearData) +try +{ + Context ctx = DMTestEnv::getContext(); + auto & settings = ctx.getSettingsRef(); + settings.dt_segment_limit_rows = 11; + settings.dt_segment_limit_size = 20; + settings.dt_segment_delta_limit_rows = 7; + settings.dt_segment_delta_limit_size = 20; + settings.dt_segment_force_split_size = 100; + settings.dt_segment_delta_cache_limit_size = 20; + std::shared_ptr storage; + DataTypes data_types; + Names column_names; + // create table + auto create_table = [&]() { + NamesAndTypesList names_and_types_list{ + {"col1", std::make_shared()}, + {"col2", std::make_shared()}, + }; + for (const auto & name_type : names_and_types_list) + { + data_types.push_back(name_type.type); + column_names.push_back(name_type.name); + } + + const String path_name = DB::tests::TiFlashTestEnv::getTemporaryPath("StorageDeltaMerge_RestoreAfterClearData"); + if (Poco::File path(path_name); path.exists()) + path.remove(true); + + // primary_expr_ast + const String table_name = "t_1233"; + ASTPtr astptr(new ASTIdentifier(table_name, ASTIdentifier::Kind::Table)); + astptr->children.emplace_back(new ASTIdentifier("col1")); + + // table_info.id is used as the ns_id + TiDB::TableInfo table_info; + 
table_info.id = 1233; + table_info.is_common_handle = false; + table_info.pk_is_handle = false; + + storage = StorageDeltaMerge::create("TiFlash", + /* db_name= */ "default", + table_name, + table_info, + ColumnsDescription{names_and_types_list}, + astptr, + 0, + ctx); + storage->startup(); + }; + auto write_data = [&](Int64 start, Int64 limit) { + ASTPtr insertptr(new ASTInsertQuery()); + BlockOutputStreamPtr output = storage->write(insertptr, ctx.getSettingsRef()); + // prepare block data + Block sample; + sample.insert(DB::tests::createColumn( + createNumbers(start, start + limit), + "col1")); + sample.insert(DB::tests::createColumn( + Strings(limit, "a"), + "col2")); + + output->writePrefix(); + output->write(sample); + output->writeSuffix(); + }; + auto read_data = [&]() { + QueryProcessingStage::Enum stage2; + SelectQueryInfo query_info; + query_info.query = std::make_shared(); + query_info.mvcc_query_info = std::make_unique(ctx.getSettingsRef().resolve_locks, std::numeric_limits::max()); + Names read_columns = {"col1", EXTRA_TABLE_ID_COLUMN_NAME, "col2"}; + BlockInputStreams ins = storage->read(read_columns, query_info, ctx, stage2, 8192, 1); + BlockInputStreamPtr in = ins[0]; + in->readPrefix(); + size_t num_rows_read = 0; + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + return num_rows_read; + }; + + // create table + create_table(); + size_t num_rows_write = 0; + // write until split and use a big enough finite for loop to make sure the test won't hang forever + for (size_t i = 0; i < 100000; i++) + { + write_data(num_rows_write, 1000); + num_rows_write += 1000; + if (storage->getStore()->getSegmentStats().size() > 1) + break; + } + { + ASSERT_GT(storage->getStore()->getSegmentStats().size(), 1); + ASSERT_EQ(read_data(), num_rows_write); + } + storage->flushCache(ctx); + // throw exception before drop first segment + DB::FailPointHelper::enableFailPoint(DB::FailPoints::exception_before_drop_segment); + ASSERT_ANY_THROW(storage->clearData()); + storage->removeFromTMTContext(); + + // restore the table and make sure no data has been dropped + create_table(); + { + ASSERT_EQ(read_data(), num_rows_write); + } + // write more data make sure segments more than 1 + for (size_t i = 0; i < 100000; i++) + { + if (storage->getStore()->getSegmentStats().size() > 1) + break; + write_data(num_rows_write, 1000); + num_rows_write += 1000; + } + { + ASSERT_GT(storage->getStore()->getSegmentStats().size(), 1); + ASSERT_EQ(read_data(), num_rows_write); + } + storage->flushCache(ctx); + // throw exception after drop first segment + DB::FailPointHelper::enableFailPoint(DB::FailPoints::exception_after_drop_segment); + ASSERT_ANY_THROW(storage->clearData()); + storage->removeFromTMTContext(); + + // restore the table and make sure some data has been dropped + create_table(); + { + ASSERT_LT(read_data(), num_rows_write); + } + storage->clearData(); + storage->removeFromTMTContext(); + + // restore the table and make sure there is just one segment left + create_table(); + { + ASSERT_EQ(storage->getStore()->getSegmentStats().size(), 1); + ASSERT_LT(read_data(), num_rows_write); + } + storage->drop(); +} +CATCH + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 9867f088953..1d3016e2853 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -29,7 +29,6 @@ namespace DB { - class Context; class IBlockInputStream; class IBlockOutputStream; @@ -57,7 
+56,9 @@ class AlterCommands; * - data storage structure (compression, etc.) * - concurrent access to data (locks, etc.) */ -class IStorage : public std::enable_shared_from_this, private boost::noncopyable, public ITableDeclaration +class IStorage : public std::enable_shared_from_this + , private boost::noncopyable + , public ITableDeclaration { public: /// The main name of the table type (for example, StorageMergeTree). @@ -100,7 +101,8 @@ class IStorage : public std::enable_shared_from_this, private boost::n /// won't be changed by this lock. /// After decoding done, we can release alter lock but keep drop lock for writing data. TableStructureLockHolder lockStructureForShare( - const String & query_id, const std::chrono::milliseconds & acquire_timeout = std::chrono::milliseconds(0)); + const String & query_id, + const std::chrono::milliseconds & acquire_timeout = std::chrono::milliseconds(0)); /// Lock table exclusively. This lock must be acquired if you want to be /// sure, that no other thread (SELECT, merge, ALTER, etc.) doing something @@ -110,7 +112,8 @@ class IStorage : public std::enable_shared_from_this, private boost::n /// NOTE: You have to be 100% sure that you need this lock. It's extremely /// heavyweight and makes table irresponsive. TableExclusiveLockHolder lockExclusively( - const String & query_id, const std::chrono::milliseconds & acquire_timeout = std::chrono::milliseconds(0)); + const String & query_id, + const std::chrono::milliseconds & acquire_timeout = std::chrono::milliseconds(0)); /** Read a set of columns from the table. * Accepts a list of columns to read, as well as a description of the query, @@ -133,11 +136,11 @@ class IStorage : public std::enable_shared_from_this, private boost::n * is guaranteed to be immutable once the input streams are returned. */ virtual BlockInputStreams read(const Names & /*column_names*/, - const SelectQueryInfo & /*query_info*/, - const Context & /*context*/, - QueryProcessingStage::Enum & /*processed_stage*/, - size_t /*max_block_size*/, - unsigned /*num_streams*/) + const SelectQueryInfo & /*query_info*/, + const Context & /*context*/, + QueryProcessingStage::Enum & /*processed_stage*/, + size_t /*max_block_size*/, + unsigned /*num_streams*/) { throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -153,6 +156,11 @@ class IStorage : public std::enable_shared_from_this, private boost::n throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + /** Clear the table data. Called before drop the metadata and data of this storage. + * The difference with `drop` is that after calling `clearData`, the storage must still be able to be restored. + */ + virtual void clearData() {} + /** Delete the table data. Called before deleting the directory with the data. * If you do not need any action other than deleting the directory with data, you can leave this method blank. */ @@ -172,8 +180,7 @@ class IStorage : public std::enable_shared_from_this, private boost::n * This method must fully execute the ALTER query, taking care of the locks itself. * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. 
*/ - virtual void alter(const TableLockHolder &, const AlterCommands & /*params*/, const String & /*database_name*/, - const String & /*table_name*/, const Context & /*context*/) + virtual void alter(const TableLockHolder &, const AlterCommands & /*params*/, const String & /*database_name*/, const String & /*table_name*/, const Context & /*context*/) { throw Exception("Method alter is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -223,7 +230,11 @@ class IStorage : public std::enable_shared_from_this, private boost::n * Returns whether any work has been done. */ virtual bool optimize( - const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*final*/, bool /*deduplicate*/, const Context & /*context*/) + const ASTPtr & /*query*/, + const ASTPtr & /*partition*/, + bool /*final*/, + bool /*deduplicate*/, + const Context & /*context*/) { throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -273,7 +284,10 @@ class IStorage : public std::enable_shared_from_this, private boost::n private: RWLock::LockHolder tryLockTimed( - const RWLockPtr & rwlock, RWLock::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const; + const RWLockPtr & rwlock, + RWLock::Type type, + const String & query_id, + const std::chrono::milliseconds & acquire_timeout) const; /// You always need to take the next two locks in this order. diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 1c6853afeef..f2aa227d29c 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -287,13 +287,20 @@ void StorageDeltaMerge::updateTableColumnInfo() rowkey_column_size = rowkey_column_defines.size(); } +void StorageDeltaMerge::clearData() +{ + shutdown(); + // init the store so it can clear data + auto & store = getAndMaybeInitStore(); + store->clearData(); +} + void StorageDeltaMerge::drop() { shutdown(); - if (storeInited()) - { - _store->drop(); - } + // init the store so it can do the drop work + auto & store = getAndMaybeInitStore(); + store->drop(); } Block StorageDeltaMerge::buildInsertBlock(bool is_import, bool is_delete, const Block & old_block) diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index a6e61f3bebe..560f365e747 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -52,6 +52,8 @@ class StorageDeltaMerge String getTableName() const override; String getDatabaseName() const override; + void clearData() override; + void drop() override; BlockInputStreams read( diff --git a/tests/delta-merge-test/raft/schema/partition_table_restart.test b/tests/delta-merge-test/raft/schema/partition_table_restart.test index 2de5cb92436..bd0facdc153 100644 --- a/tests/delta-merge-test/raft/schema/partition_table_restart.test +++ b/tests/delta-merge-test/raft/schema/partition_table_restart.test @@ -35,6 +35,8 @@ # schema syncer guarantees logical table creation at last, so there won't be cases that logical table exists whereas physical table not. 
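
The crash-safety argument behind the clearData/drop split above can be summarized in a small self-contained toy model. Everything here (DiskState, dropMetadata, restoreAfterCrash) is illustrative and assumed, not part of this patch; the point it demonstrates is that a crash after any single step must leave the table either restorable or fully dropped, which is exactly why clearData keeps the first segment.

#include <cassert>

// Toy model of a table's on-disk state during the drop-table flow.
struct DiskState
{
    bool metadata = true;
    bool first_segment = true; // clearData() keeps it so restore can succeed
    bool other_segments = true;
};

void clearData(DiskState & s) { s.other_segments = false; } // keeps first_segment
void dropMetadata(DiskState & s) { s.metadata = false; }
void dropData(DiskState & s) { s.first_segment = s.other_segments = false; }

// Restore succeeds iff the table is fully dropped (nothing left to restore)
// or its first segment is still present.
bool restoreAfterCrash(const DiskState & s) { return !s.metadata || s.first_segment; }

int main()
{
    // A crash may happen after any step; every prefix of the flow must stay restorable.
    DiskState s;
    clearData(s);
    assert(restoreAfterCrash(s)); // crash after clearData: first segment still restorable
    dropMetadata(s);
    assert(restoreAfterCrash(s)); // crash after metadata drop: the table is simply gone
    dropData(s);
    assert(restoreAfterCrash(s));

    // If clearData had also dropped the first segment, a crash before the
    // metadata drop would leave an unrestorable table (the bug described above).
    DiskState bad;
    bad.first_segment = false;
    bad.other_segments = false;
    assert(!restoreAfterCrash(bad));
    return 0;
}
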
=> drop table default.test => DBGInvoke __reset_schemas() +# remove obsolete entry left by previous dropped table +=> DBGInvoke __gc_global_storage_pool() => DBGInvoke __add_column_to_tidb_table(default, test, 'col_3 Nullable(Int8)') => DBGInvoke __rename_tidb_table(default, test, test1) From ec4d0588f9b94be0b3f1311521f0a84fd1b0bfec Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Wed, 11 May 2022 13:44:35 +0800 Subject: [PATCH 77/79] Fix wal may dump write diff type in same page_id (#4850) close pingcap/tiflash#4849 --- dbms/src/Storages/DeltaMerge/StoragePool.cpp | 55 ++++++++++++++++--- dbms/src/Storages/DeltaMerge/StoragePool.h | 24 ++++++++ dbms/src/Storages/Page/PageStorage.h | 15 +++-- dbms/src/Storages/Page/V2/PageStorage.cpp | 18 +++--- dbms/src/Storages/Page/V2/PageStorage.h | 4 +- .../Page/V2/tests/gtest_page_storage.cpp | 28 ++++++++++ .../Storages/Page/V3/PageDirectoryFactory.cpp | 9 +++ .../Storages/Page/V3/PageDirectoryFactory.h | 6 ++ dbms/src/Storages/Page/V3/PageStorageImpl.cpp | 9 +-- dbms/src/Storages/Page/V3/PageStorageImpl.h | 3 +- .../Page/V3/tests/gtest_page_storage.cpp | 46 ++++++++++++++++ tests/delta-merge-test/ddl/alter.test | 23 ++++---- .../raft/txn_mock/partition_table.test | 27 ++++----- 13 files changed, 203 insertions(+), 64 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index bee6b174b55..8c4468ae2ed 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -114,9 +114,9 @@ GlobalStoragePool::GlobalStoragePool(const PathPool & path_pool, Context & globa void GlobalStoragePool::restore() { - log_storage->restore(); - data_storage->restore(); - meta_storage->restore(); + log_max_ids = log_storage->restore(); + data_max_ids = data_storage->restore(); + meta_max_ids = meta_storage->restore(); gc_handle = global_context.getBackgroundPool().addTask( [this] { @@ -187,6 +187,9 @@ StoragePool::StoragePool(NamespaceId ns_id_, const GlobalStoragePool & global_st , data_storage_reader(ns_id, data_storage, nullptr) , meta_storage_reader(ns_id, meta_storage, nullptr) , global_context(global_ctx) + , v3_log_max_ids(global_storage_pool.getLogMaxIds()) + , v3_data_max_ids(global_storage_pool.getDataMaxIds()) + , v3_meta_max_ids(global_storage_pool.getMetaMaxIds()) {} void StoragePool::restore() @@ -194,14 +197,48 @@ void StoragePool::restore() // If the storage instances is not global, we need to initialize it by ourselves and add a gc task. 
    if (owned_storage)
    {
-        log_storage->restore();
-        data_storage->restore();
-        meta_storage->restore();
+        auto log_max_ids = log_storage->restore();
+        auto data_max_ids = data_storage->restore();
+        auto meta_max_ids = meta_storage->restore();
+
+        assert(log_max_ids.size() == 1);
+        assert(data_max_ids.size() == 1);
+        assert(meta_max_ids.size() == 1);
+
+        max_log_page_id = log_max_ids[0];
+        max_data_page_id = data_max_ids[0];
+        max_meta_page_id = meta_max_ids[0];
     }
+    else
+    {
+        if (const auto & it = v3_log_max_ids.find(ns_id); it == v3_log_max_ids.end())
+        {
+            max_log_page_id = 0;
+        }
+        else
+        {
+            max_log_page_id = it->second;
+        }

-    max_log_page_id = log_storage->getMaxId(ns_id);
-    max_data_page_id = data_storage->getMaxId(ns_id);
-    max_meta_page_id = meta_storage->getMaxId(ns_id);
+        if (const auto & it = v3_data_max_ids.find(ns_id); it == v3_data_max_ids.end())
+        {
+            max_data_page_id = 0;
+        }
+        else
+        {
+            max_data_page_id = it->second;
+        }
+
+        if (const auto & it = v3_meta_max_ids.find(ns_id); it == v3_meta_max_ids.end())
+        {
+            max_meta_page_id = 0;
+        }
+        else
+        {
+            max_meta_page_id = it->second;
+        }
+    }
+    // TODO: add a log to show max_*_page_id after the mix-mode PR is ready.
 }

 StoragePool::~StoragePool()
diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.h b/dbms/src/Storages/DeltaMerge/StoragePool.h
index 859be76dbe5..3a02232902b 100644
--- a/dbms/src/Storages/DeltaMerge/StoragePool.h
+++ b/dbms/src/Storages/DeltaMerge/StoragePool.h
@@ -53,6 +53,21 @@ class GlobalStoragePool : private boost::noncopyable
     PageStoragePtr data() const { return data_storage; }
     PageStoragePtr meta() const { return meta_storage; }

+    std::map<NamespaceId, PageId> getLogMaxIds() const
+    {
+        return log_max_ids;
+    }
+
+    std::map<NamespaceId, PageId> getDataMaxIds() const
+    {
+        return data_max_ids;
+    }
+
+    std::map<NamespaceId, PageId> getMetaMaxIds() const
+    {
+        return meta_max_ids;
+    }
+
 private:
     // TODO: maybe more frequent gc for GlobalStoragePool?
     bool gc(const Settings & settings, const Seconds & try_gc_period = DELTA_MERGE_GC_PERIOD);
@@ -62,6 +77,10 @@
     PageStoragePtr data_storage;
     PageStoragePtr meta_storage;

+    std::map<NamespaceId, PageId> log_max_ids;
+    std::map<NamespaceId, PageId> data_max_ids;
+    std::map<NamespaceId, PageId> meta_max_ids;
+
     std::atomic<Timepoint> last_try_gc_time = Clock::now();

     std::mutex mutex;
@@ -143,6 +162,11 @@ class StoragePool : private boost::noncopyable

     Context & global_context;

+    // TBD: will be replaced by GlobalPathPoolPtr after the mix-mode PR is ready
+    std::map<NamespaceId, PageId> v3_log_max_ids;
+    std::map<NamespaceId, PageId> v3_data_max_ids;
+    std::map<NamespaceId, PageId> v3_meta_max_ids;
+
     std::atomic<PageId> max_log_page_id = 0;
     std::atomic<PageId> max_data_page_id = 0;
     std::atomic<PageId> max_meta_page_id = 0;
diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h
index 29fea2d30e3..34d91dbd1ad 100644
--- a/dbms/src/Storages/Page/PageStorage.h
+++ b/dbms/src/Storages/Page/PageStorage.h
@@ -211,12 +211,16 @@ class PageStorage : private boost::noncopyable

     virtual ~PageStorage() = default;

-    virtual void restore() = 0;
+    // Returns a map of [ns_id -> max_page_id].
+    // The caller should ensure that it only allocates new ids larger than `max_page_id`. Reusing the
+    // same id for different kinds of writes (put/ref/put_external) would make PageStorage run into unexpected errors.
+    //
+    // Note that for V2, the returned map always contains exactly one element, because V2 has no
+    // notion of ns_id.
+    virtual std::map<NamespaceId, PageId> restore() = 0;

     virtual void drop() = 0;

-    virtual PageId getMaxId(NamespaceId ns_id) = 0;
-
     virtual SnapshotPtr getSnapshot(const String & tracing_id) = 0;

     // Get some statistics of all living snapshots and the oldest living snapshot.
@@ -362,11 +366,6 @@ class PageReader : private boost::noncopyable
         return storage->read(ns_id, page_fields, read_limiter, snap);
     }

-    PageId getMaxId() const
-    {
-        return storage->getMaxId(ns_id);
-    }
-
     PageId getNormalPageId(PageId page_id) const
     {
         return storage->getNormalPageId(ns_id, page_id, snap);
diff --git a/dbms/src/Storages/Page/V2/PageStorage.cpp b/dbms/src/Storages/Page/V2/PageStorage.cpp
index 32367c4e708..4c47645e7d7 100644
--- a/dbms/src/Storages/Page/V2/PageStorage.cpp
+++ b/dbms/src/Storages/Page/V2/PageStorage.cpp
@@ -196,7 +196,7 @@ toConcreteSnapshot(const DB::PageStorage::SnapshotPtr & ptr)
     return assert_cast(ptr.get());
 }

-void PageStorage::restore()
+std::map<NamespaceId, PageId> PageStorage::restore()
 {
     LOG_FMT_INFO(log, "{} begin to restore data from disk. [path={}] [num_writers={}]", storage_name, delegator->defaultPath(), write_files.size());

@@ -349,17 +349,13 @@ void PageStorage::restore()
 #endif
     statistics = restore_info;
-    {
-        auto snapshot = getConcreteSnapshot();
-        size_t num_pages = snapshot->version()->numPages();
-        LOG_FMT_INFO(log, "{} restore {} pages, write batch sequence: {}, {}", storage_name, num_pages, write_batch_seq, statistics.toString());
-    }
-}

-PageId PageStorage::getMaxId(NamespaceId /*ns_id*/)
-{
-    std::lock_guard write_lock(write_mutex);
-    return versioned_page_entries.getSnapshot("")->version()->maxId();
+    auto snapshot = getConcreteSnapshot();
+    size_t num_pages = snapshot->version()->numPages();
+    LOG_FMT_INFO(log, "{} restore {} pages, write batch sequence: {}, {}", storage_name, num_pages, write_batch_seq, statistics.toString());
+
+    // Fixed namespace id 0
+    return {{0, snapshot->version()->maxId()}};
 }

 PageId PageStorage::getNormalPageIdImpl(NamespaceId /*ns_id*/, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist)
diff --git a/dbms/src/Storages/Page/V2/PageStorage.h b/dbms/src/Storages/Page/V2/PageStorage.h
index 6cefb6407d2..1e32de40dd0 100644
--- a/dbms/src/Storages/Page/V2/PageStorage.h
+++ b/dbms/src/Storages/Page/V2/PageStorage.h
@@ -91,12 +91,10 @@ class PageStorage : public DB::PageStorage
         const FileProviderPtr & file_provider_);
     ~PageStorage() = default;

-    void restore() override;
+    std::map<NamespaceId, PageId> restore() override;

     void drop() override;

-    PageId getMaxId(NamespaceId ns_id) override;
-
     PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) override;

     DB::PageStorage::SnapshotPtr getSnapshot(const String & tracing_id) override;
diff --git a/dbms/src/Storages/Page/V2/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V2/tests/gtest_page_storage.cpp
index fc429dde0ac..5bbd319192b 100644
--- a/dbms/src/Storages/Page/V2/tests/gtest_page_storage.cpp
+++ b/dbms/src/Storages/Page/V2/tests/gtest_page_storage.cpp
@@ -79,6 +79,15 @@ class PageStorage_test : public DB::base::TiFlashStorageTestBasic
         return storage;
     }

+    std::pair<std::shared_ptr<PageStorage>, std::map<NamespaceId, PageId>> reopen()
+    {
+        auto delegator = path_pool->getPSDiskDelegatorSingle("log");
+        auto storage = std::make_shared<PageStorage>("test.t", delegator, config, file_provider);
+        auto max_ids = storage->restore();
+        return {storage, max_ids};
+    }
+
+
 protected:
     PageStorage::Config config;
     std::shared_ptr<PageStorage> storage;
@@ -727,6 +736,25 @@ try
 }
 CATCH

+TEST_F(PageStorage_test, getMaxIdsFromRestore)
+try
+{
+    {
+        WriteBatch batch;
batch.putExternal(1, 0); + batch.putExternal(2, 0); + batch.delPage(1); + batch.delPage(2); + storage->write(std::move(batch)); + } + + storage = nullptr; + auto [page_storage, max_ids] = reopen(); + ASSERT_EQ(max_ids.size(), 1); + ASSERT_EQ(max_ids[0], 2); +} +CATCH + TEST_F(PageStorage_test, IgnoreIncompleteWriteBatch1) try { diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 7f7e7f19989..4f2a8a3fbd4 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -105,6 +105,15 @@ void PageDirectoryFactory::loadEdit(const PageDirectoryPtr & dir, const PageEntr for (const auto & r : edit.getRecords()) { + if (auto it = max_apply_page_ids.find(r.page_id.high); it == max_apply_page_ids.end()) + { + max_apply_page_ids[r.page_id.high] = r.page_id.low; + } + else + { + it->second = std::max(it->second, r.page_id.low); + } + if (max_applied_ver < r.version) max_applied_ver = r.version; max_applied_page_id = std::max(r.page_id, max_applied_page_id); diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h index 278298d7010..4136e626050 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h @@ -58,11 +58,17 @@ class PageDirectoryFactory return *this; } + std::map getMaxApplyPageIds() const + { + return max_apply_page_ids; + } + private: void loadFromDisk(const PageDirectoryPtr & dir, WALStoreReaderPtr && reader); void loadEdit(const PageDirectoryPtr & dir, const PageEntriesEdit & edit); BlobStore::BlobStats * blob_stats = nullptr; + std::map max_apply_page_ids; }; } // namespace PS::V3 diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index bb48f18c658..ab1ba0b04e1 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -43,7 +43,7 @@ PageStorageImpl::PageStorageImpl( PageStorageImpl::~PageStorageImpl() = default; -void PageStorageImpl::restore() +std::map PageStorageImpl::restore() { // TODO: clean up blobstore. 
// TODO: Speedup restoring @@ -53,7 +53,7 @@ void PageStorageImpl::restore() page_directory = factory .setBlobStore(blob_store) .create(storage_name, file_provider, delegator, parseWALConfig(config)); - // factory.max_applied_page_id // TODO: return it to outer function + return factory.getMaxApplyPageIds(); } void PageStorageImpl::drop() @@ -61,11 +61,6 @@ void PageStorageImpl::drop() throw Exception("Not implemented", ErrorCodes::NOT_IMPLEMENTED); } -PageId PageStorageImpl::getMaxId(NamespaceId ns_id) -{ - return page_directory->getMaxId(ns_id); -} - PageId PageStorageImpl::getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) { if (!snapshot) diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index e3df872b1e1..7e86a110ab9 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -60,11 +60,10 @@ class PageStorageImpl : public DB::PageStorage return wal_config; } - void restore() override; + std::map restore() override; void drop() override; - PageId getMaxId(NamespaceId ns_id) override; PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) override; diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 7bbc882f62b..a1cf73dd10a 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -66,6 +66,16 @@ class PageStorageTest : public DB::base::TiFlashStorageTestBasic return storage; } + + std::pair, std::map> reopen() + { + auto path = getTemporaryPath(); + auto delegator = std::make_shared(path); + auto storage = std::make_shared("test.t", delegator, config, file_provider); + auto max_ids = storage->restore(); + return {storage, max_ids}; + } + protected: FileProviderPtr file_provider; std::unique_ptr path_pool; @@ -1300,5 +1310,41 @@ try } CATCH +TEST_F(PageStorageTest, getMaxIdsFromRestore) +try +{ + { + WriteBatch batch; + batch.putExternal(1, 0); + batch.putExternal(2, 0); + batch.delPage(1); + batch.delPage(2); + page_storage->write(std::move(batch)); + + WriteBatch batch2{TEST_NAMESPACE_ID + 1}; + batch2.putExternal(1, 0); + batch2.putExternal(2, 0); + batch2.putRefPage(3, 1); + batch2.putRefPage(100, 2); + page_storage->write(std::move(batch2)); + + WriteBatch batch3{TEST_NAMESPACE_ID + 2}; + batch3.putExternal(1, 0); + batch3.putExternal(2, 0); + batch3.putRefPage(3, 1); + batch3.putRefPage(10, 2); + batch3.delPage(10); + page_storage->write(std::move(batch3)); + } + + page_storage = nullptr; + auto [page_storage, max_ids] = reopen(); + ASSERT_EQ(max_ids.size(), 3); + ASSERT_EQ(max_ids[TEST_NAMESPACE_ID], 2); // external page 2 is marked as deleted, but we can still restore it. + ASSERT_EQ(max_ids[TEST_NAMESPACE_ID + 1], 100); + ASSERT_EQ(max_ids[TEST_NAMESPACE_ID + 2], 10); // page 10 is marked as deleted, but we can still restore it. +} +CATCH + } // namespace PS::V3::tests } // namespace DB diff --git a/tests/delta-merge-test/ddl/alter.test b/tests/delta-merge-test/ddl/alter.test index 4bf405ac9e1..3dc57b05843 100644 --- a/tests/delta-merge-test/ddl/alter.test +++ b/tests/delta-merge-test/ddl/alter.test @@ -73,18 +73,19 @@ ## rename table ->> drop table if exists dm_test_renamed ->> rename table dm_test to dm_test_renamed ->> select * from dm_test -Received exception from server (version {#WORD}): -Code: 60. 
DB::Exception: Received from {#WORD} DB::Exception: Table default.dm_test doesn't exist..
+# FIXME: No support rename after PR 4850 (PageStorage V3)
+# >> drop table if exists dm_test_renamed
+# >> rename table dm_test to dm_test_renamed
+# >> select * from dm_test
+# Received exception from server (version {#WORD}):
+# Code: 60. DB::Exception: Received from {#WORD} DB::Exception: Table default.dm_test doesn't exist..

->> select * from dm_test_renamed
-┌─a─┬────b─┬─────c─┬────d─┐
-│ 1 │    0 │     0 │   \N │
-│ 2 │ 1024 │ 65535 │ 4096 │
-│ 3 │ 2048 │ 65536 │   \N │
-└───┴──────┴───────┴──────┘
+# >> select * from dm_test_renamed
+# ┌─a─┬────b─┬─────c─┬────d─┐
+# │ 1 │    0 │     0 │   \N │
+# │ 2 │ 1024 │ 65535 │ 4096 │
+# │ 3 │ 2048 │ 65536 │   \N │
+# └───┴──────┴───────┴──────┘

 ## Clean up
diff --git a/tests/delta-merge-test/raft/txn_mock/partition_table.test b/tests/delta-merge-test/raft/txn_mock/partition_table.test
index 2f8e67a61a8..3967d34b48e 100644
--- a/tests/delta-merge-test/raft/txn_mock/partition_table.test
+++ b/tests/delta-merge-test/raft/txn_mock/partition_table.test
@@ -94,19 +94,20 @@
 │            0 │
 └──────────────┘

-=> DBGInvoke __rename_tidb_table(default, test, test1)
-=> DBGInvoke __refresh_schemas()
-=> select count(*) from default.test1_9997
-┌─count()─┐
-│       2 │
-└─────────┘
-
-=> DBGInvoke __drop_tidb_table(default, test1)
-=> DBGInvoke __refresh_schemas()
-=> DBGInvoke is_tombstone(default, test1_9999)
-┌─is_tombstone(default, test_9999)─┐
-│ true                             │
-└──────────────────────────────────┘
+# FIXME: No support rename after PR 4850 (PageStorage V3)
+# => DBGInvoke __rename_tidb_table(default, test, test1)
+# => DBGInvoke __refresh_schemas()
+# => select count(*) from default.test1_9997
+# ┌─count()─┐
+# │       2 │
+# └─────────┘
+
+# => DBGInvoke __drop_tidb_table(default, test1)
+# => DBGInvoke __refresh_schemas()
+# => DBGInvoke is_tombstone(default, test1_9999)
+# ┌─is_tombstone(default, test_9999)─┐
+# │ true                             │
+# └──────────────────────────────────┘

 => drop table if exists default.test
 => drop table if exists default.test1

From 1b791ce7c15193d318e9c950beece286c555fd08 Mon Sep 17 00:00:00 2001
From: jiaqizho
Date: Wed, 11 May 2022 13:44:35 +0800
Subject: [PATCH 78/79] Fix bug for deseri type error that cause data error (#4854)

close pingcap/tiflash#4851
---
 dbms/src/Storages/Page/V3/PageDirectory.cpp | 10 +++
 dbms/src/Storages/Page/V3/WAL/serialize.cpp | 3 +-
 .../Page/V3/tests/gtest_page_directory.cpp | 75 ++++++++++++++++++-
 .../Page/V3/tests/gtest_wal_store.cpp | 55 ++++++++++++++
 4 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp
index aef4e9e1922..2a272392b70 100644
--- a/dbms/src/Storages/Page/V3/PageDirectory.cpp
+++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp
@@ -334,6 +334,11 @@ VersionedPageEntries::resolveToPageId(UInt64 seq, bool check_prev, PageEntryV3 *
             return {RESOLVE_TO_REF, ori_page_id, create_ver};
         }
     }
+    else
+    {
+        LOG_FMT_WARNING(&Poco::Logger::get("VersionedPageEntries"), "Can't resolve the EditRecordType {}", type);
+    }
+
     return {RESOLVE_FAIL, buildV3Id(0, 0), PageVersionType(0)};
 }

@@ -716,6 +721,11 @@ PageIDAndEntryV3 PageDirectory::get(PageIdV3Internal page_id, const PageDirector
     {
         if (throw_on_not_exist)
         {
+            LOG_FMT_WARNING(log, "Dump state for invalid page id [page_id={}]", page_id);
+            for (const auto & [dump_id, dump_entry] : mvcc_table_directory)
+            {
+                LOG_FMT_WARNING(log, "Dumping state [page_id={}] [entry={}]", dump_id, dump_entry == nullptr ?
"" : dump_entry->toDebugString()); + } throw Exception(fmt::format("Invalid page id, entry not exist [page_id={}] [resolve_id={}]", page_id, id_to_resolve), ErrorCodes::PS_ENTRY_NOT_EXISTS); } else diff --git a/dbms/src/Storages/Page/V3/WAL/serialize.cpp b/dbms/src/Storages/Page/V3/WAL/serialize.cpp index 45104b50cea..26854f9a640 100644 --- a/dbms/src/Storages/Page/V3/WAL/serialize.cpp +++ b/dbms/src/Storages/Page/V3/WAL/serialize.cpp @@ -100,7 +100,6 @@ void deserializePutFrom([[maybe_unused]] const EditRecordType record_type, ReadB UInt32 flags = 0; readIntBinary(flags, buf); - // All consider as put PageEntriesEdit::EditRecord rec; rec.type = record_type; readIntBinary(rec.page_id, buf); @@ -152,7 +151,7 @@ void deserializePutExternalFrom([[maybe_unused]] const EditRecordType record_typ assert(record_type == EditRecordType::PUT_EXTERNAL || record_type == EditRecordType::VAR_EXTERNAL); PageEntriesEdit::EditRecord rec; - rec.type = EditRecordType::PUT_EXTERNAL; + rec.type = record_type; readIntBinary(rec.page_id, buf); deserializeVersionFrom(buf, rec.version); readIntBinary(rec.being_ref_count, buf); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index ad00c47c097..9789fd0fe83 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1760,16 +1761,88 @@ try } CATCH + +TEST_F(PageDirectoryGCTest, GCOnRefedExternalEntries2) +try +{ + { + PageEntriesEdit edit; // ingest + edit.putExternal(352); + dir->apply(std::move(edit)); + } + { + PageEntriesEdit edit; + edit.ref(353, 352); + dir->apply(std::move(edit)); + } + { + PageEntriesEdit edit; // ingest done + edit.del(352); + dir->apply(std::move(edit)); + } + { + PageEntriesEdit edit; // split + edit.ref(357, 352); + edit.ref(359, 352); + dir->apply(std::move(edit)); + } + { + PageEntriesEdit edit; // split done + edit.del(353); + dir->apply(std::move(edit)); + } + { + PageEntriesEdit edit; // one of segment delta-merge + edit.del(359); + dir->apply(std::move(edit)); + } + + { + auto snap = dir->createSnapshot(); + auto normal_id = dir->getNormalPageId(357, snap); + EXPECT_EQ(normal_id.low, 352); + } + dir->gcInMemEntries(); + { + auto snap = dir->createSnapshot(); + auto normal_id = dir->getNormalPageId(357, snap); + EXPECT_EQ(normal_id.low, 352); + } + + auto s0 = dir->createSnapshot(); + auto edit = dir->dumpSnapshotToEdit(s0); + edit.size(); + auto restore_from_edit = [](const PageEntriesEdit & edit) { + auto deseri_edit = DB::PS::V3::ser::deserializeFrom(DB::PS::V3::ser::serializeTo(edit)); + auto ctx = DB::tests::TiFlashTestEnv::getContext(); + auto provider = ctx.getFileProvider(); + auto path = getTemporaryPath(); + PSDiskDelegatorPtr delegator = std::make_shared(path); + PageDirectoryFactory factory; + auto d = factory.createFromEdit(getCurrentTestName(), provider, delegator, deseri_edit); + return d; + }; + { + auto restored_dir = restore_from_edit(edit); + auto snap = restored_dir->createSnapshot(); + auto normal_id = restored_dir->getNormalPageId(357, snap); + EXPECT_EQ(normal_id.low, 352); + } +} +CATCH + + TEST_F(PageDirectoryGCTest, DumpAndRestore) try { auto restore_from_edit = [](const PageEntriesEdit & edit) { + auto deseri_edit = DB::PS::V3::ser::deserializeFrom(DB::PS::V3::ser::serializeTo(edit)); auto ctx = DB::tests::TiFlashTestEnv::getContext(); auto provider = 
ctx.getFileProvider(); auto path = getTemporaryPath(); PSDiskDelegatorPtr delegator = std::make_shared(path); PageDirectoryFactory factory; - auto d = factory.createFromEdit(getCurrentTestName(), provider, delegator, edit); + auto d = factory.createFromEdit(getCurrentTestName(), provider, delegator, deseri_edit); return d; }; diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp index 23ee2e93f07..fadc0fb3bae 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp @@ -133,6 +133,61 @@ TEST(WALSeriTest, Upserts) EXPECT_SAME_ENTRY(iter->entry, entry_p5_2); } +TEST(WALSeriTest, RefExternalAndEntry) +{ + PageVersionType ver1_0(/*seq=*/1, /*epoch*/ 0); + PageVersionType ver2_0(/*seq=*/2, /*epoch*/ 0); + PageVersionType ver3_0(/*seq=*/3, /*epoch*/ 0); + { + PageEntriesEdit edit; + edit.varExternal(1, ver1_0, 2); + edit.varDel(1, ver2_0); + edit.varRef(2, ver3_0, 1); + + auto deseri_edit = DB::PS::V3::ser::deserializeFrom(DB::PS::V3::ser::serializeTo(edit)); + ASSERT_EQ(deseri_edit.size(), 3); + auto iter = deseri_edit.getRecords().begin(); + EXPECT_EQ(iter->type, EditRecordType::VAR_EXTERNAL); + EXPECT_EQ(iter->page_id.low, 1); + EXPECT_EQ(iter->version, ver1_0); + EXPECT_EQ(iter->being_ref_count, 2); + iter++; + EXPECT_EQ(iter->type, EditRecordType::VAR_DELETE); + EXPECT_EQ(iter->page_id.low, 1); + EXPECT_EQ(iter->version, ver2_0); + EXPECT_EQ(iter->being_ref_count, 1); + iter++; + EXPECT_EQ(iter->type, EditRecordType::VAR_REF); + EXPECT_EQ(iter->page_id.low, 2); + EXPECT_EQ(iter->version, ver3_0); + } + + { + PageEntriesEdit edit; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + edit.varEntry(1, ver1_0, entry_p1_2, 2); + edit.varDel(1, ver2_0); + edit.varRef(2, ver3_0, 1); + + auto deseri_edit = DB::PS::V3::ser::deserializeFrom(DB::PS::V3::ser::serializeTo(edit)); + ASSERT_EQ(deseri_edit.size(), 3); + auto iter = deseri_edit.getRecords().begin(); + EXPECT_EQ(iter->type, EditRecordType::VAR_ENTRY); + EXPECT_EQ(iter->page_id.low, 1); + EXPECT_EQ(iter->version, ver1_0); + EXPECT_EQ(iter->being_ref_count, 2); + iter++; + EXPECT_EQ(iter->type, EditRecordType::VAR_DELETE); + EXPECT_EQ(iter->page_id.low, 1); + EXPECT_EQ(iter->version, ver2_0); + EXPECT_EQ(iter->being_ref_count, 1); + iter++; + EXPECT_EQ(iter->type, EditRecordType::VAR_REF); + EXPECT_EQ(iter->page_id.low, 2); + EXPECT_EQ(iter->version, ver3_0); + } +} + TEST(WALLognameTest, parsing) { LoggerPtr log = Logger::get("WALLognameTest"); From a446006bccaf6a9137a72ad67541408cf0b052a6 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Wed, 11 May 2022 16:16:34 +0800 Subject: [PATCH 79/79] Interpreter: Remove Optimize query. 
(#4589) ref pingcap/tiflash#4860 --- dbms/src/Interpreters/InterpreterFactory.cpp | 7 -- .../Interpreters/InterpreterOptimizeQuery.cpp | 43 ---------- .../Interpreters/InterpreterOptimizeQuery.h | 46 ---------- dbms/src/Parsers/ASTOptimizeQuery.h | 76 ---------------- dbms/src/Parsers/ParserOptimizeQuery.cpp | 86 ------------------- dbms/src/Parsers/ParserOptimizeQuery.h | 33 ------- dbms/src/Parsers/ParserQuery.cpp | 21 ++--- 7 files changed, 8 insertions(+), 304 deletions(-) delete mode 100644 dbms/src/Interpreters/InterpreterOptimizeQuery.cpp delete mode 100644 dbms/src/Interpreters/InterpreterOptimizeQuery.h delete mode 100644 dbms/src/Parsers/ASTOptimizeQuery.h delete mode 100644 dbms/src/Parsers/ParserOptimizeQuery.cpp delete mode 100644 dbms/src/Parsers/ParserOptimizeQuery.h diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index 5231bbd3dd6..631df49227e 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -45,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -126,11 +124,6 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & /// readonly is checked inside InterpreterSetQuery return std::make_unique(query, context); } - else if (typeid_cast(query.get())) - { - throwIfReadOnly(context); - return std::make_unique(query, context); - } else if (typeid_cast(query.get())) { return std::make_unique(query, context); diff --git a/dbms/src/Interpreters/InterpreterOptimizeQuery.cpp b/dbms/src/Interpreters/InterpreterOptimizeQuery.cpp deleted file mode 100644 index 173065512b8..00000000000 --- a/dbms/src/Interpreters/InterpreterOptimizeQuery.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - - -BlockIO InterpreterOptimizeQuery::execute() -{ - const ASTOptimizeQuery & ast = typeid_cast(*query_ptr); - - if (ast.final && !ast.partition) - throw Exception("FINAL flag for OPTIMIZE query is meaningful only with specified PARTITION", ErrorCodes::BAD_ARGUMENTS); - - StoragePtr table = context.getTable(ast.database, ast.table); - auto table_lock = table->lockStructureForShare(RWLock::NO_QUERY); - table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context); - return {}; -} - -} // namespace DB diff --git a/dbms/src/Interpreters/InterpreterOptimizeQuery.h b/dbms/src/Interpreters/InterpreterOptimizeQuery.h deleted file mode 100644 index bf39aba9c71..00000000000 --- a/dbms/src/Interpreters/InterpreterOptimizeQuery.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - - -namespace DB -{ -class Context; -class IAST; -using ASTPtr = std::shared_ptr; - - -/** Just call method "optimize" for table. - */ -class InterpreterOptimizeQuery : public IInterpreter -{ -public: - InterpreterOptimizeQuery(const ASTPtr & query_ptr_, Context & context_) - : query_ptr(query_ptr_) - , context(context_) - { - } - - BlockIO execute() override; - -private: - ASTPtr query_ptr; - Context & context; -}; - - -} // namespace DB diff --git a/dbms/src/Parsers/ASTOptimizeQuery.h b/dbms/src/Parsers/ASTOptimizeQuery.h deleted file mode 100644 index 17d9e6ab41a..00000000000 --- a/dbms/src/Parsers/ASTOptimizeQuery.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - - -namespace DB -{ - - -/** OPTIMIZE query - */ -class ASTOptimizeQuery : public IAST -{ -public: - String database; - String table; - - /// The partition to optimize can be specified. - ASTPtr partition; - /// A flag can be specified - perform optimization "to the end" instead of one step. - bool final; - /// Do deduplicate (default: false) - bool deduplicate; - - /** Get the text that identifies this element. */ - String getID() const override { return "OptimizeQuery_" + database + "_" + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); }; - - ASTPtr clone() const override - { - auto res = std::make_shared(*this); - res->children.clear(); - - if (partition) - { - res->partition = partition->clone(); - res->children.push_back(res->partition); - } - - return res; - } - -protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "") - << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - - if (partition) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " PARTITION " << (settings.hilite ? hilite_none : ""); - partition->formatImpl(settings, state, frame); - } - - if (final) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FINAL" << (settings.hilite ? hilite_none : ""); - - if (deduplicate) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? 
hilite_none : ""); - } -}; - -} diff --git a/dbms/src/Parsers/ParserOptimizeQuery.cpp b/dbms/src/Parsers/ParserOptimizeQuery.cpp deleted file mode 100644 index 1b5f82ff682..00000000000 --- a/dbms/src/Parsers/ParserOptimizeQuery.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include - -#include -#include -#include - -#include - - -namespace DB -{ - - -bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword s_optimize_table("OPTIMIZE TABLE"); - ParserKeyword s_partition("PARTITION"); - ParserKeyword s_final("FINAL"); - ParserKeyword s_deduplicate("DEDUPLICATE"); - ParserToken s_dot(TokenType::Dot); - ParserIdentifier name_p; - ParserPartition partition_p; - - ASTPtr database; - ASTPtr table; - ASTPtr partition; - bool final = false; - bool deduplicate = false; - - if (!s_optimize_table.ignore(pos, expected)) - return false; - - if (!name_p.parse(pos, table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) - { - database = table; - if (!name_p.parse(pos, table, expected)) - return false; - } - - if (s_partition.ignore(pos, expected)) - { - if (!partition_p.parse(pos, partition, expected)) - return false; - } - - if (s_final.ignore(pos, expected)) - final = true; - - if (s_deduplicate.ignore(pos, expected)) - deduplicate = true; - - auto query = std::make_shared(); - node = query; - - if (database) - query->database = typeid_cast(*database).name; - if (table) - query->table = typeid_cast(*table).name; - query->partition = partition; - query->final = final; - query->deduplicate = deduplicate; - - return true; -} - - -} diff --git a/dbms/src/Parsers/ParserOptimizeQuery.h b/dbms/src/Parsers/ParserOptimizeQuery.h deleted file mode 100644 index c12cfb80c90..00000000000 --- a/dbms/src/Parsers/ParserOptimizeQuery.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include - - -namespace DB -{ - -/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE] - */ -class ParserOptimizeQuery : public IParserBase -{ -protected: - const char * getName() const { return "OPTIMIZE query"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); -}; - -} diff --git a/dbms/src/Parsers/ParserQuery.cpp b/dbms/src/Parsers/ParserQuery.cpp index f6bb7d8ca32..4e20221df82 100644 --- a/dbms/src/Parsers/ParserQuery.cpp +++ b/dbms/src/Parsers/ParserQuery.cpp @@ -12,26 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include +#include #include -#include -#include #include +#include #include +#include +#include +#include +#include #include -#include -#include #include -#include #include #include -#include +#include namespace DB { - - bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserQueryWithOutput query_with_output_p; @@ -40,7 +37,6 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserSetQuery set_p; ParserDeleteQuery delete_p; ParserDBGInvokeQuery dbginvoke_p; - ParserOptimizeQuery optimize_p; ParserSystemQuery system_p; ParserTruncateQuery truncate_p; ParserManageQuery manage_p; @@ -51,7 +47,6 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || set_p.parse(pos, node, expected) || delete_p.parse(pos, node, expected) || dbginvoke_p.parse(pos, node, expected) - || optimize_p.parse(pos, node, expected) || system_p.parse(pos, node, expected) || truncate_p.parse(pos, node, expected) || manage_p.parse(pos, node, expected); @@ -59,4 +54,4 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return res; } -} +} // namespace DB
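
For reference, the dispatch style of ParserQuery::parseImpl above can be modeled with a short standalone sketch. Ast and SubParser are simplified stand-ins for ASTPtr and IParserBase, not the real interfaces: each sub-parser is tried in order and the first success wins, so deleting ParserOptimizeQuery simply removes one alternative from the chain and OPTIMIZE statements stop parsing.

#include <functional>
#include <string>
#include <vector>

// Simplified stand-in for an AST node (illustrative only).
struct Ast
{
    std::string kind;
};

// A sub-parser inspects the input and fills `out` on success.
using SubParser = std::function<bool(const std::string &, Ast &)>;

// First-match-wins alternation, like the `||` chain in ParserQuery::parseImpl.
bool parseAny(const std::vector<SubParser> & parsers, const std::string & input, Ast & out)
{
    for (const auto & parser : parsers)
        if (parser(input, out))
            return true;
    return false; // no alternative accepted the input, e.g. "OPTIMIZE ..." after this patch
}

int main()
{
    std::vector<SubParser> parsers = {
        [](const std::string & in, Ast & out) {
            if (in.rfind("SET", 0) != 0)
                return false;
            out.kind = "Set";
            return true;
        },
        [](const std::string & in, Ast & out) {
            if (in.rfind("TRUNCATE", 0) != 0)
                return false;
            out.kind = "Truncate";
            return true;
        },
    };

    Ast ast;
    bool parsed = parseAny(parsers, "OPTIMIZE TABLE t", ast);
    return parsed ? 1 : 0; // not parsed: the OPTIMIZE alternative no longer exists
}
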