From 167d39ff16696ec5d62f3a738b2f60c8c1db8563 Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Thu, 9 Jun 2022 17:34:31 +0800 Subject: [PATCH 01/10] Fix some fail cases when enable TASN (#5086) close pingcap/tiflash#5085 --- .../Management/tests/gtest_manual_compact.cpp | 8 +- dbms/src/Storages/DeltaMerge/StoragePool.cpp | 4 +- .../DeltaMerge/tests/MultiSegmentTestUtil.h | 3 + .../tests/gtest_dm_delta_merge_store.cpp | 3 - .../Page/V3/tests/gtest_blob_store.cpp | 3 + .../Page/V3/tests/gtest_page_directory.cpp | 102 +++++++++--------- .../Page/V3/tests/gtest_wal_store.cpp | 48 ++++----- dbms/src/TestUtils/ColumnsToTiPBExpr.cpp | 2 + 8 files changed, 87 insertions(+), 86 deletions(-) diff --git a/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp b/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp index df6c881c306..1e9da93ffe3 100644 --- a/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp +++ b/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -48,7 +47,6 @@ class BasicManualCompactTest BasicManualCompactTest() { - log = &Poco::Logger::get(DB::base::TiFlashStorageTestBasic::getCurrentFullTestName()); pk_type = GetParam(); } @@ -63,7 +61,7 @@ class BasicManualCompactTest setupStorage(); // In tests let's only compact one segment. - db_context->setSetting("manual_compact_more_until_ms", UInt64(0)); + db_context->setSetting("manual_compact_more_until_ms", Field(UInt64(0))); // Split into 4 segments, and prepare some delta data for first 3 segments. helper = std::make_unique(*db_context); @@ -116,8 +114,6 @@ class BasicManualCompactTest std::unique_ptr manager; DM::tests::DMTestEnv::PkType pk_type; - - [[maybe_unused]] Poco::Logger * log; }; @@ -315,7 +311,7 @@ CATCH TEST_P(BasicManualCompactTest, CompactMultiple) try { - db_context->setSetting("manual_compact_more_until_ms", UInt64(60 * 1000)); // Hope it's long enough! + db_context->setSetting("manual_compact_more_until_ms", Field(UInt64(60 * 1000))); // Hope it's long enough! auto request = ::kvrpcpb::CompactRequest(); request.set_physical_table_id(TABLE_ID); diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index 752898f9c75..2791a74e9e3 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -624,8 +624,8 @@ PageId StoragePool::newDataPageIdForDTFile(StableDiskDelegator & delegator, cons auto existed_path = delegator.getDTFilePath(dtfile_id, /*throw_on_not_exist=*/false); fiu_do_on(FailPoints::force_set_dtfile_exist_when_acquire_id, { - static size_t fail_point_called = 0; - if (existed_path.empty() && fail_point_called % 10 == 0) + static std::atomic fail_point_called(0); + if (existed_path.empty() && fail_point_called.load() % 10 == 0) { existed_path = ""; } diff --git a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h index 7c5b0b2416d..787a521ded3 100644 --- a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h +++ b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h @@ -88,6 +88,7 @@ class MultiSegmentTestUtil : private boost::noncopyable // Check there is only one segment ASSERT_EQ(store->segments.size(), 1); const auto & [_key, seg] = *store->segments.begin(); + (void)_key; ASSERT_EQ(seg->getDelta()->getRows(), n_avg_rows_per_segment * 4); ASSERT_EQ(seg->getStable()->getRows(), 0); @@ -108,6 +109,7 @@ class MultiSegmentTestUtil : private boost::noncopyable auto segment_idx = 0; for (auto & [_key, seg] : store->segments) { + (void)_key; LOG_FMT_INFO(log, "Segment #{}: Range = {}", segment_idx, seg->getRowKeyRange().toDebugString()); ASSERT_EQ(seg->getDelta()->getRows(), 0); ASSERT_GT(seg->getStable()->getRows(), 0); // We don't check the exact rows of each segment. @@ -147,6 +149,7 @@ class MultiSegmentTestUtil : private boost::noncopyable auto segment_idx = 0; for (auto & [_key, seg] : store->segments) { + (void)_key; ASSERT_EQ(seg->getDelta()->getRows(), expected_delta_rows[segment_idx]) << "Assert failed for segment #" << segment_idx; ASSERT_EQ(seg->getStable()->getRows(), expected_stable_rows[segment_idx]) << "Assert failed for segment #" << segment_idx; segment_idx++; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index e934f7a2049..d46e1b7aa36 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -3564,7 +3564,6 @@ class DeltaMergeStoreMergeDeltaBySegmentTest public: DeltaMergeStoreMergeDeltaBySegmentTest() { - log = &Poco::Logger::get(DB::base::TiFlashStorageTestBasic::getCurrentFullTestName()); std::tie(ps_ver, pk_type) = GetParam(); } @@ -3607,8 +3606,6 @@ class DeltaMergeStoreMergeDeltaBySegmentTest UInt64 ps_ver; DMTestEnv::PkType pk_type; - - [[maybe_unused]] Poco::Logger * log; }; INSTANTIATE_TEST_CASE_P( diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index 048140ed04f..94bb69045ba 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -82,6 +82,7 @@ try stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 128, + .padded_size = 0, .tag = 0, .offset = 1024, .checksum = 0x4567, @@ -89,6 +90,7 @@ try stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, @@ -96,6 +98,7 @@ try stats.restoreByEntry(PageEntryV3{ .file_id = file_id2, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 6e2b0efa1ea..83e07f75d37 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -75,7 +75,7 @@ try auto snap0 = dir->createSnapshot(); EXPECT_ENTRY_NOT_EXIST(dir, 1, snap0); - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -85,7 +85,7 @@ try auto snap1 = dir->createSnapshot(); EXPECT_ENTRY_EQ(entry1, dir, 1, snap1); - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(2, entry2); @@ -102,7 +102,7 @@ try EXPECT_ENTRIES_EQ(expected_entries, dir, ids, snap2); } - PageEntryV3 entry2_v2{.file_id = 2 + 102, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2_v2{.file_id = 2 + 102, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(2); @@ -123,7 +123,7 @@ try auto snap0 = dir->createSnapshot(); EXPECT_ENTRY_NOT_EXIST(dir, page_id, snap0); - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(page_id, entry1); @@ -133,7 +133,7 @@ try auto snap1 = dir->createSnapshot(); EXPECT_ENTRY_EQ(entry1, dir, page_id, snap1); - PageEntryV3 entry2{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x1234, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x1234, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(page_id, entry2); @@ -151,7 +151,7 @@ try // Put identical page within one `edit` page_id++; - PageEntryV3 entry3{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x12345, .checksum = 0x4567}; + PageEntryV3 entry3{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x12345, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(page_id, entry1); @@ -172,8 +172,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyPutDelRead) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -185,8 +185,8 @@ try EXPECT_ENTRY_EQ(entry1, dir, 1, snap1); EXPECT_ENTRY_EQ(entry2, dir, 2, snap1); - PageEntryV3 entry3{.file_id = 3, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry4{.file_id = 4, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry3{.file_id = 3, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry4{.file_id = 4, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(2); @@ -217,8 +217,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyUpdateOnRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -236,14 +236,14 @@ try EXPECT_ENTRY_EQ(entry2, dir, 3, snap1); // Update on ref page is not allowed - PageEntryV3 entry_updated{.file_id = 999, .size = 16, .tag = 0, .offset = 0x123, .checksum = 0x123}; + PageEntryV3 entry_updated{.file_id = 999, .size = 16, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x123}; { PageEntriesEdit edit; edit.put(3, entry_updated); ASSERT_ANY_THROW(dir->apply(std::move(edit))); } - PageEntryV3 entry_updated2{.file_id = 777, .size = 16, .tag = 0, .offset = 0x123, .checksum = 0x123}; + PageEntryV3 entry_updated2{.file_id = 777, .size = 16, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x123}; { PageEntriesEdit edit; edit.put(2, entry_updated2); @@ -255,8 +255,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyDeleteOnRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -305,8 +305,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyRefOnRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -343,8 +343,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyDuplicatedRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -410,8 +410,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyCollapseDuplicatedRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -447,9 +447,9 @@ CATCH TEST_F(PageDirectoryTest, ApplyRefToNotExistEntry) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry3{.file_id = 3, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry3{.file_id = 3, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -628,12 +628,12 @@ try } CATCH -#define INSERT_BLOBID_ENTRY(BLOBID, VERSION) \ - PageEntryV3 entry_v##VERSION{.file_id = (BLOBID), .size = (VERSION), .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ +#define INSERT_BLOBID_ENTRY(BLOBID, VERSION) \ + PageEntryV3 entry_v##VERSION{.file_id = (BLOBID), .size = (VERSION), .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ entries.createNewEntry(PageVersion(VERSION), entry_v##VERSION); #define INSERT_ENTRY(VERSION) INSERT_BLOBID_ENTRY(1, VERSION) -#define INSERT_GC_ENTRY(VERSION, EPOCH) \ - PageEntryV3 entry_gc_v##VERSION##_##EPOCH{.file_id = 2, .size = 100 * (VERSION) + (EPOCH), .tag = 0, .offset = 0x234, .checksum = 0x5678}; \ +#define INSERT_GC_ENTRY(VERSION, EPOCH) \ + PageEntryV3 entry_gc_v##VERSION##_##EPOCH{.file_id = 2, .size = 100 * (VERSION) + (EPOCH), .padded_size = 0, .tag = 0, .offset = 0x234, .checksum = 0x5678}; \ entries.createNewEntry(PageVersion((VERSION), (EPOCH)), entry_gc_v##VERSION##_##EPOCH); class VersionedEntriesTest : public ::testing::Test @@ -1271,12 +1271,12 @@ class PageDirectoryGCTest : public PageDirectoryTest { }; -#define INSERT_ENTRY_TO(PAGE_ID, VERSION, BLOB_FILE_ID) \ - PageEntryV3 entry_v##VERSION{.file_id = (BLOB_FILE_ID), .size = (VERSION), .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ - { \ - PageEntriesEdit edit; \ - edit.put((PAGE_ID), entry_v##VERSION); \ - dir->apply(std::move(edit)); \ +#define INSERT_ENTRY_TO(PAGE_ID, VERSION, BLOB_FILE_ID) \ + PageEntryV3 entry_v##VERSION{.file_id = (BLOB_FILE_ID), .size = (VERSION), .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ + { \ + PageEntriesEdit edit; \ + edit.put((PAGE_ID), entry_v##VERSION); \ + dir->apply(std::move(edit)); \ } // Insert an entry into mvcc directory #define INSERT_ENTRY(PAGE_ID, VERSION) INSERT_ENTRY_TO(PAGE_ID, VERSION, 1) @@ -1566,7 +1566,7 @@ try INSERT_ENTRY_ACQ_SNAP(page_id, 5); INSERT_ENTRY(another_page_id, 6); INSERT_ENTRY(another_page_id, 7); - PageEntryV3 entry_v8{.file_id = 1, .size = 8, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_v8{.file_id = 1, .size = 8, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(page_id); @@ -1756,7 +1756,7 @@ TEST_F(PageDirectoryGCTest, GCOnRefedEntries) try { // 10->entry1, 11->10=>11->entry1; del 10->entry1 - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(10, entry1); @@ -1793,7 +1793,7 @@ TEST_F(PageDirectoryGCTest, GCOnRefedEntries2) try { // 10->entry1, 11->10=>11->entry1; del 10->entry1 - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(10, entry1); @@ -1836,7 +1836,7 @@ TEST_F(PageDirectoryGCTest, UpsertOnRefedEntries) try { // 10->entry1, 11->10, 12->10 - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(10, entry1); @@ -1860,7 +1860,7 @@ try } // upsert 10->entry2 - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; auto full_gc_entries = dir->getEntriesByBlobIds({1}); @@ -2024,10 +2024,10 @@ try return d; }; - PageEntryV3 entry_1_v1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_1_v2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_2_v1{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_2_v2{.file_id = 2, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_1_v1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_1_v2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_2_v1{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_2_v2{.file_id = 2, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry_1_v1); @@ -2055,8 +2055,8 @@ try // 10->ext, 11->10, del 10->ext // 50->entry, 51->50, 52->51=>50, del 50 - PageEntryV3 entry_50{.file_id = 1, .size = 50, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_60{.file_id = 1, .size = 90, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50{.file_id = 1, .size = 50, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_60{.file_id = 1, .size = 90, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(2); @@ -2218,9 +2218,9 @@ try Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); - PageEntryV3 entry_1_v1{.file_id = file_id1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_5_v1{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x100, .checksum = 0x4567}; - PageEntryV3 entry_5_v2{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x400, .checksum = 0x4567}; + PageEntryV3 entry_1_v1{.file_id = file_id1, .size = 7890, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_5_v1{.file_id = file_id2, .size = 255, .padded_size = 0, .tag = 0, .offset = 0x100, .checksum = 0x4567}; + PageEntryV3 entry_5_v2{.file_id = file_id2, .size = 255, .padded_size = 0, .tag = 0, .offset = 0x400, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry_1_v1); @@ -2275,8 +2275,8 @@ CATCH TEST_F(PageDirectoryGCTest, CleanAfterDecreaseRef) try { - PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_50_2{.file_id = 2, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50_2{.file_id = 2, .size = 7890, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; auto restore_from_edit = [](const PageEntriesEdit & edit) { auto ctx = ::DB::tests::TiFlashTestEnv::getContext(); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp index 6d47adabbc5..b4e6c2d9204 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp @@ -34,8 +34,8 @@ namespace DB::PS::V3::tests { TEST(WALSeriTest, AllPuts) { - PageEntryV3 entry_p1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20(/*seq=*/20); PageEntriesEdit edit; edit.put(1, entry_p1); @@ -56,8 +56,8 @@ TEST(WALSeriTest, AllPuts) TEST(WALSeriTest, PutsAndRefsAndDels) try { - PageEntryV3 entry_p3{.file_id = 1, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5{.file_id = 1, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3{.file_id = 1, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5{.file_id = 1, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver21(/*seq=*/21); PageEntriesEdit edit; edit.put(3, entry_p3); @@ -104,9 +104,9 @@ CATCH TEST(WALSeriTest, Upserts) { - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20_1(/*seq=*/20, /*epoch*/ 1); PageVersion ver21_1(/*seq=*/21, /*epoch*/ 1); PageEntriesEdit edit; @@ -164,7 +164,7 @@ TEST(WALSeriTest, RefExternalAndEntry) { PageEntriesEdit edit; - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; edit.varEntry(1, ver1_0, entry_p1_2, 2); edit.varDel(1, ver2_0); edit.varRef(2, ver3_0, 1); @@ -405,8 +405,8 @@ try ASSERT_NE(wal, nullptr); // Stage 2. Apply with only puts - PageEntryV3 entry_p1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20(/*seq=*/20); { PageEntriesEdit edit; @@ -435,8 +435,8 @@ try } // Stage 3. Apply with puts and refs - PageEntryV3 entry_p3{.file_id = 1, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5{.file_id = 1, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3{.file_id = 1, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5{.file_id = 1, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver21(/*seq=*/21); { PageEntriesEdit edit; @@ -468,9 +468,9 @@ try // Stage 4. Apply with delete and upsert - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20_1(/*seq=*/20, /*epoch*/ 1); PageVersion ver21_1(/*seq=*/21, /*epoch*/ 1); { @@ -514,8 +514,8 @@ try std::vector size_each_edit; // Stage 1. Apply with only puts - PageEntryV3 entry_p1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20(/*seq=*/20); { PageEntriesEdit edit; @@ -526,8 +526,8 @@ try } // Stage 2. Apply with puts and refs - PageEntryV3 entry_p3{.file_id = 1, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5{.file_id = 1, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3{.file_id = 1, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5{.file_id = 1, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver21(/*seq=*/21); { PageEntriesEdit edit; @@ -540,9 +540,9 @@ try } // Stage 3. Apply with delete and upsert - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20_1(/*seq=*/20, /*epoch*/ 1); PageVersion ver21_1(/*seq=*/21, /*epoch*/ 1); { @@ -615,7 +615,7 @@ try PageVersion ver(/*seq*/ 32); for (size_t i = 0; i < num_edits_test; ++i) { - PageEntryV3 entry{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageEntriesEdit edit; const size_t num_pages_put = d_20(rd); for (size_t p = 0; p < num_pages_put; ++p) @@ -660,7 +660,7 @@ try .persisted_log_files = persisted_log_files}; PageEntriesEdit snap_edit; - PageEntryV3 entry{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; std::uniform_int_distribution<> d_10000(0, 10000); // just fill in some random entry for (size_t i = 0; i < 70; ++i) diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp index dcf727614b1..ea19ff08dd3 100644 --- a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp @@ -36,6 +36,7 @@ void columnToTiPBExpr(tipb::Expr * expr, const ColumnWithTypeAndName column, siz if (column.column->isColumnNullable()) { auto [col, null_map] = removeNullable(column.column.get()); + (void)null_map; is_const = col->isColumnConst(); } } @@ -97,6 +98,7 @@ void columnsToTiPBExprForTiDBCast( if (type_column.column->isColumnNullable()) { auto [col, null_map] = removeNullable(type_column.column.get()); + (void)null_map; is_const = col->isColumnConst(); } } From ba725cc09fe85074b663e8fbafa117d7c3b7af87 Mon Sep 17 00:00:00 2001 From: JaySon Date: Thu, 9 Jun 2022 20:18:30 +0800 Subject: [PATCH 02/10] PageStorage: Fix entry.tag after full gc && add more debug message (#5094) ref pingcap/tiflash#5076, close pingcap/tiflash#5093 --- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 5 +- dbms/src/Storages/Page/PageUtil.h | 2 +- .../Page/V2/tests/gtest_page_util.cpp | 3 ++ dbms/src/Storages/Page/V3/BlobStore.cpp | 42 +++++++++------- dbms/src/Storages/Page/V3/BlobStore.h | 2 +- .../Page/V3/tests/gtest_blob_store.cpp | 9 ++-- .../Page/V3/tests/gtest_page_storage.cpp | 49 +++++++++++++++++++ 7 files changed, 87 insertions(+), 25 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index a74404f3dbb..195ed5c53c2 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1137,9 +1138,11 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, } fiu_do_on(FailPoints::force_slow_page_storage_snapshot_release, { - std::thread thread_hold_snapshots([tasks]() { + std::thread thread_hold_snapshots([this, tasks]() { + LOG_FMT_WARNING(log, "failpoint force_slow_page_storage_snapshot_release begin"); std::this_thread::sleep_for(std::chrono::seconds(5 * 60)); (void)tasks; + LOG_FMT_WARNING(log, "failpoint force_slow_page_storage_snapshot_release end"); }); thread_hold_snapshots.detach(); }); diff --git a/dbms/src/Storages/Page/PageUtil.h b/dbms/src/Storages/Page/PageUtil.h index cebcbdb27f2..b0d8f0f88c8 100644 --- a/dbms/src/Storages/Page/PageUtil.h +++ b/dbms/src/Storages/Page/PageUtil.h @@ -281,7 +281,7 @@ void readFile(T & file, } if (unlikely(bytes_read != expected_bytes)) - throw DB::TiFlashException(fmt::format("No enough data in file {}, read bytes: {} , expected bytes: {}", file->getFileName(), bytes_read, expected_bytes), + throw DB::TiFlashException(fmt::format("No enough data in file {}, read bytes: {}, expected bytes: {}, offset: {}", file->getFileName(), bytes_read, expected_bytes, offset), Errors::PageStorage::FileSizeNotMatch); } diff --git a/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp b/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp index e72c7a87541..c4dd2178eb9 100644 --- a/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp +++ b/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB { @@ -30,6 +31,7 @@ namespace tests static const std::string FileName = "page_util_test"; TEST(PageUtilsTest, ReadWriteFile) +try { ::remove(FileName.c_str()); @@ -52,6 +54,7 @@ TEST(PageUtilsTest, ReadWriteFile) ::remove(FileName.c_str()); } +CATCH TEST(PageUtilsTest, FileNotExists) { diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index d5f71841b91..37a4fd429f4 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -555,7 +556,7 @@ void BlobStore::read(PageIDAndEntriesV3 & entries, const PageHandler & handler, for (const auto & [page_id_v3, entry] : entries) { - auto blob_file = read(entry.file_id, entry.offset, data_buf, entry.size, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset, data_buf, entry.size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { @@ -635,7 +636,7 @@ PageMap BlobStore::read(FieldReadInfos & to_read, const ReadLimiterPtr & read_li // TODO: Continuously fields can read by one system call. const auto [beg_offset, end_offset] = entry.getFieldOffsets(field_index); const auto size_to_read = end_offset - beg_offset; - auto blob_file = read(entry.file_id, entry.offset + beg_offset, write_offset, size_to_read, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset + beg_offset, write_offset, size_to_read, read_limiter); fields_offset_in_page.emplace(field_index, read_size_this_entry); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) @@ -732,7 +733,7 @@ PageMap BlobStore::read(PageIDAndEntriesV3 & entries, const ReadLimiterPtr & rea PageMap page_map; for (const auto & [page_id_v3, entry] : entries) { - auto blob_file = read(entry.file_id, entry.offset, pos, entry.size, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset, pos, entry.size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { @@ -797,7 +798,7 @@ Page BlobStore::read(const PageIDAndEntryV3 & id_entry, const ReadLimiterPtr & r free(p, buf_size); }); - auto blob_file = read(entry.file_id, entry.offset, data_buf, buf_size, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset, data_buf, buf_size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { ChecksumClass digest; @@ -824,11 +825,20 @@ Page BlobStore::read(const PageIDAndEntryV3 & id_entry, const ReadLimiterPtr & r return page; } -BlobFilePtr BlobStore::read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter, bool background) +BlobFilePtr BlobStore::read(const PageIdV3Internal & page_id_v3, BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter, bool background) { assert(buffers != nullptr); - auto blob_file = getBlobFile(blob_id); - blob_file->read(buffers, offset, size, read_limiter, background); + BlobFilePtr blob_file = getBlobFile(blob_id); + try + { + blob_file->read(buffers, offset, size, read_limiter, background); + } + catch (DB::Exception & e) + { + // add debug message + e.addMessage(fmt::format("(error while reading page data [page_id={}] [blob_id={}] [offset={}] [size={}] [background={}])", page_id_v3, blob_id, offset, size, background)); + e.rethrow(); + } return blob_file; } @@ -1117,21 +1127,15 @@ PageEntriesEdit BlobStore::gc(std::map & std::tie(blobfile_id, file_offset_beg) = getPosFromStats(next_alloc_size); } - PageEntryV3 new_entry; - - read(file_id, entry.offset, data_pos, entry.size, read_limiter, /*background*/ true); - - // No need do crc again, crc won't be changed. - new_entry.checksum = entry.checksum; - - // Need copy the field_offsets - new_entry.field_offsets = entry.field_offsets; - - // Entry size won't be changed. - new_entry.size = entry.size; + // Read the data into buffer by old entry + read(page_id, file_id, entry.offset, data_pos, entry.size, read_limiter, /*background*/ true); + // Most vars of the entry is not changed, but the file id and offset + // need to be updated. + PageEntryV3 new_entry = entry; new_entry.file_id = blobfile_id; new_entry.offset = file_offset_beg + offset_in_data; + new_entry.padded_size = 0; // reset padded size to be zero offset_in_data += new_entry.size; data_pos += new_entry.size; diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index 24bf4652123..6b139b98557 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -296,7 +296,7 @@ class BlobStore : private Allocator PageEntriesEdit handleLargeWrite(DB::WriteBatch & wb, const WriteLimiterPtr & write_limiter = nullptr); - BlobFilePtr read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter = nullptr, bool background = false); + BlobFilePtr read(const PageIdV3Internal & page_id_v3, BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter = nullptr, bool background = false); /** * Ask BlobStats to get a span from BlobStat. diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index 94bb69045ba..fdd08c7cb8e 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -534,7 +534,8 @@ TEST_F(BlobStoreTest, testWriteRead) ASSERT_EQ(record.entry.file_id, 1); // Read directly from the file - blob_store.read(record.entry.file_id, + blob_store.read(buildV3Id(TEST_NAMESPACE_ID, page_id), + record.entry.file_id, record.entry.offset, c_buff_read + index * buff_size, record.entry.size, @@ -634,7 +635,8 @@ TEST_F(BlobStoreTest, testWriteReadWithIOLimiter) { for (const auto & record : edits[i].getRecords()) { - blob_store.read(record.entry.file_id, + blob_store.read(buildV3Id(TEST_NAMESPACE_ID, page_id), + record.entry.file_id, record.entry.offset, c_buff_read + i * buff_size, record.entry.size, @@ -812,7 +814,8 @@ TEST_F(BlobStoreTest, testFeildOffsetWriteRead) ASSERT_EQ(check_field_sizes, offsets); // Read - blob_store.read(record.entry.file_id, + blob_store.read(buildV3Id(TEST_NAMESPACE_ID, page_id), + record.entry.file_id, record.entry.offset, c_buff_read + index * buff_size, record.entry.size, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index f7ba33c46c8..f9ef25cb973 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1441,6 +1441,55 @@ try } CATCH +TEST_F(PageStorageTest, EntryTagAfterFullGC) +try +{ + { + PageStorage::Config config; + config.blob_heavy_gc_valid_rate = 1.0; /// always run full gc + page_storage = reopenWithConfig(config); + } + + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + PageId page_id = 120; + UInt64 tag = 12345; + { + WriteBatch batch; + batch.putPage(page_id, tag, std::make_shared(c_buff, buf_sz), buf_sz, {}); + page_storage->write(std::move(batch)); + } + + { + auto entry = page_storage->getEntry(page_id); + ASSERT_EQ(entry.tag, tag); + auto page = page_storage->read(page_id); + for (size_t i = 0; i < buf_sz; ++i) + { + EXPECT_EQ(*(page.data.begin() + i), static_cast(i % 0xff)); + } + } + + auto done_full_gc = page_storage->gc(); + EXPECT_TRUE(done_full_gc); + + { + auto entry = page_storage->getEntry(page_id); + ASSERT_EQ(entry.tag, tag); + auto page = page_storage->read(page_id); + for (size_t i = 0; i < buf_sz; ++i) + { + EXPECT_EQ(*(page.data.begin() + i), static_cast(i % 0xff)); + } + } +} +CATCH } // namespace PS::V3::tests } // namespace DB From a9b322ab2a467827d0ce0574336fb214468362d8 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Fri, 10 Jun 2022 10:54:31 +0800 Subject: [PATCH 03/10] feat: add page_stress_testing as a subcommand of tiflash (#5038) close pingcap/tiflash#5037 --- CMakeLists.txt | 3 - dbms/CMakeLists.txt | 13 ++-- dbms/src/Server/CMakeLists.txt | 4 ++ dbms/src/Server/config_tools.h.in | 1 + dbms/src/Server/main.cpp | 8 ++- dbms/src/Storages/CMakeLists.txt | 9 ++- .../DeltaMerge/tools/workload/Main.cpp | 22 ------ .../{tools => }/workload/CMakeLists.txt | 3 - .../{tools => }/workload/DTWorkload.cpp | 20 +++--- .../{tools => }/workload/DTWorkload.h | 2 +- .../{tools => }/workload/DataGenerator.cpp | 16 +++-- .../{tools => }/workload/DataGenerator.h | 2 +- .../DeltaMerge/{tools => }/workload/Handle.h | 18 ++--- .../{tools => }/workload/KeyGenerator.cpp | 10 +-- .../{tools => }/workload/KeyGenerator.h | 4 +- .../{tools => }/workload/Limiter.cpp | 10 +-- .../DeltaMerge/{tools => }/workload/Limiter.h | 2 +- .../{tools => }/workload/MainEntry.cpp | 8 +-- .../{tools => }/workload/Options.cpp | 4 +- .../DeltaMerge/{tools => }/workload/Options.h | 0 .../workload/ReadColumnsGenerator.h | 4 +- .../{tools => }/workload/TableGenerator.cpp | 8 +-- .../{tools => }/workload/TableGenerator.h | 2 +- .../{tools => }/workload/TimestampGenerator.h | 0 .../DeltaMerge/{tools => }/workload/Utils.cpp | 8 +-- .../DeltaMerge/{tools => }/workload/Utils.h | 0 dbms/src/Storages/Page/CMakeLists.txt | 10 --- .../Page/stress/stress_page_storage.cpp | 46 ------------ .../tools => Page/workload}/CMakeLists.txt | 7 +- .../workload/HeavyMemoryCostInGC.cpp | 5 +- .../Page/{stress => }/workload/HeavyRead.cpp | 7 +- .../workload/HeavySkewWriteRead.cpp | 7 +- .../Page/{stress => }/workload/HeavyWrite.cpp | 7 +- .../workload/HighValidBigFileGC.cpp | 5 +- .../workload/HoldSnapshotsLongTime.cpp | 7 +- dbms/src/Storages/Page/workload/MainEntry.cpp | 70 +++++++++++++++++++ .../Page/{stress => }/workload/Normal.cpp | 5 +- .../{stress => workload}/PSBackground.cpp | 6 +- .../Page/{stress => workload}/PSBackground.h | 5 +- .../Page/{stress => workload}/PSRunnable.cpp | 13 ++-- .../Page/{stress => workload}/PSRunnable.h | 37 +++++----- .../Page/{stress => workload}/PSStressEnv.cpp | 7 +- .../Page/{stress => workload}/PSStressEnv.h | 3 + .../Page/{stress => workload}/PSWorkload.cpp | 5 +- .../Page/{stress => workload}/PSWorkload.h | 30 ++++---- .../workload/PageStorageInMemoryCapacity.cpp | 12 ++-- .../workload/ThousandsOfOffset.cpp | 7 +- 47 files changed, 270 insertions(+), 212 deletions(-) delete mode 100644 dbms/src/Storages/DeltaMerge/tools/workload/Main.cpp rename dbms/src/Storages/DeltaMerge/{tools => }/workload/CMakeLists.txt (86%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/DTWorkload.cpp (94%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/DTWorkload.h (97%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/DataGenerator.cpp (95%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/DataGenerator.h (96%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/Handle.h (90%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/KeyGenerator.cpp (92%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/KeyGenerator.h (92%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/Limiter.cpp (77%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/Limiter.h (96%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/MainEntry.cpp (97%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/Options.cpp (98%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/Options.h (100%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/ReadColumnsGenerator.h (93%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/TableGenerator.cpp (97%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/TableGenerator.h (96%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/TimestampGenerator.h (100%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/Utils.cpp (94%) rename dbms/src/Storages/DeltaMerge/{tools => }/workload/Utils.h (100%) delete mode 100644 dbms/src/Storages/Page/stress/stress_page_storage.cpp rename dbms/src/Storages/{DeltaMerge/tools => Page/workload}/CMakeLists.txt (53%) rename dbms/src/Storages/Page/{stress => }/workload/HeavyMemoryCostInGC.cpp (96%) rename dbms/src/Storages/Page/{stress => }/workload/HeavyRead.cpp (94%) rename dbms/src/Storages/Page/{stress => }/workload/HeavySkewWriteRead.cpp (95%) rename dbms/src/Storages/Page/{stress => }/workload/HeavyWrite.cpp (94%) rename dbms/src/Storages/Page/{stress => }/workload/HighValidBigFileGC.cpp (97%) rename dbms/src/Storages/Page/{stress => }/workload/HoldSnapshotsLongTime.cpp (95%) create mode 100644 dbms/src/Storages/Page/workload/MainEntry.cpp rename dbms/src/Storages/Page/{stress => }/workload/Normal.cpp (95%) rename dbms/src/Storages/Page/{stress => workload}/PSBackground.cpp (96%) rename dbms/src/Storages/Page/{stress => workload}/PSBackground.h (97%) rename dbms/src/Storages/Page/{stress => workload}/PSRunnable.cpp (97%) rename dbms/src/Storages/Page/{stress => workload}/PSRunnable.h (90%) rename dbms/src/Storages/Page/{stress => workload}/PSStressEnv.cpp (97%) rename dbms/src/Storages/Page/{stress => workload}/PSStressEnv.h (98%) rename dbms/src/Storages/Page/{stress => workload}/PSWorkload.cpp (98%) rename dbms/src/Storages/Page/{stress => workload}/PSWorkload.h (92%) rename dbms/src/Storages/Page/{stress => }/workload/PageStorageInMemoryCapacity.cpp (96%) rename dbms/src/Storages/Page/{stress => }/workload/ThousandsOfOffset.cpp (97%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e14c205f18..f2ec9f3316b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -432,9 +432,6 @@ else (ENABLE_FAILPOINTS) message (STATUS "Failpoints are disabled") endif (ENABLE_FAILPOINTS) -# Enable PageStorage V3 test. -option (ENABLE_V3_PAGESTORAGE "Enables V3 PageStorage" ON) - # Flags for test coverage option (TEST_COVERAGE "Enables flags for test coverage" OFF) option (TEST_COVERAGE_XML "Output XML report for test coverage" OFF) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index cce11bd6997..e1e52fab73b 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -91,12 +91,10 @@ add_headers_and_sources(dbms src/Storages/Page/V2/VersionSet) add_headers_and_sources(dbms src/Storages/Page/V2/gc) add_headers_and_sources(dbms src/WindowFunctions) add_headers_and_sources(dbms src/TiDB/Schema) -if (ENABLE_V3_PAGESTORAGE) - add_headers_and_sources(dbms src/Storages/Page/V3) - add_headers_and_sources(dbms src/Storages/Page/V3/LogFile) - add_headers_and_sources(dbms src/Storages/Page/V3/WAL) - add_headers_and_sources(dbms src/Storages/Page/V3/spacemap) -endif() +add_headers_and_sources(dbms src/Storages/Page/V3) +add_headers_and_sources(dbms src/Storages/Page/V3/LogFile) +add_headers_and_sources(dbms src/Storages/Page/V3/WAL) +add_headers_and_sources(dbms src/Storages/Page/V3/spacemap) add_headers_and_sources(dbms src/Storages/Page/) add_headers_and_sources(dbms src/TiDB) add_headers_and_sources(dbms src/Client) @@ -323,6 +321,9 @@ if (ENABLE_TESTS) if (ENABLE_TIFLASH_DTWORKLOAD) target_link_libraries(bench_dbms dt-workload-lib) endif () + if (ENABLE_TIFLASH_PAGEWORKLOAD) + target_link_libraries(bench_dbms page-workload-lib) + endif () add_check(bench_dbms) endif () diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index 63cf6d0e1f9..104b4f34e4a 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -22,6 +22,7 @@ option(ENABLE_CLICKHOUSE_SERVER "Enable server" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_CLICKHOUSE_CLIENT "Enable client" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_TIFLASH_DTTOOL "Enable dttool: tools to manage dmfile" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_TIFLASH_DTWORKLOAD "Enable dtworkload: tools to test and stress DeltaTree" ${ENABLE_CLICKHOUSE_ALL}) +option(ENABLE_TIFLASH_PAGEWORKLOAD "Enable pageworkload: tools to test and stress PageStorage" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_TIFLASH_PAGECTL "Enable pagectl: tools to debug page storage" ${ENABLE_CLICKHOUSE_ALL}) configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h) @@ -136,6 +137,9 @@ endif () if (ENABLE_TIFLASH_DTWORKLOAD) target_link_libraries(tiflash dt-workload-lib) endif () +if (ENABLE_TIFLASH_PAGEWORKLOAD) + target_link_libraries(tiflash page-workload-lib) +endif() if (ENABLE_TIFLASH_PAGECTL) target_link_libraries(tiflash page-ctl-lib) endif () diff --git a/dbms/src/Server/config_tools.h.in b/dbms/src/Server/config_tools.h.in index 61aa3f41591..03a478a6473 100644 --- a/dbms/src/Server/config_tools.h.in +++ b/dbms/src/Server/config_tools.h.in @@ -6,4 +6,5 @@ #cmakedefine01 ENABLE_CLICKHOUSE_CLIENT #cmakedefine01 ENABLE_TIFLASH_DTTOOL #cmakedefine01 ENABLE_TIFLASH_DTWORKLOAD +#cmakedefine01 ENABLE_TIFLASH_PAGEWORKLOAD #cmakedefine01 ENABLE_TIFLASH_PAGECTL diff --git a/dbms/src/Server/main.cpp b/dbms/src/Server/main.cpp index 11cccf84729..dbcaa4f38fc 100644 --- a/dbms/src/Server/main.cpp +++ b/dbms/src/Server/main.cpp @@ -36,7 +36,10 @@ #include #endif #if ENABLE_TIFLASH_DTWORKLOAD -#include +#include +#endif +#if ENABLE_TIFLASH_PAGEWORKLOAD +#include #endif #if ENABLE_TIFLASH_PAGECTL #include @@ -107,6 +110,9 @@ std::pair clickhouse_applications[] = { #if ENABLE_TIFLASH_DTWORKLOAD {"dtworkload", DB::DM::tests::DTWorkload::mainEntry}, #endif +#if ENABLE_TIFLASH_PAGEWORKLOAD + {"pageworkload", DB::PS::tests::StressWorkload::mainEntry}, +#endif #if ENABLE_TIFLASH_PAGECTL {"pagectl", DB::PageStorageCtl::mainEntry}, #endif diff --git a/dbms/src/Storages/CMakeLists.txt b/dbms/src/Storages/CMakeLists.txt index 90cc7a01d5b..68a2e6c9a74 100644 --- a/dbms/src/Storages/CMakeLists.txt +++ b/dbms/src/Storages/CMakeLists.txt @@ -15,16 +15,15 @@ add_subdirectory (System) add_subdirectory (Page) add_subdirectory (DeltaMerge/File/dtpb) -add_subdirectory (DeltaMerge/tools) +add_subdirectory (DeltaMerge/workload) +add_subdirectory (Page/workload) if (ENABLE_TESTS) add_subdirectory (tests EXCLUDE_FROM_ALL) add_subdirectory (Transaction/tests EXCLUDE_FROM_ALL) add_subdirectory (Page/V2/tests EXCLUDE_FROM_ALL) - if (ENABLE_V3_PAGESTORAGE) - add_subdirectory (Page/V3 EXCLUDE_FROM_ALL) - add_subdirectory (Page/V3/tests EXCLUDE_FROM_ALL) - endif () + add_subdirectory (Page/V3 EXCLUDE_FROM_ALL) + add_subdirectory (Page/V3/tests EXCLUDE_FROM_ALL) add_subdirectory (DeltaMerge/tests EXCLUDE_FROM_ALL) endif () diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Main.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/Main.cpp deleted file mode 100644 index 092c8a89a42..00000000000 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Main.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -using namespace DB::DM::tests; - -int main(int argc, char ** argv) -{ - return DTWorkload::mainEntry(argc, argv); -} diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/CMakeLists.txt b/dbms/src/Storages/DeltaMerge/workload/CMakeLists.txt similarity index 86% rename from dbms/src/Storages/DeltaMerge/tools/workload/CMakeLists.txt rename to dbms/src/Storages/DeltaMerge/workload/CMakeLists.txt index 7227f1cf563..7a83cbec57c 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/CMakeLists.txt +++ b/dbms/src/Storages/DeltaMerge/workload/CMakeLists.txt @@ -18,6 +18,3 @@ set(dt-workload-src MainEntry.cpp DTWorkload.cpp KeyGenerator.cpp TableGenerator add_library(dt-workload-lib ${dt-workload-src}) target_link_libraries(dt-workload-lib dbms clickhouse_functions clickhouse-server-lib) - -add_executable(dt-workload Main.cpp ${dt-workload-src}) -target_link_libraries(dt-workload dbms gtest clickhouse_functions clickhouse-server-lib) diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp similarity index 94% rename from dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp rename to dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp index a6113f91d91..a53a1b9ebbd 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp @@ -19,16 +19,16 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.h b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.h similarity index 97% rename from dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.h rename to dbms/src/Storages/DeltaMerge/workload/DTWorkload.h index 26cc5b6e07c..1ee5ba6b871 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.h +++ b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.h @@ -73,7 +73,7 @@ class ThreadStat class Statistics { public: - Statistics(int write_thread_count = 0, int read_thread_count = 0) + explicit Statistics(int write_thread_count = 0, int read_thread_count = 0) : init_ms(0) , write_stats(write_thread_count) , read_stats(read_thread_count) diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.cpp b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp similarity index 95% rename from dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.cpp rename to dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp index be6ff1dcbbe..479977d46d1 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp @@ -13,11 +13,11 @@ // limitations under the License. #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -33,7 +33,7 @@ class RandomDataGenerator : public DataGenerator , rand_gen(std::random_device()()) {} - virtual std::tuple get(uint64_t key) override + std::tuple get(uint64_t key) override { Block block; // Generate 'rowkeys'. @@ -227,7 +227,9 @@ class RandomDataGenerator : public DataGenerator struct tm randomLocalTime() { time_t t = randomUTCTimestamp(); - struct tm res; + struct tm res + { + }; if (localtime_r(&t, &res) == nullptr) { throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", t, strerror(errno))); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.h b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.h similarity index 96% rename from dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/DataGenerator.h index e32de4591e6..cd29f1a3a80 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.h @@ -27,7 +27,7 @@ class DataGenerator public: static std::unique_ptr create(const WorkloadOptions & opts, const TableInfo & table_info, TimestampGenerator & ts_gen); virtual std::tuple get(uint64_t key) = 0; - virtual ~DataGenerator() {} + virtual ~DataGenerator() = default; }; std::string blockToString(const Block & block); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Handle.h b/dbms/src/Storages/DeltaMerge/workload/Handle.h similarity index 90% rename from dbms/src/Storages/DeltaMerge/tools/workload/Handle.h rename to dbms/src/Storages/DeltaMerge/workload/Handle.h index eb117a4fddd..c4949c15a1f 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Handle.h +++ b/dbms/src/Storages/DeltaMerge/workload/Handle.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include @@ -40,7 +40,7 @@ class HandleLock static constexpr uint64_t default_lock_count = 4096; static std::unique_ptr create(const TableInfo & table_info); - HandleLock(uint64_t lock_count = default_lock_count) + explicit HandleLock(uint64_t lock_count = default_lock_count) : rmtxs(lock_count) {} @@ -51,14 +51,14 @@ class HandleLock std::vector> getLocks(const std::vector & handles) { - std::vector indexes; + std::vector indexes(handles.size()); for (const auto & h : handles) { indexes.push_back(index(h)); } // Sort mutex indexes to avoid dead lock. sort(indexes.begin(), indexes.end()); - std::vector> locks; + std::vector> locks(indexes.size()); for (auto i : indexes) { locks.push_back(getLockByIndex(i)); @@ -105,7 +105,7 @@ class HandleTable std::lock_guard lock(mtx); handle_to_ts[handle] = ts; Record r{handle, ts}; - if (wal != nullptr && wal->write((char *)&r, sizeof(r)) != sizeof(r)) + if (wal != nullptr && wal->write(reinterpret_cast(&r), sizeof(r)) != sizeof(r)) { throw std::runtime_error(fmt::format("write ret {}", strerror(errno))); } @@ -134,8 +134,8 @@ class HandleTable try { PosixRandomAccessFile f(fname, -1); - Record r; - while (f.read((char *)&r, sizeof(r)) == sizeof(r)) + Record r{}; + while (f.read(reinterpret_cast(&r), sizeof(r)) == sizeof(r)) { handle_to_ts[r.handle] = r.ts; } @@ -156,7 +156,7 @@ class HandleTable for (const auto & pa : handle_to_ts) { Record r{pa.first, pa.second}; - if (f.write((char *)&r, sizeof(r)) != sizeof(r)) + if (f.write(reinterpret_cast(&r), sizeof(r)) != sizeof(r)) { throw std::runtime_error(fmt::format("write ret {}", strerror(errno))); } @@ -191,7 +191,7 @@ class SharedHandleTable public: static constexpr uint64_t default_shared_count = 4096; - SharedHandleTable(uint64_t max_key_count, const std::string & waldir = "", uint64_t shared_cnt = default_shared_count) + explicit SharedHandleTable(uint64_t max_key_count, const std::string & waldir = "", uint64_t shared_cnt = default_shared_count) : tables(shared_cnt) { uint64_t max_key_count_per_shared = max_key_count / default_shared_count + 1; diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.cpp b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.cpp similarity index 92% rename from dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.cpp rename to dbms/src/Storages/DeltaMerge/workload/KeyGenerator.cpp index bb2f2253279..f899ec71b4b 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include +#include +#include #include #include @@ -31,7 +31,7 @@ class IncrementalKeyGenerator : public KeyGenerator , key(0) {} - virtual uint64_t get64() override + uint64_t get64() override { return key.fetch_add(1, std::memory_order_relaxed) % key_count + start_key; } @@ -54,7 +54,7 @@ class UniformDistributionKeyGenerator : public KeyGenerator , uniform_dist(0, key_count) {} - virtual uint64_t get64() override + uint64_t get64() override { std::lock_guard lock(mtx); return uniform_dist(rand_gen); @@ -78,7 +78,7 @@ class NormalDistributionKeyGenerator : public KeyGenerator , normal_dist(key_count / 2.0, key_count / 20.0) {} - virtual uint64_t get64() override + uint64_t get64() override { std::lock_guard lock(mtx); return normal_dist(rand_gen); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.h b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.h similarity index 92% rename from dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/KeyGenerator.h index 447f3ffc27a..7c8b8fd0080 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.h @@ -23,8 +23,8 @@ class KeyGenerator public: static std::unique_ptr create(const WorkloadOptions & opts); - KeyGenerator() {} - virtual ~KeyGenerator() {} + KeyGenerator() = default; + virtual ~KeyGenerator() = default; virtual uint64_t get64() = 0; }; diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.cpp b/dbms/src/Storages/DeltaMerge/workload/Limiter.cpp similarity index 77% rename from dbms/src/Storages/DeltaMerge/tools/workload/Limiter.cpp rename to dbms/src/Storages/DeltaMerge/workload/Limiter.cpp index 73764d27bc5..65f9e3ce72c 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/Limiter.cpp @@ -13,8 +13,8 @@ // limitations under the License. #include -#include -#include +#include +#include #include #include @@ -24,10 +24,10 @@ namespace DB::DM::tests class ConstantLimiter : public Limiter { public: - ConstantLimiter(uint64_t rate_per_sec) + explicit ConstantLimiter(uint64_t rate_per_sec) : limiter(rate_per_sec, LimiterType::UNKNOW) {} - virtual void request() override + void request() override { limiter.request(1); } @@ -38,7 +38,7 @@ class ConstantLimiter : public Limiter std::unique_ptr Limiter::create(const WorkloadOptions & opts) { - uint64_t per_sec = std::ceil(static_cast(opts.max_write_per_sec / opts.write_thread_count)); + uint64_t per_sec = std::ceil(opts.max_write_per_sec * 1.0 / opts.write_thread_count); return std::make_unique(per_sec); } diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.h b/dbms/src/Storages/DeltaMerge/workload/Limiter.h similarity index 96% rename from dbms/src/Storages/DeltaMerge/tools/workload/Limiter.h rename to dbms/src/Storages/DeltaMerge/workload/Limiter.h index e2892b178a2..da2d31c7915 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.h +++ b/dbms/src/Storages/DeltaMerge/workload/Limiter.h @@ -23,6 +23,6 @@ class Limiter public: static std::unique_ptr create(const WorkloadOptions & opts); virtual void request() = 0; - virtual ~Limiter() {} + virtual ~Limiter() = default; }; } // namespace DB::DM::tests \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp b/dbms/src/Storages/DeltaMerge/workload/MainEntry.cpp similarity index 97% rename from dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp rename to dbms/src/Storages/DeltaMerge/workload/MainEntry.cpp index f79d414f20b..88cf0b6322f 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/MainEntry.cpp @@ -14,10 +14,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp b/dbms/src/Storages/DeltaMerge/workload/Options.cpp similarity index 98% rename from dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp rename to dbms/src/Storages/DeltaMerge/workload/Options.cpp index 1c6409f3c53..8545d22ca8d 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/Options.cpp @@ -13,8 +13,8 @@ // limitations under the License. #include -#include -#include +#include +#include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.h b/dbms/src/Storages/DeltaMerge/workload/Options.h similarity index 100% rename from dbms/src/Storages/DeltaMerge/tools/workload/Options.h rename to dbms/src/Storages/DeltaMerge/workload/Options.h diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/ReadColumnsGenerator.h b/dbms/src/Storages/DeltaMerge/workload/ReadColumnsGenerator.h similarity index 93% rename from dbms/src/Storages/DeltaMerge/tools/workload/ReadColumnsGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/ReadColumnsGenerator.h index 180409f89e1..c881bb148a2 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/ReadColumnsGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/ReadColumnsGenerator.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include @@ -28,7 +28,7 @@ class ReadColumnsGenerator return std::make_unique(table_info); } - ReadColumnsGenerator(const TableInfo & table_info_) + explicit ReadColumnsGenerator(const TableInfo & table_info_) : table_info(table_info_) , rand_gen(std::random_device()()) , uniform_dist(0, table_info_.columns->size() - 1) diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.cpp similarity index 97% rename from dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp rename to dbms/src/Storages/DeltaMerge/workload/TableGenerator.cpp index cf52e808ab1..ec29a476d6a 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.cpp @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -237,7 +237,7 @@ class RandomTableGenerator : public TableGenerator , rand_gen(std::random_device()()) {} - virtual TableInfo get(int64_t table_id, std::string table_name) override + TableInfo get(int64_t table_id, std::string table_name) override { TableInfo table_info; @@ -293,7 +293,7 @@ class RandomTableGenerator : public TableGenerator class ConstantTableGenerator : public TableGenerator { - virtual TableInfo get(int64_t table_id, std::string table_name) override + TableInfo get(int64_t table_id, std::string table_name) override { TableInfo table_info; diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.h similarity index 96% rename from dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/TableGenerator.h index aba5c1590b7..b88bf2b72e2 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.h @@ -38,6 +38,6 @@ class TableGenerator virtual TableInfo get(int64_t table_id, std::string table_name) = 0; - virtual ~TableGenerator() {} + virtual ~TableGenerator() = default; }; } // namespace DB::DM::tests \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TimestampGenerator.h b/dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h similarity index 100% rename from dbms/src/Storages/DeltaMerge/tools/workload/TimestampGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Utils.cpp b/dbms/src/Storages/DeltaMerge/workload/Utils.cpp similarity index 94% rename from dbms/src/Storages/DeltaMerge/tools/workload/Utils.cpp rename to dbms/src/Storages/DeltaMerge/workload/Utils.cpp index 1cefae724c6..80d9f788016 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Utils.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/Utils.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include @@ -83,7 +83,7 @@ std::string fieldToString(const DataTypePtr & data_type, const Field & f) } else if (t == Field::Types::Which::Decimal256) { - auto i = f.get(); + const auto & i = f.get(); auto scale = dynamic_cast(data_type.get())->getScale(); return i.toString(scale); } @@ -105,8 +105,8 @@ std::vector colToVec(const DataTypePtr & data_type, const ColumnPtr std::string blockToString(const Block & block) { std::string s = "id name type values\n"; - auto & cols = block.getColumnsWithTypeAndName(); - for (auto & col : cols) + const auto & cols = block.getColumnsWithTypeAndName(); + for (const auto & col : cols) { s += fmt::format("{} {} {} {}\n", col.column_id, col.name, col.type->getFamilyName(), colToVec(col.type, col.column)); } diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Utils.h b/dbms/src/Storages/DeltaMerge/workload/Utils.h similarity index 100% rename from dbms/src/Storages/DeltaMerge/tools/workload/Utils.h rename to dbms/src/Storages/DeltaMerge/workload/Utils.h diff --git a/dbms/src/Storages/Page/CMakeLists.txt b/dbms/src/Storages/Page/CMakeLists.txt index cead83fa126..f208dc84be2 100644 --- a/dbms/src/Storages/Page/CMakeLists.txt +++ b/dbms/src/Storages/Page/CMakeLists.txt @@ -14,13 +14,3 @@ add_subdirectory(V2) add_subdirectory(tools) - -# PageStorage Stress test -if (ENABLE_V3_PAGESTORAGE) - add_headers_and_sources(page_stress_testing stress) - add_headers_and_sources(page_stress_testing stress/workload) - add_executable(page_stress_testing EXCLUDE_FROM_ALL ${page_stress_testing_sources}) - target_link_libraries(page_stress_testing dbms page_storage_v3) - target_include_directories(page_stress_testing PRIVATE stress) - target_compile_options(page_stress_testing PRIVATE -Wno-format -lc++) # turn off printf format check -endif() \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/stress_page_storage.cpp b/dbms/src/Storages/Page/stress/stress_page_storage.cpp deleted file mode 100644 index 818be710363..00000000000 --- a/dbms/src/Storages/Page/stress/stress_page_storage.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -namespace DB -{ -// Define is_background_thread for this binary -// It is required for `RateLimiter` but we do not link with `BackgroundProcessingPool`. -#if __APPLE__ && __clang__ -__thread bool is_background_thread = false; -#else -thread_local bool is_background_thread = false; -#endif -} // namespace DB - -int main(int argc, char ** argv) -try -{ - StressEnv::initGlobalLogger(); - auto env = StressEnv::parse(argc, argv); - env.setup(); - - auto & mamager = StressWorkloadManger::getInstance(); - mamager.setEnv(env); - mamager.runWorkload(); - - return StressEnvStatus::getInstance().isSuccess(); -} -catch (...) -{ - DB::tryLogCurrentException(""); - exit(-1); -} diff --git a/dbms/src/Storages/DeltaMerge/tools/CMakeLists.txt b/dbms/src/Storages/Page/workload/CMakeLists.txt similarity index 53% rename from dbms/src/Storages/DeltaMerge/tools/CMakeLists.txt rename to dbms/src/Storages/Page/workload/CMakeLists.txt index 36270a0c8e4..5c8ecb34d97 100644 --- a/dbms/src/Storages/DeltaMerge/tools/CMakeLists.txt +++ b/dbms/src/Storages/Page/workload/CMakeLists.txt @@ -14,4 +14,9 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR}) -add_subdirectory (workload EXCLUDE_FROM_ALL) +set (page-workload-src HeavyMemoryCostInGC.cpp HeavyRead.cpp HeavySkewWriteRead.cpp HeavyWrite.cpp HighValidBigFileGC.cpp HoldSnapshotsLongTime.cpp Normal.cpp + PageStorageInMemoryCapacity.cpp ThousandsOfOffset.cpp MainEntry.cpp Normal.cpp PageStorageInMemoryCapacity.cpp PSBackground.cpp PSRunnable.cpp PSStressEnv.cpp PSWorkload.cpp) + +add_library (page-workload-lib ${page-workload-src}) +target_link_libraries (page-workload-lib dbms clickhouse_functions clickhouse-server-lib) +target_compile_options (page-workload-lib PRIVATE -Wno-format -lc++) \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HeavyMemoryCostInGC.cpp b/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.cpp similarity index 96% rename from dbms/src/Storages/Page/stress/workload/HeavyMemoryCostInGC.cpp rename to dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.cpp index 40595f0cb59..7e745e29fc2 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavyMemoryCostInGC.cpp +++ b/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HeavyMemoryCostInGC : public StressWorkload , public StressWorkloadFunc @@ -81,3 +83,4 @@ class HeavyMemoryCostInGC }; REGISTER_WORKLOAD(HeavyMemoryCostInGC) +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/workload/HeavyRead.cpp b/dbms/src/Storages/Page/workload/HeavyRead.cpp similarity index 94% rename from dbms/src/Storages/Page/stress/workload/HeavyRead.cpp rename to dbms/src/Storages/Page/workload/HeavyRead.cpp index 15aeb1320cf..a67c435e84c 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavyRead.cpp +++ b/dbms/src/Storages/Page/workload/HeavyRead.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HeavyRead : public StressWorkload , public StressWorkloadFunc { @@ -69,4 +71,5 @@ class HeavyRead : public StressWorkload } }; -REGISTER_WORKLOAD(HeavyRead) \ No newline at end of file +REGISTER_WORKLOAD(HeavyRead) +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HeavySkewWriteRead.cpp b/dbms/src/Storages/Page/workload/HeavySkewWriteRead.cpp similarity index 95% rename from dbms/src/Storages/Page/stress/workload/HeavySkewWriteRead.cpp rename to dbms/src/Storages/Page/workload/HeavySkewWriteRead.cpp index 78ffa5b60e0..805bf105358 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavySkewWriteRead.cpp +++ b/dbms/src/Storages/Page/workload/HeavySkewWriteRead.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HeavySkewWriteRead : public StressWorkload , public StressWorkloadFunc { @@ -84,4 +86,5 @@ class HeavySkewWriteRead : public StressWorkload } }; -REGISTER_WORKLOAD(HeavySkewWriteRead) \ No newline at end of file +REGISTER_WORKLOAD(HeavySkewWriteRead) +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HeavyWrite.cpp b/dbms/src/Storages/Page/workload/HeavyWrite.cpp similarity index 94% rename from dbms/src/Storages/Page/stress/workload/HeavyWrite.cpp rename to dbms/src/Storages/Page/workload/HeavyWrite.cpp index 265b289db56..8dfd7f810f7 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavyWrite.cpp +++ b/dbms/src/Storages/Page/workload/HeavyWrite.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HeavyWrite : public StressWorkload , public StressWorkloadFunc { @@ -71,4 +73,5 @@ class HeavyWrite : public StressWorkload } }; -REGISTER_WORKLOAD(HeavyWrite) \ No newline at end of file +REGISTER_WORKLOAD(HeavyWrite) +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HighValidBigFileGC.cpp b/dbms/src/Storages/Page/workload/HighValidBigFileGC.cpp similarity index 97% rename from dbms/src/Storages/Page/stress/workload/HighValidBigFileGC.cpp rename to dbms/src/Storages/Page/workload/HighValidBigFileGC.cpp index 866782c9578..a9af6aebb76 100644 --- a/dbms/src/Storages/Page/stress/workload/HighValidBigFileGC.cpp +++ b/dbms/src/Storages/Page/workload/HighValidBigFileGC.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HighValidBigFileGCWorkload : public StressWorkload , public StressWorkloadFunc @@ -129,3 +131,4 @@ class HighValidBigFileGCWorkload }; REGISTER_WORKLOAD(HighValidBigFileGCWorkload) +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HoldSnapshotsLongTime.cpp b/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.cpp similarity index 95% rename from dbms/src/Storages/Page/stress/workload/HoldSnapshotsLongTime.cpp rename to dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.cpp index b49347fc858..f02fbf65bcd 100644 --- a/dbms/src/Storages/Page/stress/workload/HoldSnapshotsLongTime.cpp +++ b/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HoldSnapshotsLongTime : public StressWorkload , public StressWorkloadFunc { @@ -93,4 +95,5 @@ class HoldSnapshotsLongTime : public StressWorkload } }; -REGISTER_WORKLOAD(HoldSnapshotsLongTime) \ No newline at end of file +REGISTER_WORKLOAD(HoldSnapshotsLongTime) +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/MainEntry.cpp b/dbms/src/Storages/Page/workload/MainEntry.cpp new file mode 100644 index 00000000000..ac82e1ea4bc --- /dev/null +++ b/dbms/src/Storages/Page/workload/MainEntry.cpp @@ -0,0 +1,70 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include + +using namespace DB::PS::tests; + +int StressWorkload::mainEntry(int argc, char ** argv) +{ + { + // maybe due to sequence of linking, REGISTER_WORKLOAD is not visible to main function in dbms/src/Server/main.cpp + // cause that REGISTER_WORKLOAD will not be triggered before mainEntry + // we do this to trigger REGISTER_WORKLOAD explicitly. + void _work_load_register_named_HeavyMemoryCostInGC(); + void (*f)() = _work_load_register_named_HeavyMemoryCostInGC; + (void)f; + void _work_load_register_named_HeavyRead(); + f = _work_load_register_named_HeavyRead; + (void)f; + void _work_load_register_named_HeavySkewWriteRead(); + f = _work_load_register_named_HeavySkewWriteRead; + (void)f; + void _work_load_register_named_HeavyWrite(); + f = _work_load_register_named_HeavyWrite; + (void)f; + void _work_load_register_named_HighValidBigFileGCWorkload(); + f = _work_load_register_named_HighValidBigFileGCWorkload; + (void)f; + void _work_load_register_named_HoldSnapshotsLongTime(); + f = _work_load_register_named_HoldSnapshotsLongTime; + (void)f; + void _work_load_register_named_PageStorageInMemoryCapacity(); + f = _work_load_register_named_PageStorageInMemoryCapacity; + (void)f; + void _work_load_register_named_NormalWorkload(); + f = _work_load_register_named_NormalWorkload; + (void)f; + void _work_load_register_named_ThousandsOfOffset(); + f = _work_load_register_named_ThousandsOfOffset; + (void)f; + } + try + { + StressEnv::initGlobalLogger(); + auto env = StressEnv::parse(argc, argv); + env.setup(); + + auto & mamager = StressWorkloadManger::getInstance(); + mamager.setEnv(env); + mamager.runWorkload(); + + return StressEnvStatus::getInstance().isSuccess(); + } + catch (...) + { + DB::tryLogCurrentException(""); + exit(-1); + } +} \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/Normal.cpp b/dbms/src/Storages/Page/workload/Normal.cpp similarity index 95% rename from dbms/src/Storages/Page/stress/workload/Normal.cpp rename to dbms/src/Storages/Page/workload/Normal.cpp index 0323b857613..57229395809 100644 --- a/dbms/src/Storages/Page/stress/workload/Normal.cpp +++ b/dbms/src/Storages/Page/workload/Normal.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class NormalWorkload : public StressWorkload , public StressWorkloadFunc @@ -77,3 +79,4 @@ class NormalWorkload }; REGISTER_WORKLOAD(NormalWorkload) +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSBackground.cpp b/dbms/src/Storages/Page/workload/PSBackground.cpp similarity index 96% rename from dbms/src/Storages/Page/stress/PSBackground.cpp rename to dbms/src/Storages/Page/workload/PSBackground.cpp index af7329e8348..247bea23dcc 100644 --- a/dbms/src/Storages/Page/stress/PSBackground.cpp +++ b/dbms/src/Storages/Page/workload/PSBackground.cpp @@ -13,11 +13,14 @@ // limitations under the License. #include -#include #include #include +#include #include + +namespace DB::PS::tests +{ void PSMetricsDumper::onTime(Poco::Timer & /*timer*/) { for (auto & metric : metrics) @@ -107,3 +110,4 @@ void StressTimeout::start() { timeout_timer.start(Poco::TimerCallback(*this, &StressTimeout::onTime)); } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSBackground.h b/dbms/src/Storages/Page/workload/PSBackground.h similarity index 97% rename from dbms/src/Storages/Page/stress/PSBackground.h rename to dbms/src/Storages/Page/workload/PSBackground.h index 8c22458c5e8..c91dad1361f 100644 --- a/dbms/src/Storages/Page/stress/PSBackground.h +++ b/dbms/src/Storages/Page/workload/PSBackground.h @@ -15,14 +15,16 @@ #pragma once #include #include -#include #include +#include namespace CurrentMetrics { extern const Metric PSMVCCSnapshotsList; } +namespace DB::PS::tests +{ class PSMetricsDumper { public: @@ -162,3 +164,4 @@ class StressTimeout Poco::Timer timeout_timer; }; using StressTimeoutPtr = std::shared_ptr; +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSRunnable.cpp b/dbms/src/Storages/Page/workload/PSRunnable.cpp similarity index 97% rename from dbms/src/Storages/Page/stress/PSRunnable.cpp rename to dbms/src/Storages/Page/workload/PSRunnable.cpp index 5d6c8ecc5c6..5e9774ccc99 100644 --- a/dbms/src/Storages/Page/stress/PSRunnable.cpp +++ b/dbms/src/Storages/Page/workload/PSRunnable.cpp @@ -16,14 +16,16 @@ #include #include #include -#include #include #include +#include #include #include #include +namespace DB::PS::tests +{ void PSRunnable::run() try { @@ -69,7 +71,7 @@ DB::ReadBufferPtr PSWriter::genRandomData(const DB::PageId pageId, DB::MemHolder std::uniform_int_distribution<> dist(0, 3000); const size_t buff_sz = approx_page_mb * DB::MB + dist(size_gen); - char * buff = static_cast(malloc(buff_sz)); + char * buff = static_cast(malloc(buff_sz)); // NOLINT if (buff == nullptr) { throw DB::Exception("Alloc fix memory failed.", DB::ErrorCodes::LOGICAL_ERROR); @@ -78,7 +80,7 @@ DB::ReadBufferPtr PSWriter::genRandomData(const DB::PageId pageId, DB::MemHolder const char buff_ch = pageId % 0xFF; memset(buff, buff_ch, buff_sz); - holder = DB::createMemHolder(buff, [&](char * p) { free(p); }); + holder = DB::createMemHolder(buff, [&](char * p) { free(p); }); // NOLINT return std::make_shared(const_cast(buff), buff_sz); } @@ -88,7 +90,7 @@ void PSWriter::updatedRandomData() size_t memory_size = approx_page_mb * DB::MB * 2; if (memory == nullptr) { - memory = static_cast(malloc(memory_size)); + memory = static_cast(malloc(memory_size)); // NOLINT if (memory == nullptr) { throw DB::Exception("Alloc fix memory failed.", DB::ErrorCodes::LOGICAL_ERROR); @@ -147,7 +149,7 @@ void PSCommonWriter::updatedRandomData() if (memory == nullptr) { - memory = static_cast(malloc(memory_size)); + memory = static_cast(malloc(memory_size)); // NOLINT if (memory == nullptr) { throw DB::Exception("Alloc fix memory failed.", DB::ErrorCodes::LOGICAL_ERROR); @@ -415,3 +417,4 @@ DB::PageId PSIncreaseWriter::genRandomPageId() { return static_cast(begin_page_id++); } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSRunnable.h b/dbms/src/Storages/Page/workload/PSRunnable.h similarity index 90% rename from dbms/src/Storages/Page/stress/PSRunnable.h rename to dbms/src/Storages/Page/workload/PSRunnable.h index 3ddcd73c093..b723236391d 100644 --- a/dbms/src/Storages/Page/stress/PSRunnable.h +++ b/dbms/src/Storages/Page/workload/PSRunnable.h @@ -13,12 +13,14 @@ // limitations under the License. #pragma once -#include #include #include +#include const DB::PageId MAX_PAGE_ID_DEFAULT = 1000; +namespace DB::PS::tests +{ class PSRunnable : public Poco::Runnable { public: @@ -46,7 +48,7 @@ class PSWriter : public PSRunnable gen.seed(time(nullptr)); } - virtual ~PSWriter() + ~PSWriter() override { if (memory != nullptr) { @@ -54,7 +56,7 @@ class PSWriter : public PSRunnable } } - virtual String description() override + String description() override { return fmt::format("(Stress Test Writer {})", index); } @@ -67,7 +69,7 @@ class PSWriter : public PSRunnable static void fillAllPages(const PSPtr & ps); - virtual bool runImpl() override; + bool runImpl() override; protected: virtual DB::PageId genRandomPageId(); @@ -91,11 +93,11 @@ class PSCommonWriter : public PSWriter : PSWriter(ps_, index_) {} - virtual void updatedRandomData() override; + void updatedRandomData() override; - virtual String description() override { return fmt::format("(Stress Test Common Writer {})", index); } + String description() override { return fmt::format("(Stress Test Common Writer {})", index); } - virtual bool runImpl() override; + bool runImpl() override; void setBatchBufferNums(size_t numbers); @@ -120,7 +122,7 @@ class PSCommonWriter : public PSWriter DB::PageFieldSizes data_sizes = {}; - virtual DB::PageId genRandomPageId() override; + DB::PageId genRandomPageId() override; virtual size_t genBufferSize(); }; @@ -154,7 +156,7 @@ class PSWindowWriter : public PSCommonWriter void setNormalDistributionSigma(size_t sigma); protected: - virtual DB::PageId genRandomPageId() override; + DB::PageId genRandomPageId() override; protected: size_t window_size = 100; @@ -170,12 +172,12 @@ class PSIncreaseWriter : public PSCommonWriter String description() override { return fmt::format("(Stress Test Increase Writer {})", index); } - virtual bool runImpl() override; + bool runImpl() override; void setPageRange(size_t page_range); protected: - virtual DB::PageId genRandomPageId() override; + DB::PageId genRandomPageId() override; protected: size_t begin_page_id = 1; @@ -192,9 +194,9 @@ class PSReader : public PSRunnable gen.seed(time(nullptr)); } - virtual String description() override { return fmt::format("(Stress Test PSReader {})", index); } + String description() override { return fmt::format("(Stress Test PSReader {})", index); } - virtual bool runImpl() override; + bool runImpl() override; void setPageReadOnce(size_t page_read_once); @@ -242,7 +244,7 @@ class PSWindowReader : public PSReader void setWriterNums(size_t writer_nums); protected: - virtual DB::PageIds genRandomPageIds() override; + DB::PageIds genRandomPageIds() override; protected: size_t window_size = 100; @@ -261,12 +263,13 @@ class PSSnapshotReader : public PSReader : PSReader(ps_, index_) {} - virtual bool runImpl() override; + bool runImpl() override; void setSnapshotGetIntervalMs(size_t snapshot_get_interval_ms_); protected: - size_t snapshots_hold_num; + size_t snapshots_hold_num = 0; size_t snapshot_get_interval_ms = 0; std::list snapshots; -}; \ No newline at end of file +}; +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.cpp b/dbms/src/Storages/Page/workload/PSStressEnv.cpp similarity index 97% rename from dbms/src/Storages/Page/stress/PSStressEnv.cpp rename to dbms/src/Storages/Page/workload/PSStressEnv.cpp index 7d680cd43c0..f5cead0a158 100644 --- a/dbms/src/Storages/Page/stress/PSStressEnv.cpp +++ b/dbms/src/Storages/Page/workload/PSStressEnv.cpp @@ -16,18 +16,20 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include #include #include +namespace DB::PS::tests +{ Poco::Logger * StressEnv::logger; void StressEnv::initGlobalLogger() { @@ -146,3 +148,4 @@ void StressEnv::setup() init_pages = true; setupSignal(); } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.h b/dbms/src/Storages/Page/workload/PSStressEnv.h similarity index 98% rename from dbms/src/Storages/Page/stress/PSStressEnv.h rename to dbms/src/Storages/Page/workload/PSStressEnv.h index 1c7d8ee761f..e67cb325430 100644 --- a/dbms/src/Storages/Page/stress/PSStressEnv.h +++ b/dbms/src/Storages/Page/workload/PSStressEnv.h @@ -25,6 +25,8 @@ namespace Poco class Logger; } +namespace DB::PS::tests +{ using PSPtr = std::shared_ptr; enum StressEnvStat @@ -124,3 +126,4 @@ struct StressEnv void setup(); }; +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSWorkload.cpp b/dbms/src/Storages/Page/workload/PSWorkload.cpp similarity index 98% rename from dbms/src/Storages/Page/stress/PSWorkload.cpp rename to dbms/src/Storages/Page/workload/PSWorkload.cpp index ce1f8d92ce0..81f13527f48 100644 --- a/dbms/src/Storages/Page/stress/PSWorkload.cpp +++ b/dbms/src/Storages/Page/workload/PSWorkload.cpp @@ -14,12 +14,14 @@ #include #include -#include #include #include #include +#include #include +namespace DB::PS::tests +{ void StressWorkload::onDumpResult() { UInt64 time_interval = stop_watch.elapsedMilliseconds(); @@ -177,3 +179,4 @@ void StressWorkloadManger::runWorkload() } } } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSWorkload.h b/dbms/src/Storages/Page/workload/PSWorkload.h similarity index 92% rename from dbms/src/Storages/Page/stress/PSWorkload.h rename to dbms/src/Storages/Page/workload/PSWorkload.h index cb099b4203a..eaaaf4eba5b 100644 --- a/dbms/src/Storages/Page/stress/PSWorkload.h +++ b/dbms/src/Storages/Page/workload/PSWorkload.h @@ -16,15 +16,17 @@ #include #include -#include -#include -#include #include #include #include +#include +#include +#include #include #define NORMAL_WORKLOAD 0 +namespace DB::PS::tests +{ template class StressWorkloadFunc { @@ -45,6 +47,8 @@ class StressWorkloadFunc class StressWorkload { public: + static int mainEntry(int argc, char ** argv); + explicit StressWorkload(StressEnv options_) : options(options_) {} @@ -189,13 +193,15 @@ class StressWorkloadManger StressEnv options; }; -#define REGISTER_WORKLOAD(WORKLOAD) \ - static void __attribute__((constructor)) _work_load_register_named_##WORKLOAD(void) \ - { \ - StressWorkloadManger::getInstance().reg( \ - WORKLOAD::nameFunc(), \ - WORKLOAD::maskFunc(), \ - [](const StressEnv & opts) -> std::shared_ptr { \ - return std::make_shared(opts); \ - }); \ +#define REGISTER_WORKLOAD(WORKLOAD) \ + void __attribute__((constructor)) _work_load_register_named_##WORKLOAD(void) \ + { \ + StressWorkloadManger::getInstance().reg( \ + WORKLOAD::nameFunc(), \ + WORKLOAD::maskFunc(), \ + [](const StressEnv & opts) -> std::shared_ptr { \ + return std::make_shared(opts); \ + }); \ } + +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/workload/PageStorageInMemoryCapacity.cpp b/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.cpp similarity index 96% rename from dbms/src/Storages/Page/stress/workload/PageStorageInMemoryCapacity.cpp rename to dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.cpp index 190cbf6b323..6ab321d1a10 100644 --- a/dbms/src/Storages/Page/stress/workload/PageStorageInMemoryCapacity.cpp +++ b/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.cpp @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include #include #include - #ifdef __APPLE__ #include @@ -27,6 +26,8 @@ #include #endif +namespace DB::PS::tests +{ class PageStorageInMemoryCapacity : public StressWorkload , public StressWorkloadFunc { @@ -89,14 +90,14 @@ class PageStorageInMemoryCapacity : public StressWorkload } FILE * file = fopen("/proc/meminfo", "r"); - if (file != NULL) + if (file != nullptr) { char buffer[128]; #define MEMORY_TOTAL_LABEL "MemTotal:" while (fgets(buffer, 128, file)) { if ((strncmp((buffer), (MEMORY_TOTAL_LABEL), strlen(MEMORY_TOTAL_LABEL)) == 0) - && sscanf(buffer + strlen(MEMORY_TOTAL_LABEL), " %32llu kB", &total_mem)) + && sscanf(buffer + strlen(MEMORY_TOTAL_LABEL), " %32llu kB", &total_mem)) // NOLINT { break; } @@ -174,4 +175,5 @@ class PageStorageInMemoryCapacity : public StressWorkload } }; -REGISTER_WORKLOAD(PageStorageInMemoryCapacity) \ No newline at end of file +REGISTER_WORKLOAD(PageStorageInMemoryCapacity) +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/ThousandsOfOffset.cpp b/dbms/src/Storages/Page/workload/ThousandsOfOffset.cpp similarity index 97% rename from dbms/src/Storages/Page/stress/workload/ThousandsOfOffset.cpp rename to dbms/src/Storages/Page/workload/ThousandsOfOffset.cpp index 3a215f76769..5a02ef48d68 100644 --- a/dbms/src/Storages/Page/stress/workload/ThousandsOfOffset.cpp +++ b/dbms/src/Storages/Page/workload/ThousandsOfOffset.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class ThousandsOfOffset : public StressWorkload , public StressWorkloadFunc { @@ -168,4 +170,5 @@ class ThousandsOfOffset : public StressWorkload } }; -REGISTER_WORKLOAD(ThousandsOfOffset) \ No newline at end of file +REGISTER_WORKLOAD(ThousandsOfOffset) +} // namespace DB::PS::tests \ No newline at end of file From 1e3207d3794d8870fc8afad895017191851e4cca Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Fri, 10 Jun 2022 12:32:30 +0800 Subject: [PATCH 04/10] feat: add hardware information of server as an attribute (#5090) close pingcap/tiflash#4879 --- dbms/src/Server/CMakeLists.txt | 1 + dbms/src/Server/Server.cpp | 30 +++-- dbms/src/Server/Server.h | 8 +- dbms/src/Server/ServerInfo.cpp | 199 +++++++++++++++++++++++++++++++++ dbms/src/Server/ServerInfo.h | 99 ++++++++++++++++ 5 files changed, 327 insertions(+), 10 deletions(-) create mode 100644 dbms/src/Server/ServerInfo.cpp create mode 100644 dbms/src/Server/ServerInfo.h diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index 104b4f34e4a..2948bb076db 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -34,6 +34,7 @@ add_library (clickhouse-server-lib NotFoundHandler.cpp PingRequestHandler.cpp RootRequestHandler.cpp + ServerInfo.cpp Server.cpp StatusFile.cpp TCPHandler.cpp diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 04676ef969d..3e2c29de76c 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "Server.h" - #include #include #include @@ -56,6 +54,8 @@ #include #include #include +#include +#include #include #include #include @@ -72,7 +72,6 @@ #include #include #include -#include #include #include @@ -1049,6 +1048,23 @@ int Server::main(const std::vector & /*args*/) LOG_FMT_INFO(log, "tiflash proxy thread is joined"); }); + /// get CPU/memory/disk info of this server + if (tiflash_instance_wrap.proxy_helper) + { + diagnosticspb::ServerInfoRequest request; + request.set_tp(static_cast(1)); + diagnosticspb::ServerInfoResponse response; + std::string req = request.SerializeAsString(); + auto * helper = tiflash_instance_wrap.proxy_helper; + helper->fn_server_info(helper->proxy_ptr, strIntoView(&req), &response); + server_info.parseSysInfo(response); + LOG_FMT_INFO(log, "ServerInfo: {}", server_info.debugString()); + } + else + { + LOG_FMT_INFO(log, "TiFlashRaftProxyHelper is null, failed to get server info"); + } + CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get()); // print necessary grpc log. @@ -1410,10 +1426,10 @@ int Server::main(const std::vector & /*args*/) // on ARM processors it can show only enabled at current moment cores LOG_FMT_INFO( log, - "Available RAM = {}; physical cores = {}; threads = {}.", - formatReadableSizeWithBinarySuffix(getMemoryAmount()), - getNumberOfPhysicalCPUCores(), - std::thread::hardware_concurrency()); + "Available RAM = {}; physical cores = {}; logical cores = {}.", + server_info.memory_info.capacity, + server_info.cpu_info.physical_cores, + server_info.cpu_info.logical_cores); } LOG_FMT_INFO(log, "Ready for connections."); diff --git a/dbms/src/Server/Server.h b/dbms/src/Server/Server.h index 278349f2aa4..07c5b955a92 100644 --- a/dbms/src/Server/Server.h +++ b/dbms/src/Server/Server.h @@ -14,10 +14,10 @@ #pragma once +#include +#include #include -#include "IServer.h" - /** Server provides three interfaces: * 1. HTTP - simple interface for any applications. * 2. TCP - interface for native clickhouse-client and for server to server internal communications. @@ -39,7 +39,7 @@ class Server : public BaseDaemon return BaseDaemon::config(); } - virtual const TiFlashSecurityConfig & securityConfig() const override { return security_config; }; + const TiFlashSecurityConfig & securityConfig() const override { return security_config; }; Poco::Logger & logger() const override { @@ -70,6 +70,8 @@ class Server : public BaseDaemon TiFlashSecurityConfig security_config; + ServerInfo server_info; + class FlashGrpcServerHolder; class TcpHttpServersHolder; }; diff --git a/dbms/src/Server/ServerInfo.cpp b/dbms/src/Server/ServerInfo.cpp new file mode 100644 index 00000000000..9cba40c4775 --- /dev/null +++ b/dbms/src/Server/ServerInfo.cpp @@ -0,0 +1,199 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +namespace DB +{ +using diagnosticspb::ServerInfoItem; +using diagnosticspb::ServerInfoResponse; + +void ServerInfo::parseCPUInfo(const diagnosticspb::ServerInfoItem & cpu_info_item) +{ + for (const auto & pair : cpu_info_item.pairs()) + { + const auto & key = pair.key(); + if (key == "cpu-logical-cores") + { + cpu_info.logical_cores = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-physical-cores") + { + cpu_info.physical_cores = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-frequency") + { + cpu_info.frequency = pair.value(); + } + else if (key == "l1-cache-size") + { + cpu_info.l1_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l1-cache-line-size") + { + cpu_info.l1_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "l2-cache-size") + { + cpu_info.l2_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l2-cache-line-size") + { + cpu_info.l2_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "l3-cache-size") + { + cpu_info.l3_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l3-cache-line-size") + { + cpu_info.l3_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-arch") + { + cpu_info.arch = pair.value(); + } + } +} + +void ServerInfo::parseDiskInfo(const diagnosticspb::ServerInfoItem & disk_info_item) +{ + Disk disk; + disk.name = disk_info_item.name(); + for (const auto & pair : disk_info_item.pairs()) + { + const auto & key = pair.key(); + if (key == "type") + { + if (pair.value() == "HDD") + { + disk.disk_type = Disk::DiskType::HDD; + } + else if (pair.value() == "SSD") + { + disk.disk_type = Disk::DiskType::SSD; + } + else + { + disk.disk_type = Disk::DiskType::UNKNOWN; + } + } + else if (key == "total") + { + disk.total_space = static_cast(std::stoull(pair.value())); + } + else if (key == "free") + { + disk.free_space = static_cast(std::stoull(pair.value())); + } + else if (key == "path") + { + disk.mount_point = pair.value(); + } + else if (key == "fstype") + { + disk.fs_type = pair.value(); + } + } + disk_infos.push_back(disk); +} + +void ServerInfo::parseMemoryInfo(const diagnosticspb::ServerInfoItem & memory_info_item) +{ + for (const auto & pair : memory_info_item.pairs()) + { + if (pair.key() == "capacity") + { + memory_info.capacity = std::stoull(pair.value()); + } + } +} + +void ServerInfo::parseSysInfo(const diagnosticspb::ServerInfoResponse & sys_info_response) +{ + for (const auto & item : sys_info_response.items()) + { + const auto & tp = item.tp(); + if (tp == "cpu") + { + parseCPUInfo(item); + } + else if (tp == "disk") + { + parseDiskInfo(item); + } + else if (tp == "memory") + { + parseMemoryInfo(item); + } + } +} + +String ServerInfo::debugString() const +{ + FmtBuffer fmt_buf; + // append cpu info + fmt_buf.fmtAppend("CPU: \n" + " logical cores: {}\n" + " physical cores: {}\n" + " frequency: {}\n" + " l1 cache size: {}\n" + " l1 cache line size: {}\n" + " l2 cache size: {}\n" + " l2 cache line size: {}\n" + " l3 cache size: {}\n" + " l3 cache line size: {}\n" + " arch: {}\n", + cpu_info.logical_cores, + cpu_info.physical_cores, + cpu_info.frequency, + cpu_info.l1_cache_size, + cpu_info.l1_cache_line_size, + cpu_info.l2_cache_size, + cpu_info.l2_cache_line_size, + cpu_info.l3_cache_size, + cpu_info.l3_cache_line_size, + cpu_info.arch); + // append disk info + { + const static String disk_type_str[] = {"UNKNOWN", "HDD", "SSD"}; + for (const auto & disk_info : disk_infos) + { + fmt_buf.fmtAppend("Disk: \n" + " name: {}\n" + " type: {}\n" + " total space: {}\n" + " free space: {}\n" + " mount point: {}\n" + " fstype: {}\n", + disk_info.name, + disk_type_str[static_cast(disk_info.disk_type)], + disk_info.total_space, + disk_info.free_space, + disk_info.mount_point, + disk_info.fs_type); + } + } + // append memory info + fmt_buf.fmtAppend("Memory: \n" + " capacity: {}\n", + memory_info.capacity); + + return fmt_buf.toString(); +} + +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Server/ServerInfo.h b/dbms/src/Server/ServerInfo.h new file mode 100644 index 00000000000..9663bd37568 --- /dev/null +++ b/dbms/src/Server/ServerInfo.h @@ -0,0 +1,99 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif +#include +#pragma GCC diagnostic pop + +namespace DB +{ +class ServerInfo +{ +public: + struct CPUInfo + { + /// number of logical CPU cores + UInt16 logical_cores = std::thread::hardware_concurrency(); + /// number of physical CPU cores + UInt16 physical_cores = getNumberOfPhysicalCPUCores(); + /// number of L1 cache size + /// units: Byte + UInt32 l1_cache_size = 16384; // 16KB (typical value) + /// number of L2 cache size + /// units: Byte + UInt32 l2_cache_size = 65536; // 64KB (typical value) + /// number of L3 cache size + /// units: Byte + UInt32 l3_cache_size = 2097152; // 2MB (typical value) + /// number of L1 cache line size + UInt8 l1_cache_line_size = 64; // 64B (typical value) + /// number of L2 cache line size + UInt8 l2_cache_line_size = 64; // 64B (typical value) + /// number of L3 cache line size + UInt8 l3_cache_line_size = 64; // 64B (typical value) + /// CPU architecture + String arch; + /// CPU frequency + String frequency; + }; + + struct Disk + { + String name; + enum DiskType + { + UNKNOWN = 0, + HDD = 1, + SSD = 2 + }; + DiskType disk_type; + UInt64 total_space = 0; + UInt64 free_space = 0; + String mount_point; + String fs_type; + }; + using DiskInfo = std::vector; + + struct MemoryInfo + { + /// total memory size + /// units: Byte + UInt64 capacity = getMemoryAmount(); + }; + + ServerInfo() = default; + ~ServerInfo() = default; + void parseCPUInfo(const diagnosticspb::ServerInfoItem & cpu_info_item); + void parseDiskInfo(const diagnosticspb::ServerInfoItem & disk_info_item); + void parseMemoryInfo(const diagnosticspb::ServerInfoItem & memory_info_item); + void parseSysInfo(const diagnosticspb::ServerInfoResponse & sys_info_response); + String debugString() const; + + CPUInfo cpu_info; + DiskInfo disk_infos; + MemoryInfo memory_info; +}; +} // namespace DB From 8916afe6eb0fccea897df172473b6bb0a98acae0 Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 10 Jun 2022 17:28:31 +0800 Subject: [PATCH 05/10] move `tunnel_map` to MPPTunnelSet (#5123) ref pingcap/tiflash#5095 --- dbms/src/Flash/Mpp/MPPTask.cpp | 46 +++++++------------- dbms/src/Flash/Mpp/MPPTask.h | 3 -- dbms/src/Flash/Mpp/MPPTunnelSet.cpp | 65 +++++++++++++++++++++++++++++ dbms/src/Flash/Mpp/MPPTunnelSet.h | 21 ++++++---- 4 files changed, 92 insertions(+), 43 deletions(-) diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 0f18ad582b4..8f9ca8e55e5 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -56,6 +56,7 @@ MPPTask::MPPTask(const mpp::TaskMeta & meta_, const ContextPtr & context_) , id(meta.start_ts(), meta.task_id()) , log(Logger::get("MPPTask", id.toString())) , mpp_task_statistics(id, meta.address()) + , needed_threads(0) , schedule_state(ScheduleState::WAITING) {} @@ -78,18 +79,14 @@ MPPTask::~MPPTask() void MPPTask::closeAllTunnels(const String & reason) { - for (auto & it : tunnel_map) - { - it.second->close(reason); - } + if (likely(tunnel_set)) + tunnel_set->close(reason); } void MPPTask::finishWrite() { - for (const auto & it : tunnel_map) - { - it.second->writeDone(); - } + RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); + tunnel_set->finishWrite(); } void MPPTask::run() @@ -97,15 +94,13 @@ void MPPTask::run() newThreadManager()->scheduleThenDetach(true, "MPPTask", [self = shared_from_this()] { self->runImpl(); }); } -void MPPTask::registerTunnel(const MPPTaskId & id, MPPTunnelPtr tunnel) +void MPPTask::registerTunnel(const MPPTaskId & task_id, MPPTunnelPtr tunnel) { if (status == CANCELLED) throw Exception("the tunnel " + tunnel->id() + " can not been registered, because the task is cancelled"); - if (tunnel_map.find(id) != tunnel_map.end()) - throw Exception("the tunnel " + tunnel->id() + " has been registered"); - - tunnel_map[id] = tunnel; + RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); + tunnel_set->registerTunnel(task_id, tunnel); } std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConnectionRequest * request) @@ -120,8 +115,9 @@ std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConn } MPPTaskId receiver_id{request->receiver_meta().start_ts(), request->receiver_meta().task_id()}; - auto it = tunnel_map.find(receiver_id); - if (it == tunnel_map.end()) + RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); + auto tunnel_ptr = tunnel_set->getTunnelById(receiver_id); + if (tunnel_ptr == nullptr) { auto err_msg = fmt::format( "can't find tunnel ({} + {})", @@ -129,7 +125,7 @@ std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConn request->receiver_meta().task_id()); return {nullptr, err_msg}; } - return {it->second, ""}; + return {tunnel_ptr, ""}; } void MPPTask::unregisterTask() @@ -211,7 +207,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) } // register tunnels - tunnel_set = std::make_shared(); + tunnel_set = std::make_shared(log->identifier()); std::chrono::seconds timeout(task_request.timeout()); for (int i = 0; i < exchange_sender.encoded_task_meta_size(); i++) @@ -225,7 +221,6 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) MPPTunnelPtr tunnel = std::make_shared(task_meta, task_request.meta(), timeout, context->getSettingsRef().max_threads, is_local, is_async, log->identifier()); LOG_FMT_DEBUG(log, "begin to register the tunnel {}", tunnel->id()); registerTunnel(MPPTaskId{task_meta.start_ts(), task_meta.task_id()}, tunnel); - tunnel_set->addTunnel(tunnel); if (!dag_context->isRootMPPTask()) { FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_register_tunnel_for_non_root_mpp_task); @@ -369,19 +364,8 @@ void MPPTask::runImpl() void MPPTask::writeErrToAllTunnels(const String & e) { - for (auto & it : tunnel_map) - { - try - { - FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_write_err_to_tunnel); - it.second->write(getPacketWithError(e), true); - } - catch (...) - { - it.second->close("Failed to write error msg to tunnel"); - tryLogCurrentException(log, "Failed to write error " + e + " to tunnel: " + it.second->id()); - } - } + RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); + tunnel_set->writeError(e); } void MPPTask::cancel(const String & reason) diff --git a/dbms/src/Flash/Mpp/MPPTask.h b/dbms/src/Flash/Mpp/MPPTask.h index c34cae49699..ee434a2f2ff 100644 --- a/dbms/src/Flash/Mpp/MPPTask.h +++ b/dbms/src/Flash/Mpp/MPPTask.h @@ -123,9 +123,6 @@ class MPPTask : public std::enable_shared_from_this MPPTunnelSetPtr tunnel_set; - // which targeted task we should send data by which tunnel. - std::unordered_map tunnel_map; - MPPTaskManager * manager = nullptr; const LoggerPtr log; diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp index 12de07d4a18..500e9501b08 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp @@ -13,11 +13,17 @@ // limitations under the License. #include +#include #include +#include #include namespace DB { +namespace FailPoints +{ +extern const char exception_during_mpp_write_err_to_tunnel[]; +} // namespace FailPoints namespace { inline mpp::MPPDataPacket serializeToPacket(const tipb::SelectResponse & response) @@ -108,6 +114,65 @@ void MPPTunnelSetBase::write(mpp::MPPDataPacket & packet, int16_t partit tunnels[partition_id]->write(packet); } +template +void MPPTunnelSetBase::writeError(const String & msg) +{ + for (auto & tunnel : tunnels) + { + try + { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_write_err_to_tunnel); + tunnel->write(getPacketWithError(msg), true); + } + catch (...) + { + tunnel->close("Failed to write error msg to tunnel"); + tryLogCurrentException(log, "Failed to write error " + msg + " to tunnel: " + tunnel->id()); + } + } +} + +template +void MPPTunnelSetBase::registerTunnel(const MPPTaskId & id, const TunnelPtr & tunnel) +{ + if (id_to_index_map.find(id) != id_to_index_map.end()) + throw Exception("the tunnel " + tunnel->id() + " has been registered"); + + id_to_index_map[id] = tunnels.size(); + tunnels.push_back(tunnel); + if (!tunnel->isLocal()) + { + remote_tunnel_cnt++; + } +} + +template +void MPPTunnelSetBase::close(const String & reason) +{ + for (auto & tunnel : tunnels) + tunnel->close(reason); +} + +template +void MPPTunnelSetBase::finishWrite() +{ + for (auto & tunnel : tunnels) + { + tunnel->writeDone(); + } +} + +template +typename MPPTunnelSetBase::TunnelPtr MPPTunnelSetBase::getTunnelById(const MPPTaskId & id) +{ + auto it = id_to_index_map.find(id); + if (it == id_to_index_map.end()) + { + return nullptr; + } + return tunnels[it->second]; +} + /// Explicit template instantiations - to avoid code bloat in headers. template class MPPTunnelSetBase; diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.h b/dbms/src/Flash/Mpp/MPPTunnelSet.h index f2279b945cb..021c609f516 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.h +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.h @@ -14,6 +14,7 @@ #pragma once +#include #include #ifdef __clang__ #pragma clang diagnostic push @@ -32,6 +33,9 @@ class MPPTunnelSetBase : private boost::noncopyable { public: using TunnelPtr = std::shared_ptr; + explicit MPPTunnelSetBase(const String & req_id) + : log(Logger::get("MPPTunnelSet", req_id)) + {} void clearExecutionSummaries(tipb::SelectResponse & response); @@ -50,17 +54,14 @@ class MPPTunnelSetBase : private boost::noncopyable // this is a partition writing. void write(tipb::SelectResponse & response, int16_t partition_id); void write(mpp::MPPDataPacket & packet, int16_t partition_id); + void writeError(const String & msg); + void close(const String & reason); + void finishWrite(); + void registerTunnel(const MPPTaskId & id, const TunnelPtr & tunnel); - uint16_t getPartitionNum() const { return tunnels.size(); } + TunnelPtr getTunnelById(const MPPTaskId & id); - void addTunnel(const TunnelPtr & tunnel) - { - tunnels.push_back(tunnel); - if (!tunnel->isLocal()) - { - remote_tunnel_cnt++; - } - } + uint16_t getPartitionNum() const { return tunnels.size(); } int getRemoteTunnelCnt() { @@ -71,6 +72,8 @@ class MPPTunnelSetBase : private boost::noncopyable private: std::vector tunnels; + std::unordered_map id_to_index_map; + const LoggerPtr log; int remote_tunnel_cnt = 0; }; From 8a81342c104a78c9d84b2877c111a4cb87e3cdff Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Fri, 10 Jun 2022 21:42:30 +0800 Subject: [PATCH 06/10] add some notes about `getMemoryAmount()` and `getNumberOfPhysicalCPUCores()` (#5126) close pingcap/tiflash#5125 --- dbms/src/Common/getNumberOfPhysicalCPUCores.h | 1 + dbms/src/Storages/DeltaMerge/workload/Handle.h | 6 ++++-- libs/libcommon/include/common/getMemoryAmount.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/getNumberOfPhysicalCPUCores.h b/dbms/src/Common/getNumberOfPhysicalCPUCores.h index 6f7eaef4bb4..b3ab65a66e5 100644 --- a/dbms/src/Common/getNumberOfPhysicalCPUCores.h +++ b/dbms/src/Common/getNumberOfPhysicalCPUCores.h @@ -15,4 +15,5 @@ #pragma once /// Get number of CPU cores without hyper-threading. +/// Note: do not support environment under resource isolation mechanism like Docker, CGroup. unsigned getNumberOfPhysicalCPUCores(); diff --git a/dbms/src/Storages/DeltaMerge/workload/Handle.h b/dbms/src/Storages/DeltaMerge/workload/Handle.h index c4949c15a1f..2bbd1bd409d 100644 --- a/dbms/src/Storages/DeltaMerge/workload/Handle.h +++ b/dbms/src/Storages/DeltaMerge/workload/Handle.h @@ -51,14 +51,16 @@ class HandleLock std::vector> getLocks(const std::vector & handles) { - std::vector indexes(handles.size()); + std::vector indexes; + indexes.reserve(handles.size()); for (const auto & h : handles) { indexes.push_back(index(h)); } // Sort mutex indexes to avoid dead lock. sort(indexes.begin(), indexes.end()); - std::vector> locks(indexes.size()); + std::vector> locks; + locks.reserve(indexes.size()); for (auto i : indexes) { locks.push_back(getLockByIndex(i)); diff --git a/libs/libcommon/include/common/getMemoryAmount.h b/libs/libcommon/include/common/getMemoryAmount.h index 98aa87661c3..0807c6f8e12 100644 --- a/libs/libcommon/include/common/getMemoryAmount.h +++ b/libs/libcommon/include/common/getMemoryAmount.h @@ -19,5 +19,6 @@ /** * Returns the size of physical memory (RAM) in bytes. * Returns 0 on unsupported platform +* Note: do not support environment under resource isolation mechanism like Docker, CGroup. */ uint64_t getMemoryAmount(); From 10bcb06bcd379601014c70b6dd9173bb960b3c32 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Mon, 13 Jun 2022 12:52:32 +0800 Subject: [PATCH 07/10] *: remove TableFunctionFile (#5098) ref pingcap/tiflash#2019 --- dbms/src/Storages/StorageFile.cpp | 348 ------------------ dbms/src/Storages/StorageFile.h | 100 ----- dbms/src/Storages/registerStorages.cpp | 2 - dbms/src/TableFunctions/TableFunctionFile.cpp | 98 ----- dbms/src/TableFunctions/TableFunctionFile.h | 39 -- .../TableFunctions/registerTableFunctions.cpp | 2 - 6 files changed, 589 deletions(-) delete mode 100644 dbms/src/Storages/StorageFile.cpp delete mode 100644 dbms/src/Storages/StorageFile.h delete mode 100644 dbms/src/TableFunctions/TableFunctionFile.cpp delete mode 100644 dbms/src/TableFunctions/TableFunctionFile.h diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp deleted file mode 100644 index 4dec4fd5ea0..00000000000 --- a/dbms/src/Storages/StorageFile.cpp +++ /dev/null @@ -1,348 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - extern const int CANNOT_SEEK_THROUGH_FILE; - extern const int DATABASE_ACCESS_DENIED; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNKNOWN_IDENTIFIER; - extern const int INCORRECT_FILE_NAME; - extern const int FILE_DOESNT_EXIST; - extern const int EMPTY_LIST_OF_COLUMNS_PASSED; -}; - - -static std::string getTablePath(const std::string & db_dir_path, const std::string & table_name, const std::string & format_name) -{ - return db_dir_path + escapeForFileName(table_name) + "/data." + escapeForFileName(format_name); -} - -/// Both db_dir_path and table_path must be converted to absolute paths (in particular, path cannot contain '..'). -static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, int table_fd) -{ - if (context_global.getApplicationType() != Context::ApplicationType::SERVER) - return; - - if (table_fd >= 0) - throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - else if (!startsWith(table_path, db_dir_path)) - throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); - - Poco::File table_path_poco_file = Poco::File(table_path); - if (!table_path_poco_file.exists()) - throw Exception("File " + table_path + " is not exist", ErrorCodes::FILE_DOESNT_EXIST); - else if (table_path_poco_file.isDirectory()) - throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); -} - - -StorageFile::StorageFile( - const std::string & table_path_, - int table_fd_, - const std::string & db_dir_path, - const std::string & table_name_, - const std::string & format_name_, - const ColumnsDescription & columns_, - Context & context_) - : IStorage(columns_), - table_name(table_name_), format_name(format_name_), context_global(context_), table_fd(table_fd_) -{ - if (table_fd < 0) /// Will use file - { - use_table_fd = false; - - if (!table_path_.empty()) /// Is user's file - { - Poco::Path poco_path = Poco::Path(table_path_); - if (poco_path.isRelative()) - poco_path = Poco::Path(db_dir_path, poco_path); - - path = poco_path.absolute().toString(); - checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); - is_db_table = false; - } - else /// Is DB's file - { - if (db_dir_path.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); - - path = getTablePath(db_dir_path, table_name, format_name); - is_db_table = true; - Poco::File(Poco::Path(path).parent()).createDirectories(); - } - } - else /// Will use FD - { - checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); - - is_db_table = false; - use_table_fd = true; - - /// Save initial offset, it will be used for repeating SELECTs - /// If FD isn't seekable (lseek returns -1), then the second and subsequent SELECTs will fail. - table_fd_init_offset = lseek(table_fd, 0, SEEK_CUR); - } -} - - -class StorageFileBlockInputStream : public IProfilingBlockInputStream -{ -public: - StorageFileBlockInputStream(StorageFile & storage_, const Context & context, size_t max_block_size) - : storage(storage_) - { - if (storage.use_table_fd) - { - storage.rwlock.lock(); - - /// We could use common ReadBuffer and WriteBuffer in storage to leverage cache - /// and add ability to seek unseekable files, but cache sync isn't supported. - - if (storage.table_fd_was_used) /// We need seek to initial position - { - if (storage.table_fd_init_offset < 0) - throw Exception("File descriptor isn't seekable, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - - /// ReadBuffer's seek() doesn't make sence, since cache is empty - if (lseek(storage.table_fd, storage.table_fd_init_offset, SEEK_SET) < 0) - throwFromErrno("Cannot seek file descriptor, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - } - - storage.table_fd_was_used = true; - read_buf = std::make_unique(storage.table_fd); - } - else - { - storage.rwlock.lock_shared(); - - read_buf = std::make_unique(storage.path); - } - - reader = FormatFactory().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); - } - - ~StorageFileBlockInputStream() override - { - if (storage.use_table_fd) - storage.rwlock.unlock(); - else - storage.rwlock.unlock_shared(); - } - - String getName() const override - { - return storage.getName(); - } - - Block readImpl() override - { - return reader->read(); - } - - Block getHeader() const override { return reader->getHeader(); }; - - void readPrefixImpl() override - { - reader->readPrefix(); - } - - void readSuffixImpl() override - { - reader->readSuffix(); - } - -private: - StorageFile & storage; - Block sample_block; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; -}; - - -BlockInputStreams StorageFile::read( - const Names & /*column_names*/, - const SelectQueryInfo & /*query_info*/, - const Context & context, - QueryProcessingStage::Enum & /*processed_stage*/, - size_t max_block_size, - unsigned /*num_streams*/) -{ - return BlockInputStreams(1, std::make_shared(*this, context, max_block_size)); -} - - -class StorageFileBlockOutputStream : public IBlockOutputStream -{ -public: - explicit StorageFileBlockOutputStream(StorageFile & storage_) - : storage(storage_), lock(storage.rwlock) - { - if (storage.use_table_fd) - { - /** NOTE: Using real file binded to FD may be misleading: - * SELECT *; INSERT insert_data; SELECT *; last SELECT returns initil_fd_data + insert_data - * INSERT data; SELECT *; last SELECT returns only insert_data - */ - storage.table_fd_was_used = true; - write_buf = std::make_unique(storage.table_fd); - } - else - { - write_buf = std::make_unique(storage.path, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT); - } - - writer = FormatFactory().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), storage.context_global); - } - - Block getHeader() const override { return storage.getSampleBlock(); } - - void write(const Block & block) override - { - writer->write(block); - } - - void writePrefix() override - { - writer->writePrefix(); - } - - void writeSuffix() override - { - writer->writeSuffix(); - } - - void flush() override - { - writer->flush(); - } - -private: - StorageFile & storage; - std::unique_lock lock; - std::unique_ptr write_buf; - BlockOutputStreamPtr writer; -}; - -BlockOutputStreamPtr StorageFile::write( - const ASTPtr & /*query*/, - const Settings & /*settings*/) -{ - return std::make_shared(*this); -} - - -void StorageFile::drop() -{ - /// Extra actions are not required. -} - - -void StorageFile::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name) -{ - if (!is_db_table) - throw Exception("Can't rename table '" + table_name + "' binded to user-defined file (or FD)", ErrorCodes::DATABASE_ACCESS_DENIED); - - std::unique_lock lock(rwlock); - - std::string path_new = getTablePath(new_path_to_db, new_table_name, format_name); - Poco::File(Poco::Path(path_new).parent()).createDirectories(); - Poco::File(path).renameTo(path_new); - - path = std::move(path_new); -} - - -void registerStorageFile(StorageFactory & factory) -{ - factory.registerStorage("File", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (!(engine_args.size() == 1 || engine_args.size() == 2)) - throw Exception( - "Storage File requires 1 or 2 arguments: name of used format and source.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); - String format_name = static_cast(*engine_args[0]).value.safeGet(); - - int source_fd = -1; - String source_path; - if (engine_args.size() >= 2) - { - /// Will use FD if engine_args[1] is int literal or identifier with std* name - - if (const ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) - { - if (identifier->name == "stdin") - source_fd = STDIN_FILENO; - else if (identifier->name == "stdout") - source_fd = STDOUT_FILENO; - else if (identifier->name == "stderr") - source_fd = STDERR_FILENO; - else - throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor", - ErrorCodes::UNKNOWN_IDENTIFIER); - } - else if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) - { - auto type = literal->value.getType(); - if (type == Field::Types::Int64) - source_fd = static_cast(literal->value.get()); - else if (type == Field::Types::UInt64) - source_fd = static_cast(literal->value.get()); - else if (type == Field::Types::String) - source_path = literal->value.get(); - } - } - - return StorageFile::create( - source_path, source_fd, - args.data_path, - args.table_name, format_name, args.columns, - args.context); - }); -} - -} diff --git a/dbms/src/Storages/StorageFile.h b/dbms/src/Storages/StorageFile.h deleted file mode 100644 index ca46f7f366e..00000000000 --- a/dbms/src/Storages/StorageFile.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ -class StorageFileBlockInputStream; -class StorageFileBlockOutputStream; - -class StorageFile : public ext::SharedPtrHelper - , public IStorage -{ -public: - std::string getName() const override - { - return "File"; - } - - std::string getTableName() const override - { - return table_name; - } - - BlockInputStreams read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - size_t max_block_size, - unsigned num_streams) override; - - BlockOutputStreamPtr write( - const ASTPtr & query, - const Settings & settings) override; - - void drop() override; - - void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; - - String getDataPath() const override { return path; } - -protected: - friend class StorageFileBlockInputStream; - friend class StorageFileBlockOutputStream; - - /** there are three options (ordered by priority): - - use specified file descriptor if (fd >= 0) - - use specified table_path if it isn't empty - - create own table inside data/db/table/ - */ - StorageFile( - const std::string & table_path_, - int table_fd_, - const std::string & db_dir_path, - const std::string & table_name_, - const std::string & format_name_, - const ColumnsDescription & columns_, - Context & context_); - -private: - std::string table_name; - std::string format_name; - Context & context_global; - - std::string path; - int table_fd = -1; - - bool is_db_table = true; /// Table is stored in real database, not user's file - bool use_table_fd = false; /// Use table_fd insted of path - std::atomic table_fd_was_used{false}; /// To detect repeating reads from stdin - off_t table_fd_init_offset = -1; /// Initial position of fd, used for repeating reads - - mutable std::shared_mutex rwlock; - - Poco::Logger * log = &Poco::Logger::get("StorageFile"); -}; - -} // namespace DB diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index a709be0b017..ddf815316ab 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -27,7 +27,6 @@ void registerStorageNull(StorageFactory & factory); void registerStorageMerge(StorageFactory & factory); void registerStorageBuffer(StorageFactory & factory); void registerStorageMemory(StorageFactory & factory); -void registerStorageFile(StorageFactory & factory); void registerStorageDictionary(StorageFactory & factory); void registerStorageSet(StorageFactory & factory); void registerStorageJoin(StorageFactory & factory); @@ -47,7 +46,6 @@ void registerStorages() registerStorageMerge(factory); registerStorageBuffer(factory); registerStorageMemory(factory); - registerStorageFile(factory); registerStorageDictionary(factory); registerStorageSet(factory); registerStorageJoin(factory); diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp deleted file mode 100644 index 0ff1a5b443f..00000000000 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ -namespace ErrorCodes -{ -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int DATABASE_ACCESS_DENIED; -} // namespace ErrorCodes - -StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const -{ - // Parse args - ASTs & args_func = typeid_cast(*ast_function).children; - - if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); - - ASTs & args = typeid_cast(*args_func.at(0)).children; - - if (args.size() != 3) - throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: path, format and structure.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < 3; ++i) - args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); - - std::string path = static_cast(*args[0]).value.safeGet(); - std::string format = static_cast(*args[1]).value.safeGet(); - std::string structure = static_cast(*args[2]).value.safeGet(); - - // Create sample block - std::vector structure_vals; - boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); - - if (structure_vals.size() % 2 != 0) - throw Exception("Odd number of elements in section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR); - - Block sample_block; - const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - - for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) - { - ColumnWithTypeAndName column; - column.name = structure_vals[i]; - column.type = data_type_factory.get(structure_vals[i + 1]); - column.column = column.type->createColumn(); - sample_block.insert(std::move(column)); - } - - // Create table - StoragePtr storage = StorageFile::create( - path, - -1, - context.getUserFilesPath(), - getName(), - format, - ColumnsDescription{sample_block.getNamesAndTypesList()}, - const_cast(context)); - - storage->startup(); - - return storage; -} - - -void registerTableFunctionFile(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} // namespace DB diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h deleted file mode 100644 index dda367c2679..00000000000 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - - -namespace DB -{ -/* file(path, format, structure) - creates a temporary storage from file - * - * - * The file must be in the clickhouse data directory. - * The relative path begins with the clickhouse data directory. - */ -class TableFunctionFile : public ITableFunction -{ -public: - static constexpr auto name = "file"; - std::string getName() const override { return name; } - -private: - StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; -}; - - -} // namespace DB diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index 2eac0ce0548..bfe219eec62 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -22,7 +22,6 @@ namespace DB void registerTableFunctionMerge(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); -void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctions() { auto & factory = TableFunctionFactory::instance(); @@ -30,7 +29,6 @@ void registerTableFunctions() registerTableFunctionMerge(factory); registerTableFunctionNumbers(factory); registerTableFunctionCatBoostPool(factory); - registerTableFunctionFile(factory); } } // namespace DB From d54fcc7723606ea92ffa787b0d06ae408f303fb5 Mon Sep 17 00:00:00 2001 From: Grace Cai Date: Mon, 13 Jun 2022 14:44:33 +0800 Subject: [PATCH 08/10] README.md: update URL of TiDB Cloud free trial (#5133) close pingcap/tiflash#4936, ref pingcap/tidb#35316 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8a2217b9a42..02af727105b 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ TiFlash repository is based on [ClickHouse](https://github.com/ClickHouse/ClickH ### Start with TiDB Cloud -Quickly explore TiFlash with [a free trial of TiDB Cloud](https://tidbcloud.com/signup). +Quickly explore TiFlash with [a free trial of TiDB Cloud](https://tidbcloud.com/free-trial). See [TiDB Cloud Quick Start Guide](https://docs.pingcap.com/tidbcloud/tidb-cloud-quickstart). From 9c8a58884b3f61afd17a4c1a132dbd78495f3037 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Mon, 13 Jun 2022 15:46:32 +0800 Subject: [PATCH 09/10] *: remove CatBoost (#5131) ref pingcap/tiflash#2019 --- dbms/src/Storages/StorageCatBoostPool.cpp | 287 ------------------ dbms/src/Storages/StorageCatBoostPool.h | 100 ------ .../TableFunctionCatBoostPool.cpp | 68 ----- .../TableFunctionCatBoostPool.h | 35 --- .../TableFunctions/registerTableFunctions.cpp | 3 +- 5 files changed, 1 insertion(+), 492 deletions(-) delete mode 100644 dbms/src/Storages/StorageCatBoostPool.cpp delete mode 100644 dbms/src/Storages/StorageCatBoostPool.h delete mode 100644 dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp delete mode 100644 dbms/src/TableFunctions/TableFunctionCatBoostPool.h diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp deleted file mode 100644 index 317cac21d52..00000000000 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_PARSE_TEXT; - extern const int DATABASE_ACCESS_DENIED; -} - -namespace -{ -class CatBoostDatasetBlockInputStream : public IProfilingBlockInputStream -{ -public: - - CatBoostDatasetBlockInputStream(const std::string & file_name, const std::string & format_name, - const Block & sample_block, const Context & context, size_t max_block_size) - : file_name(file_name), format_name(format_name) - { - read_buf = std::make_unique(file_name); - reader = FormatFactory().getInput(format_name, *read_buf, sample_block, context, max_block_size); - } - - String getName() const override - { - return "CatBoostDataset"; - } - - Block readImpl() override - { - return reader->read(); - } - - void readPrefixImpl() override - { - reader->readPrefix(); - } - - void readSuffixImpl() override - { - reader->readSuffix(); - } - - Block getHeader() const override { return sample_block; }; - -private: - Block sample_block; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; - std::string file_name; - std::string format_name; -}; - -} - -static boost::filesystem::path canonicalPath(std::string && path) -{ - return boost::filesystem::canonical(boost::filesystem::path(path)); -} - -static std::string resolvePath(const boost::filesystem::path & base_path, std::string && path) -{ - boost::filesystem::path resolved_path(path); - if (!resolved_path.is_absolute()) - return (base_path / resolved_path).string(); - return resolved_path.string(); -} - -static void checkCreationIsAllowed(const String & base_path, const String & path) -{ - if (base_path != path.substr(0, base_path.size())) - throw Exception( - "Using file descriptor or user specified path as source of storage isn't allowed for server daemons", - ErrorCodes::DATABASE_ACCESS_DENIED); -} - - -StorageCatBoostPool::StorageCatBoostPool(const Context & context, - String column_description_file_name_, - String data_description_file_name_) - : column_description_file_name(std::move(column_description_file_name_)), - data_description_file_name(std::move(data_description_file_name_)) -{ - auto base_path = canonicalPath(context.getPath()); - column_description_file_name = resolvePath(base_path, std::move(column_description_file_name)); - data_description_file_name = resolvePath(base_path, std::move(data_description_file_name)); - if (context.getApplicationType() == Context::ApplicationType::SERVER) - { - const auto & base_path_str = base_path.string(); - checkCreationIsAllowed(base_path_str, column_description_file_name); - checkCreationIsAllowed(base_path_str, data_description_file_name); - } - - parseColumnDescription(); - createSampleBlockAndColumns(); -} - -std::string StorageCatBoostPool::getColumnTypesString(const ColumnTypesMap & columnTypesMap) -{ - std::string types_string; - bool first = true; - for (const auto & value : columnTypesMap) - { - if (!first) - types_string.append(", "); - - first = false; - types_string += value.first; - } - - return types_string; -} - -void StorageCatBoostPool::checkDatasetDescription() -{ - std::ifstream in(data_description_file_name); - if (!in.good()) - throw Exception("Cannot open file: " + data_description_file_name, ErrorCodes::CANNOT_OPEN_FILE); - - std::string line; - if (!std::getline(in, line)) - throw Exception("File is empty: " + data_description_file_name, ErrorCodes::CANNOT_PARSE_TEXT); - - size_t columns_count = 1; - for (char sym : line) - if (sym == '\t') - ++columns_count; - - columns_description.resize(columns_count); -} - -void StorageCatBoostPool::parseColumnDescription() -{ - /// NOTE: simple parsing - /// TODO: use ReadBufferFromFile - - checkDatasetDescription(); - - std::ifstream in(column_description_file_name); - if (!in.good()) - throw Exception("Cannot open file: " + column_description_file_name, ErrorCodes::CANNOT_OPEN_FILE); - - std::string line; - size_t line_num = 0; - auto column_types_map = getColumnTypesMap(); - auto column_types_string = getColumnTypesString(column_types_map); - - /// Enumerate default names for columns as Auxiliary, Auxiliary1, Auxiliary2, ... - std::map columns_per_type_count; - - while (std::getline(in, line)) - { - ++line_num; - std::string str_line_num = std::to_string(line_num); - - if (line.empty()) - continue; - - std::istringstream iss(line); - std::vector tokens; - std::string token; - while (std::getline(iss, token, '\t')) - tokens.push_back(token); - - if (tokens.size() != 2 && tokens.size() != 3) - throw Exception("Cannot parse column description at line " + str_line_num + " '" + line + "' " - + ": expected 2 or 3 columns, got " + std::to_string(tokens.size()), - ErrorCodes::CANNOT_PARSE_TEXT); - - std::string str_id = tokens[0]; - std::string col_type = tokens[1]; - std::string col_alias = tokens.size() > 2 ? tokens[2] : ""; - - size_t num_id; - try - { - num_id = std::stoull(str_id); - } - catch (std::exception & e) - { - throw Exception("Cannot parse column index at row " + str_line_num + ": " + e.what(), - ErrorCodes::CANNOT_PARSE_TEXT); - } - - if (num_id >= columns_description.size()) - throw Exception("Invalid index at row " + str_line_num + ": " + str_id - + ", expected in range [0, " + std::to_string(columns_description.size()) + ")", - ErrorCodes::CANNOT_PARSE_TEXT); - - if (column_types_map.count(col_type) == 0) - throw Exception("Invalid column type: " + col_type + ", expected: " + column_types_string, - ErrorCodes::CANNOT_PARSE_TEXT); - - auto type = column_types_map[col_type]; - - std::string col_name; - - bool is_feature_column = type == DatasetColumnType::Num || type == DatasetColumnType::Categ; - auto & col_number = columns_per_type_count[type]; - /// If column is not feature skip '0' after the name (to use 'Target' instead of 'Target0'). - col_name = col_type + (is_feature_column || col_number ? std::to_string(col_number) : ""); - ++col_number; - - columns_description[num_id] = ColumnDescription(col_name, col_alias, type); - } -} - -void StorageCatBoostPool::createSampleBlockAndColumns() -{ - ColumnsDescription columns; - NamesAndTypesList cat_columns; - NamesAndTypesList num_columns; - sample_block.clear(); - for (auto & desc : columns_description) - { - DataTypePtr type; - if (desc.column_type == DatasetColumnType::Categ - || desc.column_type == DatasetColumnType::Auxiliary - || desc.column_type == DatasetColumnType::DocId) - type = std::make_shared(); - else - type = std::make_shared(); - - if (desc.column_type == DatasetColumnType::Categ) - cat_columns.emplace_back(desc.column_name, type); - else if (desc.column_type == DatasetColumnType::Num) - num_columns.emplace_back(desc.column_name, type); - else - columns.materialized.emplace_back(desc.column_name, type); - - if (!desc.alias.empty()) - { - auto alias = std::make_shared(desc.column_name); - columns.defaults[desc.alias] = {ColumnDefaultKind::Alias, alias}; - columns.aliases.emplace_back(desc.alias, type); - } - - sample_block.insert(ColumnWithTypeAndName(type, desc.column_name)); - } - columns.ordinary.insert(columns.ordinary.end(), num_columns.begin(), num_columns.end()); - columns.ordinary.insert(columns.ordinary.end(), cat_columns.begin(), cat_columns.end()); - - setColumns(columns); -} - -BlockInputStreams StorageCatBoostPool::read(const Names & column_names, - const SelectQueryInfo & /*query_info*/, - const Context & context, - QueryProcessingStage::Enum & /*processed_stage*/, - size_t max_block_size, - unsigned /*threads*/) -{ - auto stream = std::make_shared( - data_description_file_name, "TSV", sample_block, context, max_block_size); - - auto filter_stream = std::make_shared(stream, column_names, false); - return { filter_stream }; -} - -} diff --git a/dbms/src/Storages/StorageCatBoostPool.h b/dbms/src/Storages/StorageCatBoostPool.h deleted file mode 100644 index 0f4f7c2cede..00000000000 --- a/dbms/src/Storages/StorageCatBoostPool.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include - -namespace DB -{ -class StorageCatBoostPool : public ext::SharedPtrHelper - , public IStorage -{ -public: - std::string getName() const override { return "CatBoostPool"; } - - std::string getTableName() const override { return table_name; } - - BlockInputStreams read(const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - size_t max_block_size, - unsigned threads) override; - -private: - String table_name; - - String column_description_file_name; - String data_description_file_name; - Block sample_block; - - enum class DatasetColumnType - { - Target, - Num, - Categ, - Auxiliary, - DocId, - Weight, - Baseline - }; - - using ColumnTypesMap = std::map; - - ColumnTypesMap getColumnTypesMap() const - { - return { - {"Target", DatasetColumnType::Target}, - {"Num", DatasetColumnType::Num}, - {"Categ", DatasetColumnType::Categ}, - {"Auxiliary", DatasetColumnType::Auxiliary}, - {"DocId", DatasetColumnType::DocId}, - {"Weight", DatasetColumnType::Weight}, - {"Baseline", DatasetColumnType::Baseline}, - }; - }; - - std::string getColumnTypesString(const ColumnTypesMap & columnTypesMap); - - struct ColumnDescription - { - std::string column_name; - std::string alias; - DatasetColumnType column_type; - - ColumnDescription() - : column_type(DatasetColumnType::Num) - {} - ColumnDescription(std::string column_name, std::string alias, DatasetColumnType column_type) - : column_name(std::move(column_name)) - , alias(std::move(alias)) - , column_type(column_type) - {} - }; - - std::vector columns_description; - - void checkDatasetDescription(); - void parseColumnDescription(); - void createSampleBlockAndColumns(); - -protected: - StorageCatBoostPool(const Context & context, String column_description_file_name, String data_description_file_name); -}; - -} // namespace DB diff --git a/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp b/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp deleted file mode 100644 index ab5cd7e5849..00000000000 --- a/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int BAD_ARGUMENTS; -} // namespace ErrorCodes - - -StoragePtr TableFunctionCatBoostPool::executeImpl(const ASTPtr & ast_function, const Context & context) const -{ - ASTs & args_func = typeid_cast(*ast_function).children; - - std::string err = "Table function '" + getName() + "' requires 2 parameters: " - + "column descriptions file, dataset description file"; - - if (args_func.size() != 1) - throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - ASTs & args = typeid_cast(*args_func.at(0)).children; - - if (args.size() != 2) - throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - auto getStringLiteral = [](const IAST & node, const char * description) { - auto lit = typeid_cast(&node); - if (!lit) - throw Exception(description + String(" must be string literal (in single quotes)."), ErrorCodes::BAD_ARGUMENTS); - - if (lit->value.getType() != Field::Types::String) - throw Exception(description + String(" must be string literal (in single quotes)."), ErrorCodes::BAD_ARGUMENTS); - - return safeGet(lit->value); - }; - String column_descriptions_file = getStringLiteral(*args[0], "Column descriptions file"); - String dataset_description_file = getStringLiteral(*args[1], "Dataset description file"); - - return StorageCatBoostPool::create(context, column_descriptions_file, dataset_description_file); -} - -void registerTableFunctionCatBoostPool(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} // namespace DB diff --git a/dbms/src/TableFunctions/TableFunctionCatBoostPool.h b/dbms/src/TableFunctions/TableFunctionCatBoostPool.h deleted file mode 100644 index 0b5b32dfffe..00000000000 --- a/dbms/src/TableFunctions/TableFunctionCatBoostPool.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - - -namespace DB -{ -/* catboostPool('column_descriptions_file', 'dataset_description_file') - * Create storage from CatBoost dataset. - */ -class TableFunctionCatBoostPool : public ITableFunction -{ -public: - static constexpr auto name = "catBoostPool"; - std::string getName() const override { return name; } - -private: - StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; -}; - -} // namespace DB diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index bfe219eec62..8449077cc96 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -21,14 +21,13 @@ namespace DB { void registerTableFunctionMerge(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); -void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); + void registerTableFunctions() { auto & factory = TableFunctionFactory::instance(); registerTableFunctionMerge(factory); registerTableFunctionNumbers(factory); - registerTableFunctionCatBoostPool(factory); } } // namespace DB From f4c2e012fc3067193412aca91c6b26e30173c722 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Mon, 13 Jun 2022 16:16:35 +0800 Subject: [PATCH 10/10] Test: Mock window function, refactor window function tests (#5021) ref pingcap/tiflash#4609, close pingcap/tiflash#5081 --- .../DataStreams/WindowBlockInputStream.cpp | 18 + dbms/src/DataStreams/WindowBlockInputStream.h | 2 + dbms/src/Debug/astToExecutor.cpp | 199 +++++++- dbms/src/Debug/astToExecutor.h | 56 ++- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 4 +- .../Coprocessor/DAGQueryBlockInterpreter.h | 2 - dbms/src/Flash/Coprocessor/DAGUtils.cpp | 5 +- .../Coprocessor/collectOutputFieldTypes.cpp | 26 +- dbms/src/Flash/tests/gtest_interpreter.cpp | 80 ++++ dbms/src/Interpreters/WindowDescription.cpp | 36 +- dbms/src/Interpreters/WindowDescription.h | 4 + dbms/src/TestUtils/ExecutorTestUtils.cpp | 34 +- dbms/src/TestUtils/ExecutorTestUtils.h | 57 +-- dbms/src/TestUtils/FunctionTestUtils.cpp | 32 ++ dbms/src/TestUtils/FunctionTestUtils.h | 27 ++ dbms/src/TestUtils/executorSerializer.cpp | 66 ++- dbms/src/TestUtils/mockExecutor.cpp | 56 ++- dbms/src/TestUtils/mockExecutor.h | 19 +- .../TestUtils/tests/gtest_mock_executors.cpp | 12 + .../tests/gtest_window_functions.cpp | 436 ++++++------------ 20 files changed, 823 insertions(+), 348 deletions(-) diff --git a/dbms/src/DataStreams/WindowBlockInputStream.cpp b/dbms/src/DataStreams/WindowBlockInputStream.cpp index bc63db52873..2cc61df8104 100644 --- a/dbms/src/DataStreams/WindowBlockInputStream.cpp +++ b/dbms/src/DataStreams/WindowBlockInputStream.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include namespace DB @@ -574,4 +575,21 @@ void WindowBlockInputStream::tryCalculate() peer_group_number = 1; } } + +void WindowBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + buffer.append(", function: {"); + buffer.joinStr( + window_description.window_functions_descriptions.begin(), + window_description.window_functions_descriptions.end(), + [&](const auto & func, FmtBuffer & b) { + b.append(func.window_function->getName()); + }, + ", "); + buffer.fmtAppend( + "}}, frame: {{type: {}, boundary_begin: {}, boundary_end: {}}}", + frameTypeToString(window_description.frame.type), + boundaryTypeToString(window_description.frame.begin_type), + boundaryTypeToString(window_description.frame.end_type)); +} } // namespace DB diff --git a/dbms/src/DataStreams/WindowBlockInputStream.h b/dbms/src/DataStreams/WindowBlockInputStream.h index 46b18dec1ee..0ef23aa9f6f 100644 --- a/dbms/src/DataStreams/WindowBlockInputStream.h +++ b/dbms/src/DataStreams/WindowBlockInputStream.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -169,6 +170,7 @@ class WindowBlockInputStream : public IProfilingBlockInputStream protected: Block readImpl() override; + void appendInfo(FmtBuffer & buffer) const override; LoggerPtr log; diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index 82f894905e6..fec76d7a085 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -24,13 +24,13 @@ #include #include #include -#include #include #include #include namespace DB { +using ASTPartitionByElement = ASTOrderByElement; void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & val_field, tipb::Expr * expr, Int32 collator_id) { *(expr->mutable_field_type()) = columnInfoToFieldType(ci); @@ -190,6 +190,12 @@ std::unordered_map agg_func_name_to_sig({ {"group_concat", tipb::ExprType::GroupConcat}, }); +std::unordered_map window_func_name_to_sig({ + {"RowNumber", tipb::ExprType::RowNumber}, + {"Rank", tipb::ExprType::Rank}, + {"DenseRank", tipb::ExprType::DenseRank}, +}); + DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input) { DAGColumnInfo output = input; @@ -1343,6 +1349,105 @@ void Join::toMPPSubPlan(size_t & executor_index, const DAGProperties & propertie exchange_map[left_exchange_receiver->name] = std::make_pair(left_exchange_receiver, left_exchange_sender); exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); } + +bool Window::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeWindow); + tipb_executor->set_executor_id(name); + tipb::Window * window = tipb_executor->mutable_window(); + auto & input_schema = children[0]->output_schema; + for (const auto & expr : func_descs) + { + tipb::Expr * window_expr = window->add_func_desc(); + const auto * window_func = typeid_cast(expr.get()); + for (const auto & arg : window_func->arguments->children) + { + tipb::Expr * func = window_expr->add_children(); + astToPB(input_schema, arg, func, collator_id, context); + } + auto window_sig_it = window_func_name_to_sig.find(window_func->name); + if (window_sig_it == window_func_name_to_sig.end()) + throw Exception(fmt::format("Unsupported window function {}", window_func->name), ErrorCodes::LOGICAL_ERROR); + auto window_sig = window_sig_it->second; + window_expr->set_tp(window_sig); + auto * ft = window_expr->mutable_field_type(); + // TODO: Maybe more window functions with different field type. + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagBinary); + ft->set_collate(collator_id); + ft->set_flen(21); + ft->set_decimal(-1); + } + + for (const auto & child : order_by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = window->add_order_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + for (const auto & child : partition_by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = window->add_partition_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + if (frame.type.has_value()) + { + tipb::WindowFrame * mut_frame = window->mutable_frame(); + mut_frame->set_type(frame.type.value()); + if (frame.start.has_value()) + { + auto * start = mut_frame->mutable_start(); + start->set_offset(std::get<2>(frame.start.value())); + start->set_unbounded(std::get<1>(frame.start.value())); + start->set_type(std::get<0>(frame.start.value())); + } + + if (frame.end.has_value()) + { + auto * end = mut_frame->mutable_end(); + end->set_offset(std::get<2>(frame.end.value())); + end->set_unbounded(std::get<1>(frame.end.value())); + end->set_type(std::get<0>(frame.end.value())); + } + } + + auto * children_executor = window->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); +} + +bool Sort::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeSort); + tipb_executor->set_executor_id(name); + tipb::Sort * sort = tipb_executor->mutable_sort(); + sort->set_ispartialsort(is_partial_sort); + + for (const auto & child : by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = sort->add_byitems(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + auto * children_executor = sort->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); +} + } // namespace mock ExecutorPtr compileTableScan(size_t & executor_index, TableInfo & table_info, String & table_alias, bool append_pk_column) @@ -1561,11 +1666,101 @@ ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, ti return exchange_sender; } - ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema) { ExecutorPtr exchange_receiver = std::make_shared(executor_index, schema); return exchange_receiver; } +ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame) +{ + std::vector partition_columns; + if (partition_by_expr_list != nullptr) + { + for (const auto & child : partition_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); + partition_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + + std::vector order_columns; + if (order_by_expr_list != nullptr) + { + for (const auto & child : order_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + + DAGSchema output_schema; + output_schema.insert(output_schema.end(), input->output_schema.begin(), input->output_schema.end()); + + std::vector window_exprs; + if (func_desc_list != nullptr) + { + for (const auto & expr : func_desc_list->children) + { + const auto * func = typeid_cast(expr.get()); + window_exprs.push_back(expr); + std::vector children_ci; + for (const auto & arg : func->arguments->children) + { + children_ci.push_back(compileExpr(input->output_schema, arg)); + } + // TODO: add more window functions + TiDB::ColumnInfo ci; + switch (window_func_name_to_sig[func->name]) + { + case tipb::ExprType::RowNumber: + case tipb::ExprType::Rank: + case tipb::ExprType::DenseRank: + { + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagBinary; + break; + } + default: + throw Exception(fmt::format("Unsupported window function {}", func->name), ErrorCodes::LOGICAL_ERROR); + } + output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); + } + } + + ExecutorPtr window = std::make_shared( + executor_index, + output_schema, + window_exprs, + std::move(partition_columns), + std::move(order_columns), + frame); + window->children.push_back(input); + return window; +} + +ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort) +{ + std::vector order_columns; + if (order_by_expr_list != nullptr) + { + for (const auto & child : order_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + ExecutorPtr sort = std::make_shared(executor_index, input->output_schema, std::move(order_columns), is_partial_sort); + sort->children.push_back(input); + return sort; +} } // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index 37d3f22b6e1..cbd2e5ade3a 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,8 @@ #include #include +#include + namespace DB { namespace ErrorCodes @@ -272,6 +275,54 @@ struct Join : Executor void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; }; + +using MockWindowFrameBound = std::tuple; + +struct MockWindowFrame +{ + std::optional type; + std::optional start; + std::optional end; + // TODO: support calcFuncs +}; + +struct Window : Executor +{ + std::vector func_descs; + std::vector partition_by_exprs; + std::vector order_by_exprs; + MockWindowFrame frame; + + Window(size_t & index_, const DAGSchema & output_schema_, std::vector func_descs_, std::vector partition_by_exprs_, std::vector order_by_exprs_, MockWindowFrame frame_) + : Executor(index_, "window_" + std::to_string(index_), output_schema_) + , func_descs(std::move(func_descs_)) + , partition_by_exprs(std::move(partition_by_exprs_)) + , order_by_exprs(order_by_exprs_) + , frame(frame_) + { + } + // Currently only use Window Executor in Unit Test which don't call columnPrume. + // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process. + void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; +}; + +struct Sort : Executor +{ + std::vector by_exprs; + bool is_partial_sort; + + Sort(size_t & index_, const DAGSchema & output_schema_, std::vector by_exprs_, bool is_partial_sort_) + : Executor(index_, "sort_" + std::to_string(index_), output_schema_) + , by_exprs(by_exprs_) + , is_partial_sort(is_partial_sort_) + { + } + // Currently only use Sort Executor in Unit Test which don't call columnPrume. + // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process. + void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; +}; } // namespace mock using ExecutorPtr = std::shared_ptr; @@ -294,8 +345,9 @@ ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, ti ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema); -void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); +ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame); -//TODO: add compileWindow +ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort); +void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); } // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 3b7112af02d..9f201006a88 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -151,11 +151,9 @@ class DAGExpressionAnalyzer : private boost::noncopyable void appendCastAfterWindow( const ExpressionActionsPtr & actions, const tipb::Window & window, - const size_t window_columns_start_index); + size_t window_columns_start_index); -#ifndef DBMS_PUBLIC_GTEST private: -#endif NamesAndTypes buildOrderColumns( const ExpressionActionsPtr & actions, const ::google::protobuf::RepeatedPtrField & order_by); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index e68c4f91cee..0b3b2db9623 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -54,9 +54,7 @@ class DAGQueryBlockInterpreter BlockInputStreams execute(); -#ifndef DBMS_PUBLIC_GTEST private: -#endif void executeImpl(DAGPipeline & pipeline); void handleMockTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); void handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 87f58131c8c..bea26fe9f99 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -29,7 +29,6 @@ #include namespace DB { - const Int8 VAR_SIZE = 0; extern const String uniq_raw_res_name; @@ -770,6 +769,10 @@ const String & getFunctionName(const tipb::Expr & expr) { return getAggFunctionName(expr); } + else if (isWindowFunctionExpr(expr)) + { + return getWindowFunctionName(expr); + } else { auto it = scalar_func_map.find(expr.sig()); diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index b68279faa13..87744c553e0 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -45,19 +45,36 @@ bool collectForAgg(std::vector & output_field_types, const tipb { for (const auto & expr : agg.agg_func()) { - if (!exprHasValidFieldType(expr)) + if (unlikely(!exprHasValidFieldType(expr))) throw TiFlashException("Agg expression without valid field type", Errors::Coprocessor::BadRequest); output_field_types.push_back(expr.field_type()); } for (const auto & expr : agg.group_by()) { - if (!exprHasValidFieldType(expr)) + if (unlikely(!exprHasValidFieldType(expr))) throw TiFlashException("Group by expression without valid field type", Errors::Coprocessor::BadRequest); output_field_types.push_back(expr.field_type()); } return false; } +bool collectForExecutor(std::vector & output_field_types, const tipb::Executor & executor); +bool collectForWindow(std::vector & output_field_types, const tipb::Executor & executor) +{ + // collect output_field_types of child + getChildren(executor).forEach([&output_field_types](const tipb::Executor & child) { + traverseExecutorTree(child, [&output_field_types](const tipb::Executor & e) { return collectForExecutor(output_field_types, e); }); + }); + + for (const auto & expr : executor.window().func_desc()) + { + if (unlikely(!exprHasValidFieldType(expr))) + throw TiFlashException("Window expression without valid field type", Errors::Coprocessor::BadRequest); + output_field_types.push_back(expr.field_type()); + } + return false; +} + bool collectForReceiver(std::vector & output_field_types, const tipb::ExchangeReceiver & receiver) { for (const auto & field_type : receiver.field_types()) @@ -82,7 +99,6 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } -bool collectForExecutor(std::vector & output_field_types, const tipb::Executor & executor); bool collectForJoin(std::vector & output_field_types, const tipb::Executor & executor) { // collect output_field_types of children @@ -147,8 +163,8 @@ bool collectForExecutor(std::vector & output_field_types, const case tipb::ExecType::TypeWindow: // Window will only be pushed down in mpp mode. // In mpp mode, ExchangeSender or Sender will return output_field_types directly. - // If not in mpp mode, window executor type is invalid. - throw TiFlashException("Window executor type is invalid in non-mpp mode, should not reach here.", Errors::Coprocessor::Internal); + // If not in mpp mode or debug mode, window executor type is invalid. + return collectForWindow(output_field_types, executor); case tipb::ExecType::TypeExchangeReceiver: return collectForReceiver(output_field_types, executor.exchange_receiver()); case tipb::ExecType::TypeTableScan: diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index a6bb8ff1702..ba7d8fd15ee 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -385,5 +385,85 @@ CreatingSets } CATCH +TEST_F(InterpreterExecuteTest, Window) +try +{ + auto request = context + .scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "RowNumber()"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + Expression: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .sort({{"s1", true}, {"s2", false}}, true) + .project({"s1", "s2", "s3"}) + .window(RowNumber(), {"s1", true}, {"s1", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "s3", "RowNumber()"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + Expression: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Union: + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } +} +CATCH + } // namespace tests } // namespace DB \ No newline at end of file diff --git a/dbms/src/Interpreters/WindowDescription.cpp b/dbms/src/Interpreters/WindowDescription.cpp index 2ab407bb18e..09d96411673 100644 --- a/dbms/src/Interpreters/WindowDescription.cpp +++ b/dbms/src/Interpreters/WindowDescription.cpp @@ -44,7 +44,7 @@ WindowFrame::FrameType getFrameTypeFromTipb(const tipb::WindowFrameType & type) return WindowFrame::FrameType::Groups; default: throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Unknowed frame type {}", + "Unknown frame type {}", type); } } @@ -60,4 +60,38 @@ void WindowDescription::setWindowFrame(const tipb::WindowFrame & frame_) frame.end_preceding = (frame_.end().type() == tipb::WindowBoundType::Preceding); frame.is_default = false; } + +String frameTypeToString(const WindowFrame::FrameType & type) +{ + switch (type) + { + case WindowFrame::FrameType::Rows: + return "Rows"; + case WindowFrame::FrameType::Groups: + return "Groups"; + case WindowFrame::FrameType::Ranges: + return "Ranges"; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unknown frame type {}", + type); + } +} + +String boundaryTypeToString(const WindowFrame::BoundaryType & type) +{ + switch (type) + { + case WindowFrame::BoundaryType::Unbounded: + return "Unbounded"; + case WindowFrame::BoundaryType::Current: + return "Current"; + case WindowFrame::BoundaryType::Offset: + return "Offset"; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unknown boundary type {}", + type); + } +} } // namespace DB diff --git a/dbms/src/Interpreters/WindowDescription.h b/dbms/src/Interpreters/WindowDescription.h index cdcade1b750..a3c2bac5747 100644 --- a/dbms/src/Interpreters/WindowDescription.h +++ b/dbms/src/Interpreters/WindowDescription.h @@ -87,6 +87,10 @@ struct WindowFrame && other.end_preceding == end_preceding; } }; + +String frameTypeToString(const WindowFrame::FrameType & type); +String boundaryTypeToString(const WindowFrame::BoundaryType & type); + class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; struct WindowDescription diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 67a21d12286..881ebaf88db 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -12,11 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include #include #include + +#include + namespace DB::tests { DAGContext & ExecutorTest::getDAGContext() @@ -34,15 +38,20 @@ void ExecutorTest::initializeContext() void ExecutorTest::SetUpTestCase() { - try - { - DB::registerFunctions(); - DB::registerAggregateFunctions(); - } - catch (DB::Exception &) - { - // Maybe another test has already registered, ignore exception here. - } + auto register_func = [](std::function func) { + try + { + func(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } + }; + + register_func(DB::registerFunctions); + register_func(DB::registerAggregateFunctions); + register_func(DB::registerWindowFunctions); } void ExecutorTest::initializeClientInfo() @@ -125,6 +134,13 @@ void ExecutorTest::executeStreams(const std::shared_ptr & requ executeStreams(request, context.executorIdColumnsMap(), expect_columns, concurrency); } +void ExecutorTest::executeStreamsWithSingleSource(const std::shared_ptr & request, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns, SourceType type, size_t concurrency) +{ + std::unordered_map source_columns_map; + source_columns_map[getSourceName(type)] = source_columns; + executeStreams(request, source_columns_map, expect_columns, concurrency); +} + void ExecutorTest::dagRequestEqual(const String & expected_string, const std::shared_ptr & actual) { ASSERT_EQ(Poco::trim(expected_string), Poco::trim(ExecutorSerializer().serialize(actual.get()))); diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 977b46abbd2..87bb7115bed 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -15,15 +15,13 @@ #pragma once #include -#include -#include #include #include #include -#include -#include #include #include +#include + namespace DB::tests { void executeInterpreter(const std::shared_ptr & request, Context & context); @@ -52,6 +50,28 @@ class ExecutorTest : public ::testing::Test void executeInterpreter(const String & expected_string, const std::shared_ptr & request, size_t concurrency); + enum SourceType + { + TableScan, + ExchangeReceiver + }; + + // for single source query, the source executor name is ${type}_0 + static String getSourceName(SourceType type) + { + switch (type) + { + case TableScan: + return "table_scan_0"; + case ExchangeReceiver: + return "exchange_receiver_0"; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unknown Executor Source type {}", + type); + } + } + void executeStreams( const std::shared_ptr & request, std::unordered_map & source_columns_map, @@ -62,29 +82,12 @@ class ExecutorTest : public ::testing::Test const ColumnsWithTypeAndName & expect_columns, size_t concurrency = 1); - template - ColumnWithTypeAndName toNullableVec(const std::vector::FieldType>> & v) - { - return createColumn>(v); - } - - template - ColumnWithTypeAndName toVec(const std::vector::FieldType> & v) - { - return createColumn(v); - } - - template - ColumnWithTypeAndName toNullableVec(String name, const std::vector::FieldType>> & v) - { - return createColumn>(v, name); - } - - template - ColumnWithTypeAndName toVec(String name, const std::vector::FieldType> & v) - { - return createColumn(v, name); - } + void executeStreamsWithSingleSource( + const std::shared_ptr & request, + const ColumnsWithTypeAndName & source_columns, + const ColumnsWithTypeAndName & expect_columns, + SourceType type = TableScan, + size_t concurrency = 1); protected: MockDAGRequestContext context; diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index dae07f7123b..637fbf51c00 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -242,5 +242,37 @@ ColumnWithTypeAndName createOnlyNullColumn(size_t size, const String & name) return {std::move(col), data_type, name}; } +ColumnWithTypeAndName toDatetimeVec(String name, const std::vector & v, int fsp) +{ + std::vector::FieldType> vec; + vec.reserve(v.size()); + for (const auto & value_str : v) + { + Field value = parseMyDateTime(value_str, fsp); + vec.push_back(value.template safeGet()); + } + DataTypePtr data_type = std::make_shared(fsp); + return {makeColumn(data_type, vec), data_type, name, 0}; +} + +ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector & v, int fsp) +{ + std::vector::FieldType>> vec; + vec.reserve(v.size()); + for (const auto & value_str : v) + { + if (!value_str.empty()) + { + Field value = parseMyDateTime(value_str, fsp); + vec.push_back(value.template safeGet()); + } + else + { + vec.push_back({}); + } + } + DataTypePtr data_type = makeNullable(std::make_shared(fsp)); + return {makeColumn>(data_type, vec), data_type, name, 0}; +} } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index 7704c69a89f..d6b7351df05 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -654,6 +654,33 @@ ColumnWithTypeAndName createNullableColumn( return createNullableColumn(data_type_args, vec, null_map, name, 0); } +template +ColumnWithTypeAndName toNullableVec(const std::vector::FieldType>> & v) +{ + return createColumn>(v); +} + +template +ColumnWithTypeAndName toVec(const std::vector::FieldType> & v) +{ + return createColumn(v); +} + +template +ColumnWithTypeAndName toNullableVec(String name, const std::vector::FieldType>> & v) +{ + return createColumn>(v, name); +} + +template +ColumnWithTypeAndName toVec(String name, const std::vector::FieldType> & v) +{ + return createColumn(v, name); +} + +ColumnWithTypeAndName toDatetimeVec(String name, const std::vector & v, int fsp); + +ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector & v, int fsp); class FunctionTest : public ::testing::Test { protected: diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index b8d2b039bd2..a0ae4b11270 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -204,6 +204,66 @@ void serializeExchangeReceiver(const String & executor_id, const tipb::ExchangeR buf.append("}\n"); } +void serializeWindow(const String & executor_id, const tipb::Window & window [[maybe_unused]], FmtBuffer & buf) +{ + buf.fmtAppend("{} | partition_by: {{", executor_id); + buf.joinStr( + window.partition_by().begin(), + window.partition_by().end(), + [&](const auto & partition_by, FmtBuffer & fb) { + fb.append("("); + serializeExpression(partition_by.expr(), buf); + fb.fmtAppend(", desc: {})", partition_by.desc()); + }, + ", "); + buf.append("}}, order_by: {"); + buf.joinStr( + window.order_by().begin(), + window.order_by().end(), + [&](const auto & order_by, FmtBuffer & fb) { + fb.append("("); + serializeExpression(order_by.expr(), buf); + fb.fmtAppend(", desc: {})", order_by.desc()); + }, + ", "); + buf.append("}, func_desc: {"); + buf.joinStr( + window.func_desc().begin(), + window.func_desc().end(), + [&](const auto & func, FmtBuffer &) { + serializeExpression(func, buf); + }, + ", "); + if (window.has_frame()) + { + buf.append("}, frame: {"); + if (window.frame().has_start()) + { + buf.fmtAppend("start<{}, {}, {}>", window.frame().start().type(), window.frame().start().unbounded(), window.frame().start().offset()); + } + if (window.frame().has_end()) + { + buf.fmtAppend(", end<{}, {}, {}>", window.frame().end().type(), window.frame().end().unbounded(), window.frame().end().offset()); + } + } + buf.append("}\n"); +} + +void serializeSort(const String & executor_id, const tipb::Sort & sort [[maybe_unused]], FmtBuffer & buf) +{ + buf.fmtAppend("{} | isPartialSort: {}, partition_by: {{", executor_id, sort.ispartialsort()); + buf.joinStr( + sort.byitems().begin(), + sort.byitems().end(), + [&](const auto & by, FmtBuffer & fb) { + fb.append("("); + serializeExpression(by.expr(), buf); + fb.fmtAppend(", desc: {})", by.desc()); + }, + ", "); + buf.append("}\n"); +} + void ExecutorSerializer::serialize(const tipb::Executor & root_executor, size_t level) { auto append_str = [&level, this](const tipb::Executor & executor) { @@ -248,9 +308,11 @@ void ExecutorSerializer::serialize(const tipb::Executor & root_executor, size_t serializeExchangeSender(executor.executor_id(), executor.exchange_sender(), buf); break; case tipb::ExecType::TypeSort: - throw TiFlashException("Sort executor is not supported", Errors::Coprocessor::Unimplemented); // todo support sort executor. + serializeSort(executor.executor_id(), executor.sort(), buf); + break; case tipb::ExecType::TypeWindow: - throw TiFlashException("Window executor is not supported", Errors::Coprocessor::Unimplemented); // todo support window executor. + serializeWindow(executor.executor_id(), executor.window(), buf); + break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); } diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index af939002cff..e1ccbdbb010 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -23,8 +23,6 @@ #include #include -#include - namespace DB::tests { ASTPtr buildColumn(const String & column_name) @@ -54,6 +52,15 @@ ASTPtr buildOrderByItemList(MockOrderByItems order_by_items) return exp_list; } +MockWindowFrame buildDefaultRowsFrame() +{ + MockWindowFrame frame; + frame.type = tipb::WindowFrameType::Rows; + frame.end = {tipb::WindowBoundType::CurrentRow, false, 0}; + frame.start = {tipb::WindowBoundType::CurrentRow, false, 0}; + return frame; +} + // a mock DAGRequest should prepare its time_zone, flags, encode_type and output_schema. void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request) { @@ -96,6 +103,9 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String TiDB::ColumnInfo ret; ret.tp = column.second; ret.name = column.first; + // TODO: find a way to assign decimal field's flen. + if (ret.tp == TiDB::TP::TypeNewDecimal) + ret.flen = 65; ret.id = i++; table_info.columns.push_back(std::move(ret)); } @@ -276,6 +286,48 @@ DAGRequestBuilder & DAGRequestBuilder::buildAggregation(ASTPtr agg_funcs, ASTPtr return *this; } +DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame) +{ + assert(root); + auto window_func_list = std::make_shared(); + window_func_list->children.push_back(window_func); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemList({partition_by}), buildOrderByItemList({order_by}), frame); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame) +{ + assert(root); + auto window_func_list = std::make_shared(); + window_func_list->children.push_back(window_func); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemList(partition_by_list), buildOrderByItemList(order_by_list), frame); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::window(MockAsts window_funcs, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame) +{ + assert(root); + auto window_func_list = std::make_shared(); + for (const auto & func : window_funcs) + window_func_list->children.push_back(func); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemList(partition_by_list), buildOrderByItemList(order_by_list), frame); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItem order_by, bool is_partial_sort) +{ + assert(root); + root = compileSort(root, getExecutorIndex(), buildOrderByItemList({order_by}), is_partial_sort); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItems order_by_list, bool is_partial_sort) +{ + assert(root); + root = compileSort(root, getExecutorIndex(), buildOrderByItemList(order_by_list), is_partial_sort); + return *this; +} + void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos) { std::vector v_column_info(columnInfos.size()); diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 88d98158b74..d52b5ec674a 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -20,9 +20,6 @@ #include #include -#include -#include - namespace DB::tests { using MockColumnInfo = std::pair; @@ -31,8 +28,11 @@ using MockColumnInfoList = std::initializer_list; using MockTableName = std::pair; using MockOrderByItem = std::pair; using MockOrderByItems = std::initializer_list; +using MockPartitionByItem = std::pair; +using MockPartitionByItems = std::initializer_list; using MockColumnNames = std::initializer_list; using MockAsts = std::initializer_list; +using MockWindowFrame = mock::MockWindowFrame; class MockDAGRequestContext; @@ -96,6 +96,13 @@ class DAGRequestBuilder DAGRequestBuilder & aggregation(ASTPtr agg_func, ASTPtr group_by_expr); DAGRequestBuilder & aggregation(MockAsts agg_funcs, MockAsts group_by_exprs); + // window + DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame); + DAGRequestBuilder & window(MockAsts window_funcs, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame); + DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame); + DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort); + DAGRequestBuilder & sort(MockOrderByItems order_by_list, bool is_partial_sort); + private: void initDAGRequest(tipb::DAGRequest & dag_request); DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs); @@ -164,6 +171,8 @@ ASTPtr buildLiteral(const Field & field); ASTPtr buildFunction(MockAsts exprs, const String & name); ASTPtr buildOrderByItemList(MockOrderByItems order_by_items); +MockWindowFrame buildDefaultRowsFrame(); + #define col(name) buildColumn((name)) #define lit(field) buildLiteral((field)) #define eq(expr1, expr2) makeASTFunction("equals", (expr1), (expr2)) @@ -174,5 +183,9 @@ ASTPtr buildOrderByItemList(MockOrderByItems order_by_items); #define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2)) #define NOT(expr) makeASTFunction("not", (expr1), (expr2)) #define Max(expr) makeASTFunction("max", expr) +/// Window functions +#define RowNumber() makeASTFunction("RowNumber") +#define Rank() makeASTFunction("Rank") +#define DenseRank() makeASTFunction("DenseRank") } // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 214148fe47f..8bed0f2fc6c 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -252,5 +252,17 @@ try } CATCH +TEST_F(MockDAGRequestTest, MockWindow) +try +{ + auto request = context.scan("test_db", "test_table").sort({"s1", false}, true).window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()).build(context); + { + String expected = "window_2 | partition_by: {(<1, String>, desc: false)}}, order_by: {(<0, String>, desc: true)}, func_desc: {row_number()}, frame: {start<2, false, 0>, end<2, false, 0>}\n" + " sort_1 | isPartialSort: true, partition_by: {(<0, String>, desc: false)}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } +} +CATCH } // namespace tests } // namespace DB \ No newline at end of file diff --git a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp index e4205f6f938..3addf73a642 100644 --- a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp +++ b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp @@ -12,334 +12,192 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include -#include -#include -#include +#include namespace DB::tests { -class WindowFunction : public DB::tests::FunctionTest +class WindowExecutorTestRunner : public DB::tests::ExecutorTest { -protected: - std::shared_ptr mock_interpreter; - - void SetUp() override - { - DB::tests::FunctionTest::SetUp(); - DB::registerWindowFunctions(); - } - - template - ColumnWithTypeAndName toNullableVec(String name, const std::vector::FieldType>> & v) - { - return createColumn>(v, name); - } - - template - ColumnWithTypeAndName toVec(String name, const std::vector::FieldType> & v) - { - return createColumn(v, name); - } - - template - static ColumnWithTypeAndName toConst(const T s) - { - return createConstColumn(1, s); - } - - static ColumnWithTypeAndName toDatetimeVec(String name, const std::vector & v, int fsp) - { - std::vector::FieldType> vec; - for (const auto & value_str : v) - { - Field value = parseMyDateTime(value_str, fsp); - vec.push_back(value.template safeGet()); - } - DataTypePtr data_type = std::make_shared(fsp); - return {makeColumn(data_type, vec), data_type, name, 0}; - } - - static ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector & v, int fsp) - { - std::vector::FieldType>> vec; - for (const auto & value_str : v) - { - if (!value_str.empty()) - { - Field value = parseMyDateTime(value_str, fsp); - vec.push_back(value.template safeGet()); - } - else - { - vec.push_back({}); - } - } - DataTypePtr data_type = makeNullable(std::make_shared(fsp)); - return {makeColumn>(data_type, vec), data_type, name, 0}; - } - - void setMaxBlockSize(int size) - { - context.getSettingsRef().max_block_size.set(size); - } - - void mockInterpreter(std::vector source_columns, Context context) - { - std::vector mock_input_streams_vec = {}; - DAGQueryBlock mock_query_block(0, static_cast>(nullptr)); - std::vector mock_subqueries_for_sets = {}; - mock_interpreter = std::make_shared(context, - mock_input_streams_vec, - mock_query_block, - 1); - - mock_interpreter->analyzer = std::make_unique(std::move(source_columns), context); - } - - void mockExecuteTableScan(DAGPipeline & pipeline, ColumnsWithTypeAndName columns) - { - pipeline.streams.push_back(std::make_shared(columns, context.getSettingsRef().max_block_size)); - mock_interpreter->input_streams_vec.push_back(pipeline.streams); - } - - void mockExecuteWindowOrder(DAGPipeline & pipeline, std::string sort_json_str) +public: + void initializeContext() override { - tipb::Sort sort; - google::protobuf::util::JsonStringToMessage(sort_json_str, &sort); - mock_interpreter->handleWindowOrder(pipeline, sort); - mock_interpreter->input_streams_vec[0] = pipeline.streams; - NamesWithAliases final_project; - for (const auto & column : (*mock_interpreter->analyzer).source_columns) - { - final_project.push_back({column.name, ""}); - } - mockExecuteProject(pipeline, final_project); - } - - void mockExecuteWindow(DAGPipeline & pipeline, std::string window_json_str) - { - tipb::Window window; - google::protobuf::util::JsonStringToMessage(window_json_str, &window); - mock_interpreter->handleWindow(pipeline, window); - mock_interpreter->input_streams_vec[0] = pipeline.streams; - NamesWithAliases final_project; - for (const auto & column : (*mock_interpreter->analyzer).source_columns) - { - final_project.push_back({column.name, ""}); - } - mockExecuteProject(pipeline, final_project); - } - - void mockExecuteProject(DAGPipeline & pipeline, NamesWithAliases & final_project) - { - mock_interpreter->executeProject(pipeline, final_project); - } - - static Block mergeBlocks(Blocks blocks) - { - if (blocks.empty()) - { - return {}; - } - Block sample_block; - std::vector actual_cols; - - for (const auto & block : blocks) - { - if (!sample_block) - { - sample_block = block; - for (const auto & column : block.getColumnsWithTypeAndName()) - { - actual_cols.push_back(column.type->createColumn()); - } - } - - for (size_t i = 0; i < block.columns(); ++i) - { - for (size_t j = 0; j < block.rows(); ++j) - { - actual_cols[i]->insert((*(block.getColumnsWithTypeAndName())[i].column)[j]); - } - } - } - - ColumnsWithTypeAndName actual_columns; - - for (size_t i = 0; i < actual_cols.size(); ++i) - { - actual_columns.push_back({std::move(actual_cols[i]), sample_block.getColumnsWithTypeAndName()[i].type, sample_block.getColumnsWithTypeAndName()[i].name, sample_block.getColumnsWithTypeAndName()[i].column_id}); - } - return Block(actual_columns); - } - - void testOneWindowFunction(const std::vector & source_column_types, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns, const std::string window_json_str, const std::string sort_json_str) - { - mockInterpreter(source_column_types, context); - DAGPipeline pipeline; - ExpressionActionsChain chain; - Block except_block(expect_columns); - - mockExecuteTableScan(pipeline, source_columns); - - mockExecuteWindowOrder(pipeline, sort_json_str); - - mockExecuteWindow(pipeline, window_json_str); - - auto stream = pipeline.firstStream(); - - Blocks actual_blocks; - while (Block block = stream->read()) - { - actual_blocks.push_back(block); - } - - Block actual_block = mergeBlocks(actual_blocks); - - if (actual_block) - { - // Check that input columns is properly split to many blocks - ASSERT_EQ(actual_blocks.size(), (actual_block.rows() - 1) / context.getSettingsRef().max_block_size + 1); - } - ASSERT_BLOCK_EQ(except_block, actual_block); + ExecutorTest::initializeContext(); + context.addMockTable( + {"test_db", "test_table"}, + {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); + context.addMockTable( + {"test_db", "test_table_string"}, + {{"partition", TiDB::TP::TypeString}, {"order", TiDB::TP::TypeString}}, + {toVec("partition", {"banana", "banana", "banana", "banana", "apple", "apple", "apple", "apple"}), + toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"})}); + + context.addMockTable( + {"test_db", "test_table_more_cols"}, + {{"partition1", TiDB::TP::TypeLongLong}, {"partition2", TiDB::TP::TypeLongLong}, {"order1", TiDB::TP::TypeLongLong}, {"order2", TiDB::TP::TypeLongLong}}, + {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toVec("order1", {2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}), + toVec("order2", {2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1})}); + + context.addMockTable( + {"test_db", "test_table_float64"}, + {{"partition", TiDB::TP::TypeDouble}, {"order", TiDB::TP::TypeDouble}}, + {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}); + + context.addMockTable( + {"test_db", "test_table_datetime"}, + {{"partition", TiDB::TP::TypeDatetime}, {"order", TiDB::TP::TypeDatetime}}); + + context.addMockTable( + {"test_db", "test_table_for_rank"}, + {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); } }; -TEST_F(WindowFunction, testWindowFunctionByPartitionAndOrder) +TEST_F(WindowExecutorTestRunner, testWindowFunctionByPartitionAndOrder) try { - setMaxBlockSize(3); - - std::string window_json; - std::string sort_json; - /***** row_number with different types of input *****/ // int - sql : select *, row_number() over w1 from test1 window w1 as (partition by partition_int order by order_int) - window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAkMCV6NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAkMCV6NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + auto request = context + .scan("test_db", "test_table") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + executeStreams( + request, + {toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); // null input - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + executeStreamsWithSingleSource( + request, {toNullableVec("partition", {}), toNullableVec("order", {})}, - {}, - window_json, - sort_json); + {}); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + executeStreamsWithSingleSource( + request, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), {toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}}, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); // string - sql : select *, row_number() over w1 from test2 window w1 as (partition by partition_string order by order_string) - window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGA8Nz57tP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGA8Nz57tP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {"banana", "banana", "banana", "banana", "apple", "apple", "apple", "apple"}), toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"})}, - {toVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + request = context + .scan("test_db", "test_table_string") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + + executeStreams( + request, + {toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}, - {toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); - - // decimal - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_decimal) - window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAoN3M99P+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAoN3M99P+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, - {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + executeStreamsWithSingleSource( + request, + {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}, + {toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); + + // float64 - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_float64) + request = context + .scan("test_db", "test_table_float64") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + + executeStreams( + request, + {toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, - {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + executeStreamsWithSingleSource( + request, + {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, + {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); // datetime - select *, row_number() over w1 from test4 window w1 as (partition by partition_datetime order by order_datetime); - window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsNmBhdT+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsNmBhdT+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + request = context + .scan("test_db", "test_table_datetime") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + executeStreamsWithSingleSource( + request, + {toNullableDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, - {toDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), - toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + {toNullableDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + executeStreamsWithSingleSource( + request, {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, {toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); // 2 partiton key and 2 order key // sql : select *, row_number() over w1 from test6 window w1 as (partition by partition_int1, partition_int2 order by order_int1,order_int2) - window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIKA0Img1If/BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIKA0Img1If/BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition1", std::make_shared()), NameAndTypePair("partition2", std::make_shared()), NameAndTypePair("order1", std::make_shared()), NameAndTypePair("order2", std::make_shared())}, - {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), toVec("order1", {2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}), toVec("order2", {2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1})}, - {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), toVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), toVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})}, - window_json, - sort_json); + request = context + .scan("test_db", "test_table_more_cols") + .sort({{"partition1", false}, {"partition2", false}, {"order1", false}, {"order2", false}}, true) + .window(RowNumber(), {{"order1", false}, {"order2", false}}, {{"partition1", false}, {"partition2", false}}, buildDefaultRowsFrame()) + .build(context); + + executeStreams( + request, + {toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), + toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})}); /***** rank, dense_rank *****/ - window_json = R"({"funcDesc":[{"tp":"Rank","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false},{"tp":"DenseRank","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsOnl3NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsOnl3NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})}, - window_json, - sort_json); + request = context.scan("test_db", "test_table_for_rank").sort({{"partition", false}, {"order", false}}, true).window({Rank(), DenseRank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame{}).build(context); + executeStreams( + request, + {toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})}); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})}, - window_json, - sort_json); - - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})}, - window_json, - sort_json); + executeStreamsWithSingleSource( + request, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})}); + + executeStreamsWithSingleSource( + request, + {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}, + {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})}); } CATCH + } // namespace DB::tests