diff --git a/.gitmodules b/.gitmodules index 8472d78404e..335e1dbd9c8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,3 +82,6 @@ [submodule "contrib/cpu_features"] path = contrib/cpu_features url = https://github.com/google/cpu_features +[submodule "contrib/arm-optimized-routines"] + path = contrib/arm-optimized-routines + url = https://github.com/ARM-software/optimized-routines diff --git a/README.md b/README.md index 02af727105b..aa64e39d5ba 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,30 @@ LSAN_OPTIONS=suppressions=$WORKSPACE/tiflash/test/sanitize/asan.suppression ## Run Integration Tests -TBD. +1. Build your own tiflash binary in $BUILD with `-DCMAKE_BUILD_TYPE=DEBUG`. +``` +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=DEBUG +ninja tiflash +``` +2. Run a TiDB cluster locally using tiup playground or other tools. +``` +tiup playground nightly --tiflash.binpath $BUILD/dbms/src/Server/tiflash +``` +3. Check $WORKSPACE/tests/_env.sh to make sure the port and build directory are set correctly. +4. Run your integration tests using commands like `./run-test.sh fullstack-test2/ddl` under the $WORKSPACE directory. + +## Run MicroBenchmark Tests + +To run the microbenchmark tests, you need to build with `-DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON`: + +```shell +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON +ninja bench_dbms +``` + +The microbenchmark executable is at `$BUILD/dbms/bench_dbms`. You can run it with `./bench_dbms` or `./bench_dbms --benchmark_filter=xxx`. For more usage, check `./bench_dbms --help`. ## Generate LLVM Coverage Report diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 71f81ae3ee5..4520d1cb176 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -165,3 +165,7 @@ add_subdirectory(benchmark) set (BUILD_TESTING OFF CACHE BOOL "Disable cpu-features testing" FORCE) add_subdirectory(cpu_features) + +if (ARCH_AARCH64 AND ARCH_LINUX) + add_subdirectory(arm-optimized-routines-cmake) +endif () diff --git a/contrib/arm-optimized-routines b/contrib/arm-optimized-routines new file mode 160000 index 00000000000..e373f659523 --- /dev/null +++ b/contrib/arm-optimized-routines @@ -0,0 +1 @@ +Subproject commit e373f6595230087a8ddea449bfb14b47150b4059 diff --git a/contrib/arm-optimized-routines-cmake/CMakeLists.txt b/contrib/arm-optimized-routines-cmake/CMakeLists.txt new file mode 100644 index 00000000000..89baa7222f3 --- /dev/null +++ b/contrib/arm-optimized-routines-cmake/CMakeLists.txt @@ -0,0 +1,45 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This library is to override performance-critical routines for aarch64 targets. +# The implementations are imported from the official ARM repo. +# To reduce dispatching cost, the indirect function (ifunc) technique is utilized. Therefore, +# this library should only be enabled with ELF targets.
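Below is a minimal sketch of the indirect-function (ifunc) dispatch technique referenced in the comment above, using purely illustrative names (`copy_generic`, `copy_sve`, `tiflash_copy`); it assumes GCC or Clang targeting ELF on aarch64 Linux. The patch itself generates such dispatchers through the `DISPATCH` macro in `src/aor.c` further down, and the hardware-capability fallbacks mirror the ones defined there.

```cpp
// Minimal ifunc sketch (illustrative names; assumes GCC/Clang, ELF, aarch64 Linux).
// The resolver runs once when the symbol is bound, so the capability check is
// not paid on every call.
#include <stddef.h>
#include <sys/auxv.h>

#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22) // fallback for old kernel headers, same value as in src/aor.c
#endif
#ifndef AT_HWCAP
#define AT_HWCAP 16 // fallback, same value as in src/aor.c
#endif

extern "C"
{
// Two illustrative implementations of one routine; in the real library these
// come from the optimized-routines assembly sources.
void * copy_generic(void * dst, const void * src, size_t n)
{
    auto * d = static_cast<char *>(dst);
    const auto * s = static_cast<const char *>(src);
    for (size_t i = 0; i < n; ++i)
        d[i] = s[i];
    return dst;
}

void * copy_sve(void * dst, const void * src, size_t n)
{
    return copy_generic(dst, src, n); // stand-in for an SVE-optimized body
}

// Resolver: picks an implementation from the hardware capability bits.
// extern "C" keeps its symbol name unmangled so ifunc() can reference it.
static void * (*resolve_copy(void))(void *, const void *, size_t)
{
    return (getauxval(AT_HWCAP) & HWCAP_SVE) ? copy_sve : copy_generic;
}

// Exported symbol: calls are routed to whatever the resolver returned.
void * tiflash_copy(void * dst, const void * src, size_t n) __attribute__((ifunc("resolve_copy")));
}
```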
+ +# Considerations: +# - As of June 2022, most enterprise OSs (CentOS 7, CentOS Stream 8 and RHEL 8) still +# use a relatively old glibc on ARM64, where ASIMD, MTE, DC ZVA and SVE are not +# fully utilized. However, it is becoming increasingly common to use ARM64 instances +# in cloud-native situations. +# - The `optimized-routines` repo is actively maintained by ARM. Therefore, +# its quality is assured, and using it also lets us keep in sync with the latest +# acceleration techniques. + +set(CMAKE_C_FLAGS "") +ENABLE_LANGUAGE(C) +ENABLE_LANGUAGE(ASM) +set(TIFLASH_AOR_DIR ../arm-optimized-routines) + +file(GLOB TIFLASH_AARCH64_STRING_FILES ${TIFLASH_AOR_DIR}/string/aarch64/*.S) +add_library(tiflash-aarch64-string STATIC ${TIFLASH_AARCH64_STRING_FILES} src/aor.c) +target_compile_options(tiflash-aarch64-string PRIVATE -march=armv8-a+sve) +target_include_directories(tiflash-aarch64-string PRIVATE ${TIFLASH_AOR_DIR}/string/include) + +file(GLOB TIFLASH_AARCH64_MATH_FILES ${TIFLASH_AOR_DIR}/math/*.c) +add_library(tiflash-aarch64-math STATIC ${TIFLASH_AARCH64_MATH_FILES}) +target_include_directories(tiflash-aarch64-math PRIVATE ${TIFLASH_AOR_DIR}/math/include) + +# It is reasonable to keep these libraries optimized. +target_compile_options(tiflash-aarch64-string PRIVATE -O3 -g3 -fno-omit-frame-pointer -ffunction-sections -fdata-sections) +target_compile_options(tiflash-aarch64-math PRIVATE -O3 -g3 -fno-omit-frame-pointer -ffunction-sections -fdata-sections) diff --git a/contrib/arm-optimized-routines-cmake/src/aor.c b/contrib/arm-optimized-routines-cmake/src/aor.c new file mode 100644 index 00000000000..daff1df3c4b --- /dev/null +++ b/contrib/arm-optimized-routines-cmake/src/aor.c @@ -0,0 +1,115 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +// Provide default macro definitions in case they are not defined on the current Linux distro. +// For example, TiFlash compiled on older Linux kernels may also be used on newer ones. +// These values should be stable for Linux: only false negatives are expected when running on +// older kernels, which is acceptable as `google/cpu_features` does the same. +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#ifndef AT_HWCAP +#define AT_HWCAP 16 +#endif + +/// check if MTE is supported in the current environment +static inline bool mte_supported(void) +{ + return (getauxval(AT_HWCAP2) & HWCAP2_MTE) != 0; +} + +/// check if SVE is supported in the current environment +static inline bool sve_supported(void) +{ + return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0; +} + +#define STRINGIFY_IMPL(X) #X +#define STRINGIFY(X) STRINGIFY_IMPL(X) +/** + * \brief + * Symbols are defined with hidden visibility. Therefore, the implementations here only override routines within the TiFlash + * binary itself.
This is because dependencies like `ld.so`, `libgcc_s.so`, etc will need essential routines like + * `memcpy` to finish the early loading procedure. Therefore, declare such symbols as visible indirect function will + * create cyclic dependency. It shall be good enough to override symbols within TiFlash, as most heavy computation works + * are happening in the main binary. + * \param NAME: exported symbol name + * \param SVE: preferred implementation when SVE is available + * \param MTE: preferred implementation when MTE is available + * \param ASIMD: preferred implementation for generic aarch64 targets (ASIMD is required by default for Armv8 and above) + */ +#define DISPATCH(NAME, SVE, MTE, ASIMD) \ + extern typeof(ASIMD) __tiflash_##NAME __attribute__((ifunc(STRINGIFY(__tiflash_##NAME##_resolver)))); \ + extern typeof(ASIMD) NAME __attribute__((visibility("hidden"), alias(STRINGIFY(__tiflash_##NAME)))); \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wunused-function\"") static typeof(ASIMD) * __tiflash_##NAME##_resolver(void) \ + { \ + if (sve_supported()) \ + { \ + return SVE; \ + } \ + if (mte_supported()) \ + { \ + return MTE; \ + } \ + return ASIMD; \ + } \ + _Pragma("GCC diagnostic pop") +#undef memcpy +#undef memmove +#undef memset +#undef memchr +#undef memrchr +#undef memcmp +#undef strcpy +#undef stpcpy +#undef strcmp +#undef strchr +#undef strrchr +#undef strchrnul +#undef strlen +#undef strnlen +#undef strncmp + +DISPATCH(memcpy, __memcpy_aarch64_sve, __memcpy_aarch64_simd, __memcpy_aarch64_simd) +DISPATCH(memmove, __memmove_aarch64_sve, __memmove_aarch64_simd, __memmove_aarch64_simd) +DISPATCH(memset, __memset_aarch64, __memset_aarch64, __memset_aarch64) +DISPATCH(memchr, __memchr_aarch64_sve, __memchr_aarch64_mte, __memchr_aarch64) +DISPATCH(memrchr, __memrchr_aarch64, __memrchr_aarch64, __memrchr_aarch64) +DISPATCH(memcmp, __memcmp_aarch64_sve, __memcmp_aarch64, __memcmp_aarch64) +DISPATCH(strcpy, __strcpy_aarch64_sve, __strcpy_aarch64, __strcpy_aarch64) +DISPATCH(stpcpy, __stpcpy_aarch64_sve, __stpcpy_aarch64, __stpcpy_aarch64) +DISPATCH(strcmp, __strcmp_aarch64_sve, __strcmp_aarch64, __strcmp_aarch64) +DISPATCH(strchr, __strchr_aarch64_sve, __strchr_aarch64_mte, __strchr_aarch64) +DISPATCH(strrchr, __strrchr_aarch64_sve, __strrchr_aarch64_mte, __strrchr_aarch64) +DISPATCH(strchrnul, __strchrnul_aarch64_sve, __strchrnul_aarch64_mte, __strchrnul_aarch64) +DISPATCH(strlen, __strlen_aarch64_sve, __strlen_aarch64_mte, __strlen_aarch64) +DISPATCH(strnlen, __strnlen_aarch64_sve, __strnlen_aarch64, __strnlen_aarch64) +DISPATCH(strncmp, __strncmp_aarch64_sve, __strncmp_aarch64, __strncmp_aarch64) \ No newline at end of file diff --git a/contrib/client-c b/contrib/client-c index 36e05cb0f24..034d1e782cb 160000 --- a/contrib/client-c +++ b/contrib/client-c @@ -1 +1 @@ -Subproject commit 36e05cb0f24c085785abf367176dac2a45bfd67b +Subproject commit 034d1e782cb4697f99b09b679c00dade00f19dd5 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index e1e52fab73b..0df79f89a84 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -316,6 +316,7 @@ if (ENABLE_TESTS) ${TiFlash_SOURCE_DIR}/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp ) target_include_directories(bench_dbms BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR} ${benchmark_SOURCE_DIR}/include) + target_compile_definitions(bench_dbms PUBLIC DBMS_PUBLIC_GTEST) target_link_libraries(bench_dbms gtest dbms test_util_bench_main benchmark clickhouse_functions) if (ENABLE_TIFLASH_DTWORKLOAD) diff --git 
a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 61a2843ac59..e21bde19a47 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -38,12 +38,6 @@ #include #endif - -namespace CurrentMetrics -{ -extern const Metric SendExternalTables; -} - namespace DB { namespace ErrorCodes @@ -434,8 +428,6 @@ void Connection::sendExternalTablesData(ExternalTablesData & data) size_t maybe_compressed_out_bytes = maybe_compressed_out ? maybe_compressed_out->count() : 0; size_t rows = 0; - CurrentMetrics::Increment metric_increment{CurrentMetrics::SendExternalTables}; - for (auto & elem : data) { elem.first->readPrefix(); diff --git a/dbms/src/Client/ConnectionPoolWithFailover.cpp b/dbms/src/Client/ConnectionPoolWithFailover.cpp index a9b6825a3fe..179b2d92c0e 100644 --- a/dbms/src/Client/ConnectionPoolWithFailover.cpp +++ b/dbms/src/Client/ConnectionPoolWithFailover.cpp @@ -20,13 +20,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event DistributedConnectionMissingTable; -extern const Event DistributedConnectionStaleReplica; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -50,7 +43,7 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover( hostname_differences.resize(nested_pools.size()); for (size_t i = 0; i < nested_pools.size(); ++i) { - ConnectionPool & connection_pool = dynamic_cast(*nested_pools[i]); + auto & connection_pool = dynamic_cast(*nested_pools[i]); hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); } } @@ -187,7 +180,6 @@ ConnectionPoolWithFailover::tryGetEntry( fail_message = "There is no table " + table_to_check->database + "." + table_to_check->table + " on server: " + result.entry->getDescription(); LOG_WARNING(log, fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable); return result; } @@ -217,7 +209,6 @@ ConnectionPoolWithFailover::tryGetEntry( table_to_check->database, table_to_check->table, delay); - ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica); } } catch (const Exception & e) diff --git a/dbms/src/Common/Arena.h b/dbms/src/Common/Arena.h index c61ebfca8aa..ebaaf607a6d 100644 --- a/dbms/src/Common/Arena.h +++ b/dbms/src/Common/Arena.h @@ -24,13 +24,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event ArenaAllocChunks; -extern const Event ArenaAllocBytes; -} // namespace ProfileEvents - namespace DB { /** Memory pool to append something. For example, short strings. @@ -55,9 +48,6 @@ class Arena : private boost::noncopyable Chunk(size_t size_, Chunk * prev_) { - ProfileEvents::increment(ProfileEvents::ArenaAllocChunks); - ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_); - begin = reinterpret_cast(Allocator::alloc(size_)); pos = begin; end = begin + size_; diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index 8a2f111d882..b7ce9fd1e89 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -17,36 +17,11 @@ /// Available metrics. Add something here as you wish. 
#define APPLY_FOR_METRICS(M) \ - M(Query) \ - M(Merge) \ - M(ReplicatedFetch) \ - M(ReplicatedSend) \ - M(ReplicatedChecks) \ - M(BackgroundPoolTask) \ - M(DiskSpaceReservedForMerge) \ - M(DistributedSend) \ - M(QueryPreempted) \ - M(TCPConnection) \ - M(HTTPConnection) \ - M(InterserverConnection) \ M(OpenFileForRead) \ M(OpenFileForWrite) \ M(OpenFileForReadWrite) \ - M(SendExternalTables) \ - M(QueryThread) \ - M(ReadonlyReplica) \ - M(LeaderReplica) \ M(MemoryTracking) \ M(MemoryTrackingInBackgroundProcessingPool) \ - M(MemoryTrackingForMerges) \ - M(LeaderElection) \ - M(EphemeralNode) \ - M(DelayedInserts) \ - M(ContextLockWait) \ - M(StorageBufferRows) \ - M(StorageBufferBytes) \ - M(DictCacheRequests) \ - M(Revision) \ M(PSMVCCNumSnapshots) \ M(PSMVCCSnapshotsList) \ M(RWLockWaitingReaders) \ diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index c6c3caa44ad..ad5010d7826 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -12,7 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include +#include +#include +#include +#include #include #include @@ -21,7 +27,6 @@ namespace DB { std::unordered_map> FailPointHelper::fail_point_wait_channels; - #define APPLY_FOR_FAILPOINTS_ONCE(M) \ M(exception_between_drop_meta_and_data) \ M(exception_between_alter_data_and_meta) \ @@ -85,33 +90,54 @@ std::unordered_map> FailPointHelper::f M(force_remote_read_for_batch_cop) \ M(force_context_path) \ M(force_slow_page_storage_snapshot_release) \ - M(force_change_all_blobs_to_read_only) - -#define APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) \ - M(pause_with_alter_locks_acquired) \ - M(hang_in_execution) \ - M(pause_before_dt_background_delta_merge) \ - M(pause_until_dt_background_delta_merge) \ - M(pause_before_apply_raft_cmd) \ - M(pause_before_apply_raft_snapshot) \ - M(pause_until_apply_raft_snapshot) \ + M(force_change_all_blobs_to_read_only) \ + M(unblock_query_init_after_write) + + +#define APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) \ + M(pause_with_alter_locks_acquired) \ + M(hang_in_execution) \ + M(pause_before_dt_background_delta_merge) \ + M(pause_until_dt_background_delta_merge) \ + M(pause_before_apply_raft_cmd) \ + M(pause_before_apply_raft_snapshot) \ + M(pause_until_apply_raft_snapshot) \ M(pause_after_copr_streams_acquired_once) -#define APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) \ - M(pause_when_reading_from_dt_stream) \ - M(pause_when_writing_to_dt_store) \ - M(pause_when_ingesting_to_dt_store) \ - M(pause_when_altering_dt_store) \ - M(pause_after_copr_streams_acquired) \ - M(pause_before_server_merge_one_delta) +#define APPLY_FOR_PAUSEABLE_FAILPOINTS(M) \ + M(pause_when_reading_from_dt_stream) \ + M(pause_when_writing_to_dt_store) \ + M(pause_when_ingesting_to_dt_store) \ + M(pause_when_altering_dt_store) \ + M(pause_after_copr_streams_acquired) \ + M(pause_before_server_merge_one_delta) \ + M(pause_query_init) + + +#define APPLY_FOR_RANDOM_FAILPOINTS(M) \ + M(random_tunnel_wait_timeout_failpoint) \ + M(random_tunnel_init_rpc_failure_failpoint) \ + M(random_receiver_sync_msg_push_failure_failpoint) \ + M(random_receiver_async_msg_push_failure_failpoint) \ + M(random_limit_check_failpoint) \ + M(random_join_build_failpoint) \ + M(random_join_prob_failpoint) \ + M(random_aggregate_create_state_failpoint) \ + M(random_aggregate_merge_failpoint) \ + M(random_sharedquery_failpoint) \ + M(random_interpreter_failpoint) \ + M(random_task_lifecycle_failpoint) \ + 
M(random_task_manager_find_task_failure_failpoint) \ + M(random_min_tso_scheduler_failpoint) namespace FailPoints { #define M(NAME) extern const char(NAME)[] = #NAME ""; APPLY_FOR_FAILPOINTS_ONCE(M) APPLY_FOR_FAILPOINTS(M) -APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) -APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) +APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) +APPLY_FOR_PAUSEABLE_FAILPOINTS(M) +APPLY_FOR_RANDOM_FAILPOINTS(M) #undef M } // namespace FailPoints @@ -167,15 +193,15 @@ void FailPointHelper::enableFailPoint(const String & fail_point_name) } #define M(NAME) SUB_M(NAME, FIU_ONETIME) - APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) + APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) #undef M #define M(NAME) SUB_M(NAME, 0) - APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) + APPLY_FOR_PAUSEABLE_FAILPOINTS(M) #undef M #undef SUB_M - throw Exception("Cannot find fail point " + fail_point_name, ErrorCodes::FAIL_POINT_ERROR); + throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR); } void FailPointHelper::disableFailPoint(const String & fail_point_name) @@ -200,6 +226,41 @@ void FailPointHelper::wait(const String & fail_point_name) ptr->wait(); } } + +void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) +{ + String random_fail_point_cfg = config.getString("flash.random_fail_points", ""); + if (random_fail_point_cfg.empty()) + return; + + Poco::StringTokenizer string_tokens(random_fail_point_cfg, ","); + for (const auto & string_token : string_tokens) + { + Poco::StringTokenizer pair_tokens(string_token, "-"); + RUNTIME_ASSERT((pair_tokens.count() == 2), log, "RandomFailPoints config should be FailPointA-RatioA,FailPointB-RatioB,... format"); + double rate = atof(pair_tokens[1].c_str()); //NOLINT(cert-err34-c): check conversion error manually + RUNTIME_ASSERT((0 <= rate && rate <= 1.0), log, "RandomFailPoint trigger rate should in [0,1], while {}", rate); + enableRandomFailPoint(pair_tokens[0], rate); + } + LOG_FMT_INFO(log, "Enable RandomFailPoints: {}", random_fail_point_cfg); +} + +void FailPointHelper::enableRandomFailPoint(const String & fail_point_name, double rate) +{ +#define SUB_M(NAME) \ + if (fail_point_name == FailPoints::NAME) \ + { \ + fiu_enable_random(FailPoints::NAME, 1, nullptr, 0, rate); \ + return; \ + } + +#define M(NAME) SUB_M(NAME) + APPLY_FOR_RANDOM_FAILPOINTS(M) +#undef M +#undef SUB_M + + throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR); +} #else class FailPointChannel { @@ -210,6 +271,10 @@ void FailPointHelper::enableFailPoint(const String &) {} void FailPointHelper::disableFailPoint(const String &) {} void FailPointHelper::wait(const String &) {} + +void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration &, Poco::Logger *) {} + +void FailPointHelper::enableRandomFailPoint(const String &, double) {} #endif } // namespace DB diff --git a/dbms/src/Common/FailPoint.h b/dbms/src/Common/FailPoint.h index 2cf40ad55e4..31df2dbdcd2 100644 --- a/dbms/src/Common/FailPoint.h +++ b/dbms/src/Common/FailPoint.h @@ -21,6 +21,15 @@ #include +namespace Poco +{ +class Logger; +namespace Util +{ +class LayeredConfiguration; +} +} // namespace Poco + namespace DB { namespace ErrorCodes @@ -35,7 +44,6 @@ extern const int FAIL_POINT_ERROR; // When `fail_point` is enabled, wait till it is disabled #define FAIL_POINT_PAUSE(fail_point) fiu_do_on(fail_point, FailPointHelper::wait(fail_point);) - class FailPointChannel; class FailPointHelper { @@ -46,6 
+54,16 @@ class FailPointHelper static void wait(const String & fail_point_name); + /* + * For Server RandomFailPoint test usage. When FIU_ENABLE is defined, this function does the following work: + * 1. Return if TiFlash config has empty flash.random_fail_points cfg + * 2. Parse flash.random_fail_points, which expect to has "FailPointA-RatioA,FailPointB-RatioB,..." format + * 3. Call enableRandomFailPoint method with parsed FailPointName and Rate + */ + static void initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log); + + static void enableRandomFailPoint(const String & fail_point_name, double rate); + private: static std::unordered_map> fail_point_wait_channels; }; diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h index a5483587e3c..04e6474c0fe 100644 --- a/dbms/src/Common/PoolWithFailoverBase.h +++ b/dbms/src/Common/PoolWithFailoverBase.h @@ -40,12 +40,6 @@ extern const int LOGICAL_ERROR; } // namespace ErrorCodes } // namespace DB -namespace ProfileEvents -{ -extern const Event DistributedConnectionFailTry; -extern const Event DistributedConnectionFailAtAll; -} // namespace ProfileEvents - /// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB. /// Initialized by several PoolBase objects. /// When a connection is requested, tries to create or choose an alive connection from one of the nested pools. @@ -254,14 +248,12 @@ PoolWithFailoverBase::getMany( else { LOG_FMT_WARNING(log, "Connection failed at try No.{}, reason: {}", shuffled_pool.error_count + 1, fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); ++shuffled_pool.error_count; if (shuffled_pool.error_count >= max_tries) { ++failed_pools_count; - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll); } } } diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index 0ec1ce438a6..7507ff0b1f8 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -16,160 +16,98 @@ /// Available events. Add something here as you wish. 
-#define APPLY_FOR_EVENTS(M) \ - M(Query) \ - M(SelectQuery) \ - M(InsertQuery) \ - M(DeleteQuery) \ - M(FileOpen) \ - M(FileOpenFailed) \ - M(Seek) \ - M(ReadBufferFromFileDescriptorRead) \ - M(ReadBufferFromFileDescriptorReadFailed) \ - M(ReadBufferFromFileDescriptorReadBytes) \ - M(WriteBufferFromFileDescriptorWrite) \ - M(WriteBufferFromFileDescriptorWriteFailed) \ - M(WriteBufferFromFileDescriptorWriteBytes) \ - M(ReadBufferAIORead) \ - M(ReadBufferAIOReadBytes) \ - M(WriteBufferAIOWrite) \ - M(WriteBufferAIOWriteBytes) \ - M(ReadCompressedBytes) \ - M(CompressedReadBufferBlocks) \ - M(CompressedReadBufferBytes) \ - M(UncompressedCacheHits) \ - M(UncompressedCacheMisses) \ - M(UncompressedCacheWeightLost) \ - M(IOBufferAllocs) \ - M(IOBufferAllocBytes) \ - M(ArenaAllocChunks) \ - M(ArenaAllocBytes) \ - M(FunctionExecute) \ - M(TableFunctionExecute) \ - M(MarkCacheHits) \ - M(MarkCacheMisses) \ - M(CreatedReadBufferOrdinary) \ - M(CreatedReadBufferAIO) \ - M(CreatedWriteBufferOrdinary) \ - M(CreatedWriteBufferAIO) \ - \ - M(InsertedRows) \ - M(InsertedBytes) \ - M(DelayedInserts) \ - M(RejectedInserts) \ - M(DelayedInsertsMilliseconds) \ - M(DuplicatedInsertedBlocks) \ - \ - M(DistributedConnectionFailTry) \ - M(DistributedConnectionMissingTable) \ - M(DistributedConnectionStaleReplica) \ - M(DistributedConnectionFailAtAll) \ - \ - M(CompileAttempt) \ - M(CompileSuccess) \ - \ - M(ExternalSortWritePart) \ - M(ExternalSortMerge) \ - M(ExternalAggregationWritePart) \ - M(ExternalAggregationMerge) \ - M(ExternalAggregationCompressedBytes) \ - M(ExternalAggregationUncompressedBytes) \ - \ - M(SlowRead) \ - M(ReadBackoff) \ - \ - M(RegexpCreated) \ - M(ContextLock) \ - \ - M(StorageBufferFlush) \ - M(StorageBufferErrorOnFlush) \ - M(StorageBufferPassedAllMinThresholds) \ - M(StorageBufferPassedTimeMaxThreshold) \ - M(StorageBufferPassedRowsMaxThreshold) \ - M(StorageBufferPassedBytesMaxThreshold) \ - \ - M(DictCacheKeysRequested) \ - M(DictCacheKeysRequestedMiss) \ - M(DictCacheKeysRequestedFound) \ - M(DictCacheKeysExpired) \ - M(DictCacheKeysNotFound) \ - M(DictCacheKeysHit) \ - M(DictCacheRequestTimeNs) \ - M(DictCacheRequests) \ - M(DictCacheLockWriteNs) \ - M(DictCacheLockReadNs) \ - \ - M(DistributedSyncInsertionTimeoutExceeded) \ - M(DataAfterMergeDiffersFromReplica) \ - M(PolygonsAddedToPool) \ - M(PolygonsInPoolAllocatedBytes) \ - M(RWLockAcquiredReadLocks) \ - M(RWLockAcquiredWriteLocks) \ - M(RWLockReadersWaitMilliseconds) \ - M(RWLockWritersWaitMilliseconds) \ - \ - M(PSMWritePages) \ - M(PSMWriteIOCalls) \ - M(PSV3MBlobExpansion) \ - M(PSV3MBlobReused) \ - M(PSMWriteBytes) \ - M(PSMBackgroundWriteBytes) \ - M(PSMReadPages) \ - M(PSMBackgroundReadBytes) \ - \ - M(PSMReadIOCalls) \ - M(PSMReadBytes) \ - M(PSMWriteFailed) \ - M(PSMReadFailed) \ - \ - M(PSMVCCApplyOnCurrentBase) \ - M(PSMVCCApplyOnCurrentDelta) \ - M(PSMVCCApplyOnNewDelta) \ - M(PSMVCCCompactOnDelta) \ - M(PSMVCCCompactOnDeltaRebaseRejected) \ - M(PSMVCCCompactOnBase) \ - \ - M(DMWriteBytes) \ - M(DMWriteBlock) \ - M(DMWriteBlockNS) \ - M(DMWriteFile) \ - M(DMWriteFileNS) \ - M(DMDeleteRange) \ - M(DMDeleteRangeNS) \ - M(DMAppendDeltaPrepare) \ - M(DMAppendDeltaPrepareNS) \ - M(DMAppendDeltaCommitMemory) \ - M(DMAppendDeltaCommitMemoryNS) \ - M(DMAppendDeltaCommitDisk) \ - M(DMAppendDeltaCommitDiskNS) \ - M(DMAppendDeltaCleanUp) \ - M(DMAppendDeltaCleanUpNS) \ - M(DMPlace) \ - M(DMPlaceNS) \ - M(DMPlaceUpsert) \ - M(DMPlaceUpsertNS) \ - M(DMPlaceDeleteRange) \ - M(DMPlaceDeleteRangeNS) \ - M(DMDeltaMerge) \ - 
M(DMDeltaMergeNS) \ - M(DMSegmentSplit) \ - M(DMSegmentSplitNS) \ - M(DMSegmentGetSplitPoint) \ - M(DMSegmentGetSplitPointNS) \ - M(DMSegmentMerge) \ - M(DMSegmentMergeNS) \ - M(DMFlushDeltaCache) \ - M(DMFlushDeltaCacheNS) \ - M(DMCleanReadRows) \ - \ - M(FileFSync) \ - \ - M(DMFileFilterNoFilter) \ - M(DMFileFilterAftPKAndPackSet) \ - M(DMFileFilterAftRoughSet) \ - \ - M(ChecksumDigestBytes) \ - \ +#define APPLY_FOR_EVENTS(M) \ + M(Query) \ + M(FileOpen) \ + M(FileOpenFailed) \ + M(ReadBufferFromFileDescriptorRead) \ + M(ReadBufferFromFileDescriptorReadFailed) \ + M(ReadBufferFromFileDescriptorReadBytes) \ + M(WriteBufferFromFileDescriptorWrite) \ + M(WriteBufferFromFileDescriptorWriteBytes) \ + M(ReadBufferAIORead) \ + M(ReadBufferAIOReadBytes) \ + M(WriteBufferAIOWrite) \ + M(WriteBufferAIOWriteBytes) \ + \ + M(UncompressedCacheHits) \ + M(UncompressedCacheMisses) \ + M(UncompressedCacheWeightLost) \ + M(MarkCacheHits) \ + M(MarkCacheMisses) \ + \ + M(ExternalAggregationCompressedBytes) \ + M(ExternalAggregationUncompressedBytes) \ + \ + M(ContextLock) \ + \ + M(RWLockAcquiredReadLocks) \ + M(RWLockAcquiredWriteLocks) \ + M(RWLockReadersWaitMilliseconds) \ + M(RWLockWritersWaitMilliseconds) \ + \ + M(PSMWritePages) \ + M(PSMWriteIOCalls) \ + M(PSV3MBlobExpansion) \ + M(PSV3MBlobReused) \ + M(PSMWriteBytes) \ + M(PSMBackgroundWriteBytes) \ + M(PSMReadPages) \ + M(PSMBackgroundReadBytes) \ + \ + M(PSMReadIOCalls) \ + M(PSMReadBytes) \ + M(PSMWriteFailed) \ + M(PSMReadFailed) \ + \ + M(PSMVCCApplyOnCurrentBase) \ + M(PSMVCCApplyOnCurrentDelta) \ + M(PSMVCCApplyOnNewDelta) \ + M(PSMVCCCompactOnDelta) \ + M(PSMVCCCompactOnDeltaRebaseRejected) \ + M(PSMVCCCompactOnBase) \ + \ + M(DMWriteBytes) \ + M(DMWriteBlock) \ + M(DMWriteBlockNS) \ + M(DMWriteFile) \ + M(DMWriteFileNS) \ + M(DMDeleteRange) \ + M(DMDeleteRangeNS) \ + M(DMAppendDeltaPrepare) \ + M(DMAppendDeltaPrepareNS) \ + M(DMAppendDeltaCommitMemory) \ + M(DMAppendDeltaCommitMemoryNS) \ + M(DMAppendDeltaCommitDisk) \ + M(DMAppendDeltaCommitDiskNS) \ + M(DMAppendDeltaCleanUp) \ + M(DMAppendDeltaCleanUpNS) \ + M(DMPlace) \ + M(DMPlaceNS) \ + M(DMPlaceUpsert) \ + M(DMPlaceUpsertNS) \ + M(DMPlaceDeleteRange) \ + M(DMPlaceDeleteRangeNS) \ + M(DMDeltaMerge) \ + M(DMDeltaMergeNS) \ + M(DMSegmentSplit) \ + M(DMSegmentSplitNS) \ + M(DMSegmentGetSplitPoint) \ + M(DMSegmentGetSplitPointNS) \ + M(DMSegmentMerge) \ + M(DMSegmentMergeNS) \ + M(DMFlushDeltaCache) \ + M(DMFlushDeltaCacheNS) \ + M(DMCleanReadRows) \ + \ + M(FileFSync) \ + \ + M(DMFileFilterNoFilter) \ + M(DMFileFilterAftPKAndPackSet) \ + M(DMFileFilterAftRoughSet) \ + \ + M(ChecksumDigestBytes) \ + \ M(RaftWaitIndexTimeout) namespace ProfileEvents diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 9aa826e0e30..c0ce60af01e 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -60,27 +60,27 @@ namespace DB F(type_partition_ts, {"type", "partition_table_scan"}), \ F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"})) \ M(tiflash_coprocessor_request_duration_seconds, "Bucketed histogram of request duration", Histogram, \ - F(type_batch, {{"type", "batch"}}, ExpBuckets{0.0005, 2, 30}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.0005, 
2, 30}), \ - F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.0005, 2, 30})) \ + F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_coprocessor_request_memory_usage, "Bucketed histogram of request memory usage", Histogram, \ F(type_cop, {{"type", "cop"}}, ExpBuckets{1024 * 1024, 2, 16}), \ - F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{1024 * 1024, 2, 16}), \ - F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{1024 * 1024, 2, 16})) \ + F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{1024 * 1024, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{1024 * 1024, 2, 20})) \ M(tiflash_coprocessor_request_error, "Total number of request error", Counter, F(reason_meet_lock, {"reason", "meet_lock"}), \ F(reason_region_not_found, {"reason", "region_not_found"}), F(reason_epoch_not_match, {"reason", "epoch_not_match"}), \ F(reason_kv_client_error, {"reason", "kv_client_error"}), F(reason_internal_error, {"reason", "internal_error"}), \ F(reason_other_error, {"reason", "other_error"})) \ M(tiflash_coprocessor_request_handle_seconds, "Bucketed histogram of request handle duration", Histogram, \ - F(type_batch, {{"type", "batch"}}, ExpBuckets{0.0005, 2, 30}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.0005, 2, 30})) \ + F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_coprocessor_response_bytes, "Total bytes of response body", Counter) \ M(tiflash_schema_version, "Current version of tiflash cached schema", Gauge) \ M(tiflash_schema_applying, "Whether the schema is applying or not (holding lock)", Gauge) \ @@ -95,21 +95,14 @@ namespace DB F(type_alter_column_tp, {"type", "alter_column_type"}), F(type_rename_column, {"type", "rename_column"}), \ F(type_exchange_partition, {"type", "exchange_partition"})) \ M(tiflash_schema_apply_duration_seconds, "Bucketed histogram of ddl apply duration", Histogram, \ - F(type_ddl_apply_duration, {{"req", "ddl_apply_duration"}}, ExpBuckets{0.0005, 2, 20})) \ - M(tiflash_tmt_merge_count, "Total 
number of TMT engine merge", Counter) \ - M(tiflash_tmt_merge_duration_seconds, "Bucketed histogram of TMT engine merge duration", Histogram, \ - F(type_tmt_merge_duration, {{"type", "tmt_merge_duration"}}, ExpBuckets{0.0005, 2, 20})) \ - M(tiflash_tmt_write_parts_count, "Total number of TMT engine write parts", Counter) \ - M(tiflash_tmt_write_parts_duration_seconds, "Bucketed histogram of TMT engine write parts duration", Histogram, \ - F(type_tmt_write_duration, {{"type", "tmt_write_parts_duration"}}, ExpBuckets{0.0005, 2, 20})) \ - M(tiflash_tmt_read_parts_count, "Total number of TMT engine read parts", Gauge) \ + F(type_ddl_apply_duration, {{"req", "ddl_apply_duration"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_raft_read_index_count, "Total number of raft read index", Counter) \ M(tiflash_raft_read_index_duration_seconds, "Bucketed histogram of raft read index duration", Histogram, \ - F(type_raft_read_index_duration, {{"type", "tmt_raft_read_index_duration"}}, ExpBuckets{0.0005, 2, 20})) \ + F(type_raft_read_index_duration, {{"type", "tmt_raft_read_index_duration"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_raft_wait_index_duration_seconds, "Bucketed histogram of raft wait index duration", Histogram, \ - F(type_raft_wait_index_duration, {{"type", "tmt_raft_wait_index_duration"}}, ExpBuckets{0.0005, 2, 20})) \ + F(type_raft_wait_index_duration, {{"type", "tmt_raft_wait_index_duration"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_syncing_data_freshness, "The freshness of tiflash data with tikv data", Histogram, \ - F(type_syncing_data_freshness, {{"type", "data_freshness"}}, ExpBuckets{0.0005, 2, 20})) \ + F(type_syncing_data_freshness, {{"type", "data_freshness"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_storage_write_amplification, "The data write amplification in storage engine", Gauge) \ M(tiflash_storage_read_tasks_count, "Total number of storage engine read tasks", Counter) \ M(tiflash_storage_command_count, "Total number of storage's command, such as delete range / shutdown /startup", Counter, \ @@ -122,16 +115,16 @@ namespace DB F(type_seg_split, {"type", "seg_split"}), F(type_seg_split_fg, {"type", "seg_split_fg"}), \ F(type_seg_merge, {"type", "seg_merge"}), F(type_place_index_update, {"type", "place_index_update"})) \ M(tiflash_storage_subtask_duration_seconds, "Bucketed histogram of storage's sub task duration", Histogram, \ - F(type_delta_merge, {{"type", "delta_merge"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_delta_merge_fg, {{"type", "delta_merge_fg"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_delta_merge_fg_rpc, {{"type", "delta_merge_fg_rpc"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_delta_merge_bg_gc, {{"type", "delta_merge_bg_gc"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_seg_split, {{"type", "seg_split"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_seg_merge, {{"type", "seg_merge"}}, ExpBuckets{0.0005, 2, 20}), \ - F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.0005, 2, 20})) \ + F(type_delta_merge, {{"type", "delta_merge"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_fg, {{"type", "delta_merge_fg"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_fg_rpc, {{"type", "delta_merge_fg_rpc"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_bg_gc, {{"type", "delta_merge_bg_gc"}}, ExpBuckets{0.001, 2, 20}), \ 
+ F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_split, {{"type", "seg_split"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_merge, {{"type", "seg_merge"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_storage_throughput_bytes, "Calculate the throughput of tasks of storage in bytes", Gauge, /**/ \ F(type_write, {"type", "write"}), /**/ \ F(type_ingest, {"type", "ingest"}), /**/ \ @@ -145,8 +138,8 @@ namespace DB F(type_split, {"type", "split"}), /**/ \ F(type_merge, {"type", "merge"})) /**/ \ M(tiflash_storage_write_stall_duration_seconds, "The write stall duration of storage, in seconds", Histogram, /**/ \ - F(type_write, {{"type", "write"}}, ExpBuckets{0.0005, 2, 20}), /**/ \ - F(type_delete_range, {{"type", "delete_range"}}, ExpBuckets{0.0005, 2, 20})) /**/ \ + F(type_write, {{"type", "write"}}, ExpBuckets{0.001, 2, 20}), /**/ \ + F(type_delete_range, {{"type", "delete_range"}}, ExpBuckets{0.001, 2, 20})) /**/ \ M(tiflash_storage_page_gc_count, "Total number of page's gc execution.", Counter, \ F(type_exec, {"type", "exec"}), \ F(type_low_write, {"type", "low_write"}), \ @@ -170,7 +163,7 @@ namespace DB Histogram, /* these command usually cost servel seconds, increase the start bucket to 50ms */ \ F(type_ingest_sst, {{"type", "ingest_sst"}}, ExpBuckets{0.05, 2, 10}), \ F(type_apply_snapshot_predecode, {{"type", "snapshot_predecode"}}, ExpBuckets{0.05, 2, 10}), \ - F(type_apply_snapshot_predecode_sst2dt, {{"type", "snapshot_predecode_sst2dt"}}, ExpBuckets{0.05, 2, 10}), \ + F(type_apply_snapshot_predecode_sst2dt, {{"type", "snapshot_predecode_sst2dt"}}, ExpBuckets{0.05, 2, 10}), \ F(type_apply_snapshot_flush, {{"type", "snapshot_flush"}}, ExpBuckets{0.05, 2, 10})) \ M(tiflash_raft_process_keys, "Total number of keys processed in some types of Raft commands", Counter, \ F(type_apply_snapshot, {"type", "apply_snapshot"}), F(type_ingest_sst, {"type", "ingest_sst"})) \ @@ -212,7 +205,7 @@ namespace DB F(type_thread_hard_limit, {"type", "thread_hard_limit"}), \ F(type_hard_limit_exceeded_count, {"type", "hard_limit_exceeded_count"})) \ M(tiflash_task_scheduler_waiting_duration_seconds, "Bucketed histogram of task waiting for scheduling duration", Histogram, \ - F(type_task_scheduler_waiting_duration, {{"type", "task_waiting_duration"}}, ExpBuckets{0.0005, 2, 20})) + F(type_task_scheduler_waiting_duration, {{"type", "task_waiting_duration"}}, ExpBuckets{0.001, 2, 20})) // clang-format on diff --git a/dbms/src/Common/wrapInvocable.h b/dbms/src/Common/wrapInvocable.h index d6cee519835..1c93bb3e782 100644 --- a/dbms/src/Common/wrapInvocable.h +++ b/dbms/src/Common/wrapInvocable.h @@ -35,7 +35,6 @@ inline auto wrapInvocable(bool propagate_memory_tracker, Func && func, Args &&.. 
// run the task with the parameters provided return std::apply(std::move(func), std::move(args)); }; - return capture; } } // namespace DB diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 28db7af82e1..971e8f36e2a 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -238,10 +238,18 @@ void Block::checkNumberOfRows() const if (rows == -1) rows = size; else if (rows != size) - throw Exception("Sizes of columns doesn't match: " - + data.front().name + ": " + toString(rows) - + ", " + elem.name + ": " + toString(size), + { + auto first_col = data.front(); + throw Exception(fmt::format( + "Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}", + first_col.name, + first_col.column_id, + rows, + elem.name, + elem.column_id, + size), ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + } } } diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp index 0d9e907c5f4..4cd09d1ea63 100644 --- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp @@ -17,12 +17,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event ExternalAggregationMerge; -} - namespace DB { Block AggregatingBlockInputStream::getHeader() const @@ -56,8 +50,6 @@ Block AggregatingBlockInputStream::readImpl() * then read and merge them, spending the minimum amount of memory. */ - ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge); - if (!isCancelled()) { /// Flush data in the RAM to disk also. It's easier than merging on-disk and RAM data. diff --git a/dbms/src/DataStreams/AsynchronousBlockInputStream.h b/dbms/src/DataStreams/AsynchronousBlockInputStream.h index e75d1603648..5b373c26e95 100644 --- a/dbms/src/DataStreams/AsynchronousBlockInputStream.h +++ b/dbms/src/DataStreams/AsynchronousBlockInputStream.h @@ -22,12 +22,6 @@ #include #include - -namespace CurrentMetrics -{ -extern const Metric QueryThread; -} - namespace DB { /** Executes another BlockInputStream in a separate thread. @@ -141,8 +135,6 @@ class AsynchronousBlockInputStream : public IProfilingBlockInputStream /// Calculations that can be performed in a separate thread void calculate() { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - try { if (first) diff --git a/dbms/src/DataStreams/CountingBlockOutputStream.cpp b/dbms/src/DataStreams/CountingBlockOutputStream.cpp index 26bc5a4566f..52dc6b598b9 100644 --- a/dbms/src/DataStreams/CountingBlockOutputStream.cpp +++ b/dbms/src/DataStreams/CountingBlockOutputStream.cpp @@ -12,20 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include - - -namespace ProfileEvents -{ - extern const Event InsertedRows; - extern const Event InsertedBytes; -} - +#include namespace DB { - void CountingBlockOutputStream::write(const Block & block) { stream->write(block); @@ -33,9 +24,6 @@ void CountingBlockOutputStream::write(const Block & block) Progress local_progress(block.rows(), block.bytes(), 0); progress.incrementPiecewiseAtomically(local_progress); - ProfileEvents::increment(ProfileEvents::InsertedRows, local_progress.rows); - ProfileEvents::increment(ProfileEvents::InsertedBytes, local_progress.bytes); - if (process_elem) process_elem->updateProgressOut(local_progress); @@ -43,4 +31,4 @@ void CountingBlockOutputStream::write(const Block & block) progress_callback(local_progress); } -} +} // namespace DB diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index e79426f686e..cf8db3f8711 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -19,13 +19,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event ExternalSortWritePart; -extern const Event ExternalSortMerge; -} // namespace ProfileEvents - namespace DB { /** Remove constant columns from block. @@ -136,7 +129,6 @@ Block MergeSortingBlockInputStream::readImpl() MergeSortingBlocksBlockInputStream block_in(blocks, description, log->identifier(), max_merged_block_size, limit); LOG_FMT_INFO(log, "Sorting and writing part of data into temporary file {}", path); - ProfileEvents::increment(ProfileEvents::ExternalSortWritePart); copyData(block_in, block_out, &is_cancelled); /// NOTE. Possibly limit disk usage. LOG_FMT_INFO(log, "Done writing part of data into temporary file {}", path); @@ -155,7 +147,6 @@ Block MergeSortingBlockInputStream::readImpl() else { /// If there was temporary files. 
- ProfileEvents::increment(ProfileEvents::ExternalSortMerge); LOG_FMT_INFO(log, "There are {} temporary sorted parts to merge.", temporary_files.size()); diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 5d0b677b792..3a1cc1eed31 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -19,13 +19,6 @@ #include - -namespace CurrentMetrics -{ -extern const Metric QueryThread; -} - - namespace DB { /** Scheme of operation: @@ -156,7 +149,7 @@ void MergingAggregatedMemoryEfficientBlockInputStream::cancel(bool kill) for (auto & input : inputs) { - if (IProfilingBlockInputStream * child = dynamic_cast(input.stream.get())) + if (auto * child = dynamic_cast(input.stream.get())) { try { @@ -198,7 +191,6 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start() reading_pool->schedule( wrapInvocable(true, [&child] { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; child->readPrefix(); })); } @@ -309,8 +301,6 @@ void MergingAggregatedMemoryEfficientBlockInputStream::finalize() void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread() { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - try { while (!parallel_merge_data->finish) @@ -490,7 +480,6 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate if (need_that_input(input)) { reading_pool->schedule(wrapInvocable(true, [&input, &read_from_input] { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; read_from_input(input); })); } diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index 1a59b979c29..f983de91b37 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -20,13 +20,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event ExternalAggregationMerge; -} - - namespace DB { ParallelAggregatingBlockInputStream::ParallelAggregatingBlockInputStream( @@ -101,8 +94,6 @@ Block ParallelAggregatingBlockInputStream::readImpl() * then read and merge them, spending the minimum amount of memory. */ - ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge); - const auto & files = aggregator.getTemporaryFiles(); BlockInputStreams input_streams; for (const auto & file : files.files) @@ -207,8 +198,8 @@ void ParallelAggregatingBlockInputStream::Handler::onException(std::exception_pt /// can not cancel parent inputStream or the exception might be lost if (!parent.executed) - /// kill the processor so ExchangeReceiver will be closed - parent.processor.cancel(true); + /// use cancel instead of kill to avoid too many useless error message + parent.processor.cancel(false); } diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 0e839093cd7..34c70a7085e 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -46,11 +46,6 @@ * then read block from source and then put source back to queue of available sources. */ -namespace CurrentMetrics -{ -extern const Metric QueryThread; -} - namespace DB { /** Union mode. 
@@ -208,8 +203,6 @@ class ParallelInputsProcessor { std::exception_ptr exception; - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - try { while (!finish) diff --git a/dbms/src/DataStreams/SharedQueryBlockInputStream.h b/dbms/src/DataStreams/SharedQueryBlockInputStream.h index e7cece67f0b..d7c0707b5aa 100644 --- a/dbms/src/DataStreams/SharedQueryBlockInputStream.h +++ b/dbms/src/DataStreams/SharedQueryBlockInputStream.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -24,6 +25,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_sharedquery_failpoint[]; +} // namespace FailPoints + /** This block input stream is used by SharedQuery. * It enable multiple threads read from one stream. */ @@ -136,6 +142,7 @@ class SharedQueryBlockInputStream : public IProfilingBlockInputStream in->readPrefix(); while (true) { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_sharedquery_failpoint); Block block = in->read(); // in is finished or queue is canceled if (!block || !queue.push(block)) diff --git a/dbms/src/DataStreams/SizeLimits.cpp b/dbms/src/DataStreams/SizeLimits.cpp index 7dd5e1524ba..4d1bfaae997 100644 --- a/dbms/src/DataStreams/SizeLimits.cpp +++ b/dbms/src/DataStreams/SizeLimits.cpp @@ -12,22 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include #include -#include +#include +#include +#include +#include namespace DB { +namespace FailPoints +{ +extern const char random_limit_check_failpoint[]; +} // namespace FailPoints bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const { - if (max_rows && rows > max_rows) + bool rows_exceed_limit = max_rows && rows > max_rows; + fiu_do_on(FailPoints::random_limit_check_failpoint, rows_exceed_limit = true;); + if (rows_exceed_limit) { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max rows: " + formatReadableQuantity(max_rows) - + ", current rows: " + formatReadableQuantity(rows), exception_code); + + ", current rows: " + formatReadableQuantity(rows), + exception_code); else return false; } @@ -36,7 +44,8 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max bytes: " + formatReadableSizeWithBinarySuffix(max_bytes) - + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), exception_code); + + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), + exception_code); else return false; } @@ -44,4 +53,4 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti return true; } -} +} // namespace DB diff --git a/dbms/src/DataStreams/UnionBlockInputStream.h b/dbms/src/DataStreams/UnionBlockInputStream.h index 251d0663e14..a782c3dd087 100644 --- a/dbms/src/DataStreams/UnionBlockInputStream.h +++ b/dbms/src/DataStreams/UnionBlockInputStream.h @@ -293,8 +293,8 @@ class UnionBlockInputStream final : public IProfilingBlockInputStream /// and the exception is lost. 
output_queue.emplace(exception); /// can not cancel itself or the exception might be lost - /// kill the processor so ExchangeReceiver will be closed - processor.cancel(true); + /// use cancel instead of kill to avoid too many useless error message + processor.cancel(false); } struct Handler diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 120d0b1ba30..71fda0615e4 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -471,7 +471,6 @@ class IDataType : private boost::noncopyable virtual bool isEnum() const { return false; }; virtual bool isNullable() const { return false; } - /** Is this type can represent only NULL value? (It also implies isNullable) */ virtual bool onlyNull() const { return false; } diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 42ab56a97c1..7b3bdb0948f 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -221,7 +221,6 @@ TiDB::TableInfoPtr MockTiDB::parseColumns( { String & name = string_tokens[index]; index_info.idx_cols[index].name = name; - index_info.idx_cols[index].offset = pk_column_pos_map[name]; index_info.idx_cols[index].length = -1; } } @@ -302,7 +301,7 @@ int MockTiDB::newTables( tables_by_id.emplace(table->table_info.id, table); tables_by_name.emplace(qualified_name, table); - AffectedOption opt; + AffectedOption opt{}; opt.schema_id = table->database_id; opt.table_id = table->id(); opt.old_schema_id = table->database_id; @@ -571,7 +570,7 @@ void MockTiDB::renameTables(const std::vectordatabase_id; opt.table_id = new_table->id(); opt.old_schema_id = table->database_id; diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index fec76d7a085..7d1f3bc7209 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -1629,7 +1629,6 @@ ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr se } } } - auto project = std::make_shared(executor_index, output_schema, std::move(exprs)); project->children.push_back(input); return project; diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index cbd2e5ade3a..4d87c0db77e 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -350,4 +350,4 @@ ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr fun ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort); void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp index df93ee1c78d..3626041f428 100644 --- a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp @@ -40,7 +40,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); const String & database_name = typeid_cast(*args[1]).name; const String & table_name = typeid_cast(*args[2]).name; auto table = MockTiDB::instance().getTableByName(database_name, table_name); @@ -49,7 +49,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar if (4 + handle_column_size * 4 != args.size()) 
throw Exception("Args not matched, should be: region-id1, database-name, table-name, start1, end1, start2, end2, region-id2", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id2 = (RegionID)safeGet(typeid_cast(*args[args.size() - 1]).value); + auto region_id2 = static_cast(safeGet(typeid_cast(*args[args.size() - 1]).value)); auto table_id = table->id(); TiKVKey start_key1, start_key2, end_key1, end_key2; @@ -59,9 +59,17 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar std::vector start_keys2; std::vector end_keys1; std::vector end_keys2; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field1 = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum1 = TiDB::DatumBumpy(start_field1, column_info.tp); @@ -88,10 +96,10 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar } else { - HandleID start1 = (HandleID)safeGet(typeid_cast(*args[3]).value); - HandleID end1 = (HandleID)safeGet(typeid_cast(*args[4]).value); - HandleID start2 = (HandleID)safeGet(typeid_cast(*args[5]).value); - HandleID end2 = (HandleID)safeGet(typeid_cast(*args[6]).value); + auto start1 = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end1 = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start2 = static_cast(safeGet(typeid_cast(*args[5]).value)); + auto end2 = static_cast(safeGet(typeid_cast(*args[6]).value)); start_key1 = RecordKVFormat::genKey(table_id, start1); start_key2 = RecordKVFormat::genKey(table_id, start2); end_key1 = RecordKVFormat::genKey(table_id, end1); @@ -110,7 +118,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar request.set_cmd_type(raft_cmdpb::AdminCmdType::BatchSplit); raft_cmdpb::BatchSplitResponse * splits = response.mutable_splits(); { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id); region->set_start_key(start_key1); region->set_end_key(end_key1); @@ -118,7 +126,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar *region->mutable_region_epoch() = new_epoch; } { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id2); region->set_start_key(start_key2); region->set_end_key(end_key2); @@ -144,8 +152,8 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: source-id1, target-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID target_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto target_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -157,7 +165,7 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::PrepareMerge); - auto prepare_merge = request.mutable_prepare_merge(); + auto * 
prepare_merge = request.mutable_prepare_merge(); { auto min_index = region->appliedIndex(); prepare_merge->set_min_index(min_index); @@ -184,8 +192,8 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D throw Exception("Args not matched, should be: source-id1, current-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID source_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID current_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto source_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto current_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -196,7 +204,7 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D { request.set_cmd_type(raft_cmdpb::AdminCmdType::CommitMerge); - auto commit_merge = request.mutable_commit_merge(); + auto * commit_merge = request.mutable_commit_merge(); { commit_merge->set_commit(source_region->appliedIndex()); *commit_merge->mutable_source() = source_region->getMetaRegion(); @@ -220,7 +228,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -231,7 +239,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::RollbackMerge); - auto rollback_merge = request.mutable_rollback_merge(); + auto * rollback_merge = request.mutable_rollback_merge(); { auto merge_state = region->getMergeState(); rollback_merge->set_commit(merge_state.commit()); diff --git a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp index 7eecbbdf6f7..b5d3f252d0a 100644 --- a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp @@ -60,7 +60,7 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); TableID table_id = RegionBench::getTableID(context, database_name, table_name, ""); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); auto & table_info = table->table_info; @@ -68,10 +68,16 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) size_t handle_column_size = is_common_handle ? 
table_info.getPrimaryIndexInfo().idx_cols.size() : 1; RegionPtr region; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + if (!is_common_handle) { - HandleID start = static_cast(safeGet(typeid_cast(*args[3]).value)); - HandleID end = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end = static_cast(safeGet(typeid_cast(*args[4]).value)); region = RegionBench::createRegion(table_id, region_id, start, end); } else @@ -81,7 +87,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector end_keys; for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -105,8 +112,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) for (auto it = args_begin; it != args_end; it += len) { HandleID handle_id = is_common_handle ? 0 : static_cast(safeGet(typeid_cast(*it[0]).value)); - Timestamp tso = static_cast(safeGet(typeid_cast(*it[1]).value)); - UInt8 del = static_cast(safeGet(typeid_cast(*it[2]).value)); + auto tso = static_cast(safeGet(typeid_cast(*it[1]).value)); + auto del = static_cast(safeGet(typeid_cast(*it[2]).value)); { std::vector fields; @@ -122,9 +129,9 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector keys; // handle key for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; + auto start_field = RegionBench::convertField(column_info, fields[idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } @@ -168,7 +175,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotWithData(Context & context, const AST // DBGInvoke region_snapshot(region-id, start-key, end-key, database-name, table-name[, partition-id]) void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args, DBGInvoker::Printer output) { - RegionID region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); bool has_partition_id = false; size_t args_size = args.size(); if (dynamic_cast(args[args_size - 1].get()) != nullptr) @@ -198,9 +205,16 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args // Get start key and end key form multiple column if it is clustered_index. 
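The debug helpers in this and the surrounding hunks stop reading `table_info.getPrimaryIndexInfo().idx_cols[i].offset` (which the MockTiDB.cpp hunk above no longer populates) and instead resolve each primary-index column by its name. A minimal self-contained sketch of that lookup, using simplified stand-in structs rather than the real TiDB::TableInfo definitions:

```cpp
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-ins for the column/index metadata; the real structs live in TiDB.h.
struct ColumnInfo { std::string name; };
struct IndexColumnInfo { std::string name; };

// Map every column name to its position once, then resolve each primary-index
// column through the map instead of trusting a stored offset.
std::vector<size_t> resolvePrimaryIndexColumns(
    const std::vector<ColumnInfo> & columns,
    const std::vector<IndexColumnInfo> & idx_cols)
{
    std::unordered_map<std::string, size_t> column_name_columns_index_map;
    for (size_t i = 0; i < columns.size(); i++)
        column_name_columns_index_map.emplace(columns[i].name, i);

    std::vector<size_t> resolved;
    resolved.reserve(idx_cols.size());
    for (const auto & idx_col : idx_cols)
        resolved.push_back(column_name_columns_index_map.at(idx_col.name)); // throws if the name is unknown
    return resolved;
}
```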
std::vector start_keys; std::vector end_keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -214,15 +228,15 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args } else { - HandleID start = static_cast(safeGet(typeid_cast(*args[1]).value)); - HandleID end = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start = static_cast(safeGet(typeid_cast(*args[1]).value)); + auto end = static_cast(safeGet(typeid_cast(*args[2]).value)); start_key = RecordKVFormat::genKey(table_id, start); end_key = RecordKVFormat::genKey(table_id, end); } region_info.set_start_key(start_key.toString()); region_info.set_end_key(end_key.toString()); - *region_info.add_peers() = createPeer(1, true); - *region_info.add_peers() = createPeer(2, true); + *region_info.add_peers() = tests::createPeer(1, true); + *region_info.add_peers() = tests::createPeer(2, true); auto peer_id = 1; auto start_decoded_key = RecordKVFormat::decodeTiKVKey(start_key); auto end_decoded_key = RecordKVFormat::decodeTiKVKey(end_key); @@ -432,9 +446,9 @@ void MockRaftCommand::dbgFuncIngestSST(Context & context, const ASTs & args, DBG { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); - RegionID end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); const auto & table_info = RegionBench::getTableInfo(context, database_name, table_name); @@ -555,7 +569,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyBlock(Context & context, const A throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); auto [region, block_cache] = GLOBAL_REGION_MAP.popRegionCache("__snap_" + std::to_string(region_id)); auto & tmt = context.getTMTContext(); context.getTMTContext().getKVStore()->checkAndApplySnapshot({region, std::move(block_cache)}, tmt); @@ -577,12 +591,12 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFiles(Context & context, c const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); - RegionID end_handle = 
static_cast(safeGet(typeid_cast(*args[4]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); - const String schema_str = safeGet(typeid_cast(*args[5]).value); - String handle_pk_name = safeGet(typeid_cast(*args[6]).value); + const auto schema_str = safeGet(typeid_cast(*args[5]).value); + auto handle_pk_name = safeGet(typeid_cast(*args[6]).value); UInt64 test_fields = 1; if (args.size() > 7) @@ -677,10 +691,10 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFilesWithHandles(Context & const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - const String schema_str = safeGet(typeid_cast(*args[3]).value); - String handle_pk_name = safeGet(typeid_cast(*args[4]).value); + const auto schema_str = safeGet(typeid_cast(*args[3]).value); + auto handle_pk_name = safeGet(typeid_cast(*args[4]).value); std::vector handles; for (size_t i = 5; i < args.size(); ++i) @@ -770,7 +784,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyDTFiles(Context & context, const if (args.size() != 1) throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); const auto region_name = "__snap_snap_" + std::to_string(region_id); auto [new_region, ingest_ids] = GLOBAL_REGION_MAP.popRegionSnap(region_name); auto & tmt = context.getTMTContext(); diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index b2024eac1d8..f65a18b8fd0 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -61,9 +61,15 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer { std::vector start_keys; std::vector end_keys; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 685b2563a3b..854d8a18bd5 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -310,7 +310,7 @@ void insert( // // Parse the fields in the inserted row std::vector fields; { - for (ASTs::const_iterator it = values_begin; it != values_end; ++it) + for (auto it = values_begin; it != values_end; ++it) { auto field = typeid_cast((*it).get())->value; fields.emplace_back(field); @@ -330,11 +330,18 @@ void insert( // if (table_info.is_common_handle) { std::vector keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + 
column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - const auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - const auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + const auto & col_idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[col_idx]; + auto start_field = RegionBench::convertField(column_info, fields[col_idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 8573bdad6bd..0d7243ede8f 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -12,57 +12,36 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include #include +#include +#include #include -#include +#include #include -#include -#include #include -#include +#include +#include +#include #include #include #include -#include -#include -#include - - -namespace ProfileEvents -{ - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; -} - -namespace CurrentMetrics -{ - extern const Metric DictCacheRequests; -} +#include +#include +#include +#include +#include +#include namespace DB { - namespace ErrorCodes { - extern const int TYPE_MISMATCH; - extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; - extern const int LOGICAL_ERROR; -} +extern const int TYPE_MISMATCH; +extern const int BAD_ARGUMENTS; +extern const int UNSUPPORTED_METHOD; +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes inline size_t CacheDictionary::getCellIdx(const Key id) const @@ -73,15 +52,15 @@ inline size_t CacheDictionary::getCellIdx(const Key id) const } -CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, - const size_t size) - : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - cells{this->size}, - rnd_engine(randomSeed()) +CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const size_t size) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , cells{this->size} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{ @@ -100,13 +79,19 @@ void CacheDictionary::toParent(const PaddedPODArray & ids, 
PaddedPODArray(hierarchical_attribute->null_values); - getItemsNumber(*hierarchical_attribute, ids, out, [&] (const size_t) { return null_value; }); + getItemsNumber(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; }); } /// Allow to use single value in same way as array. -static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) { return arr[idx]; } -static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) { return value; } +static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) +{ + return arr[idx]; +} +static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) +{ + return value; +} template @@ -118,7 +103,7 @@ void CacheDictionary::isInImpl( /// Transform all children to parents until ancestor id or null_value will be reached. size_t size = out.size(); - memset(out.data(), 0xFF, size); /// 0xFF means "not calculated" + memset(out.data(), 0xFF, size); /// 0xFF means "not calculated" const auto null_value = std::get(hierarchical_attribute->null_values); @@ -224,19 +209,19 @@ void CacheDictionary::isInConstantVector( } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, PaddedPODArray & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{\ - name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ - ErrorCodes::TYPE_MISMATCH};\ - \ - const auto null_value = std::get(attribute.null_values);\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t) { return null_value; });\ -} +#define DECLARE(TYPE) \ + void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, PaddedPODArray & out) const \ + { \ + auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{ \ + name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + const auto null_value = std::get(attribute.null_values); \ + \ + getItemsNumber(attribute, ids, out, [&](const size_t) { return null_value; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -260,22 +245,24 @@ void CacheDictionary::getString(const std::string & attribute_name, const Padded const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, ids, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, ids, out, [&](const size_t) { return null_value; }); } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const PaddedPODArray & def,\ - PaddedPODArray & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{\ - name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ - ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t row) { return def[row]; });\ -} +#define DECLARE(TYPE) \ + void CacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const PaddedPODArray & def, \ + PaddedPODArray & out) const \ + { \ + auto 
& attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{ \ + name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber(attribute, ids, out, [&](const size_t row) { return def[row]; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -290,7 +277,9 @@ DECLARE(Float64) #undef DECLARE void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, + const std::string & attribute_name, + const PaddedPODArray & ids, + const ColumnString * const def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); @@ -299,21 +288,24 @@ void CacheDictionary::getString( name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); }); } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, PaddedPODArray & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{\ - name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ - ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t) { return def; });\ -} +#define DECLARE(TYPE) \ + void CacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const TYPE def, \ + PaddedPODArray & out) const \ + { \ + auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{ \ + name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber(attribute, ids, out, [&](const size_t) { return def; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -328,7 +320,9 @@ DECLARE(Float64) #undef DECLARE void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const String & def, + const std::string & attribute_name, + const PaddedPODArray & ids, + const String & def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); @@ -337,7 +331,7 @@ void CacheDictionary::getString( name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; }); } @@ -390,8 +384,6 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray const auto rows = ext::size(ids); { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows)) @@ -416,10 +408,6 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, 
cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); @@ -427,21 +415,19 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = true; - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = false; - }); + update( + required_ids, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + out[row] = true; + }, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + out[row] = false; + }); } @@ -476,68 +462,68 @@ CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib switch (type) { - case AttributeUnderlyingType::UInt8: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt8); - break; - case AttributeUnderlyingType::UInt16: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt16); - break; - case AttributeUnderlyingType::UInt32: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt32); - break; - case AttributeUnderlyingType::UInt64: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt64); - break; - case AttributeUnderlyingType::UInt128: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt128); - break; - case AttributeUnderlyingType::Int8: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int8); - break; - case AttributeUnderlyingType::Int16: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int16); - break; - case AttributeUnderlyingType::Int32: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int32); - break; - case AttributeUnderlyingType::Int64: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int64); - break; - case AttributeUnderlyingType::Float32: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Float32); - break; - case AttributeUnderlyingType::Float64: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Float64); - break; - case AttributeUnderlyingType::String: - std::get(attr.null_values) = null_value.get(); - 
std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(StringRef); - if (!string_arena) - string_arena = std::make_unique(); - break; + case AttributeUnderlyingType::UInt8: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt8); + break; + case AttributeUnderlyingType::UInt16: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt16); + break; + case AttributeUnderlyingType::UInt32: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt32); + break; + case AttributeUnderlyingType::UInt64: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt64); + break; + case AttributeUnderlyingType::UInt128: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt128); + break; + case AttributeUnderlyingType::Int8: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int8); + break; + case AttributeUnderlyingType::Int16: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int16); + break; + case AttributeUnderlyingType::Int32: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int32); + break; + case AttributeUnderlyingType::Int64: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int64); + break; + case AttributeUnderlyingType::Float32: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Float32); + break; + case AttributeUnderlyingType::Float64: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Float64); + break; + case AttributeUnderlyingType::String: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(StringRef); + if (!string_arena) + string_arena = std::make_unique(); + break; } return attr; @@ -551,8 +537,8 @@ void CacheDictionary::getItemsNumber( PaddedPODArray & out, DefaultGetter && get_default) const { - if (false) {} -#define DISPATCH(TYPE) \ + if (false) {} // NOLINT +#define DISPATCH(TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) \ getItemsNumberImpl(attribute, ids, out, std::forward(get_default)); DISPATCH(UInt8) @@ -567,8 +553,7 @@ void CacheDictionary::getItemsNumber( DISPATCH(Float32) DISPATCH(Float64) #undef DISPATCH - else - throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); + else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); } template @@ -586,8 +571,6 @@ void CacheDictionary::getItemsNumberImpl( size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date 
values, decide which ones require update for (const auto row : ext::range(0, rows)) @@ -618,10 +601,6 @@ void CacheDictionary::getItemsNumberImpl( } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); @@ -629,23 +608,21 @@ void CacheDictionary::getItemsNumberImpl( return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; + update( + required_ids, + [&](const auto id, const auto cell_idx) { + const auto attribute_value = attribute_array[cell_idx]; - for (const auto row : outdated_ids[id]) - out[row] = static_cast(attribute_value); - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = get_default(row); - }); + for (const auto row : outdated_ids[id]) + out[row] = static_cast(attribute_value); // NOLINT + }, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + out[row] = get_default(row); + }); } template @@ -666,8 +643,6 @@ void CacheDictionary::getItemsString( /// perform optimistic version, fallback to pessimistic if failed { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, discard on fail for (const auto row : ext::range(0, rows)) @@ -710,8 +685,6 @@ void CacheDictionary::getItemsString( size_t total_length = 0; size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); for (const auto row : ext::range(0, ids.size())) { @@ -741,10 +714,6 @@ void CacheDictionary::getItemsString( } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); @@ -752,22 +721,20 @@ void CacheDictionary::getItemsString( if (!outdated_ids.empty()) { std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); - - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; - - map[id] = String{attribute_value}; - total_length += (attribute_value.size + 1) * outdated_ids[id].size(); - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - total_length += get_default(row).size + 1; - }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); + + update( + 
required_ids, + [&](const auto id, const auto cell_idx) { + const auto attribute_value = attribute_array[cell_idx]; + + map[id] = String{attribute_value}; + total_length += (attribute_value.size + 1) * outdated_ids[id].size(); + }, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + total_length += get_default(row).size + 1; + }); } out->getChars().reserve(total_length); @@ -790,18 +757,13 @@ void CacheDictionary::update( { std::unordered_map remaining_ids{requested_ids.size()}; for (const auto id : requested_ids) - remaining_ids.insert({ id, 0 }); + remaining_ids.insert({id, 0}); - std::uniform_int_distribution distribution - { + std::uniform_int_distribution distribution{ dict_lifetime.min_sec, - dict_lifetime.max_sec - }; - - const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; + dict_lifetime.max_sec}; { - CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; Stopwatch watch; auto stream = source_ptr->loadIds(requested_ids); stream->readPrefix(); @@ -810,7 +772,7 @@ void CacheDictionary::update( while (const auto block = stream->read()) { - const auto id_column = typeid_cast(block.safeGetByPosition(0).column.get()); + const auto * id_column = typeid_cast(block.safeGetByPosition(0).column.get()); if (!id_column) throw Exception{ name + ": id column has type different from UInt64.", @@ -819,8 +781,7 @@ void CacheDictionary::update( const auto & ids = id_column->getData(); /// cache column pointers - const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block] (size_t i) - { + const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); }); @@ -859,9 +820,6 @@ void CacheDictionary::update( } stream->readSuffix(); - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); - ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); } size_t not_found_num = 0, found_num = 0; @@ -903,10 +861,6 @@ void CacheDictionary::update( /// inform caller that the cell has not been found on_id_not_found(id, cell_idx); } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); - ProfileEvents::increment(ProfileEvents::DictCacheRequests); } @@ -914,32 +868,54 @@ void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = 
std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::String: - { - const auto & null_value_ref = std::get(attribute.null_values); - auto & string_ref = std::get>(attribute.arrays)[idx]; - - if (string_ref.data != null_value_ref.data()) - { - if (string_ref.data) - string_arena->free(const_cast(string_ref.data), string_ref.size); + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::String: + { + const auto & null_value_ref = std::get(attribute.null_values); + auto & string_ref = std::get>(attribute.arrays)[idx]; - string_ref = StringRef{null_value_ref}; - } + if (string_ref.data != null_value_ref.data()) + { + if (string_ref.data) + string_arena->free(const_cast(string_ref.data), string_ref.size); - break; + string_ref = StringRef{null_value_ref}; } + + break; + } } } @@ -947,39 +923,61 @@ void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, co { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::String: - { - const auto & string = 
value.get(); - auto & string_ref = std::get>(attribute.arrays)[idx]; - const auto & null_value_ref = std::get(attribute.null_values); - - /// free memory unless it points to a null_value - if (string_ref.data && string_ref.data != null_value_ref.data()) - string_arena->free(const_cast(string_ref.data), string_ref.size); + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::String: + { + const auto & string = value.get(); + auto & string_ref = std::get>(attribute.arrays)[idx]; + const auto & null_value_ref = std::get(attribute.null_values); - const auto size = string.size(); - if (size != 0) - { - auto string_ptr = string_arena->alloc(size + 1); - std::copy(string.data(), string.data() + size + 1, string_ptr); - string_ref = StringRef{string_ptr, size}; - } - else - string_ref = {}; + /// free memory unless it points to a null_value + if (string_ref.data && string_ref.data != null_value_ref.data()) + string_arena->free(const_cast(string_ref.data), string_ref.size); - break; + const auto size = string.size(); + if (size != 0) + { + auto * string_ptr = string_arena->alloc(size + 1); + std::copy(string.data(), string.data() + size + 1, string_ptr); + string_ref = StringRef{string_ptr, size}; } + else + string_ref = {}; + + break; + } } } @@ -989,22 +987,18 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a if (it == std::end(attribute_index_by_name)) throw Exception{ name + ": no such attribute '" + attribute_name + "'", - ErrorCodes::BAD_ARGUMENTS - }; + ErrorCodes::BAD_ARGUMENTS}; return attributes[it->second]; } bool CacheDictionary::isEmptyCell(const UInt64 idx) const { - return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data - == ext::safe_bit_cast(CellMetadata::time_point_t())); + return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t())); } PaddedPODArray CacheDictionary::getCachedIds() const { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - PaddedPODArray array; for (size_t idx = 0; idx < cells.size(); ++idx) { @@ -1024,4 +1018,4 @@ BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_na } -} +} // namespace DB diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index 330ee036136..fb9a94b29a0 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ 
b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -12,48 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include #include #include -#include -#include -#include -#include #include -#include -#include - - -namespace ProfileEvents -{ - - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; -} - -namespace CurrentMetrics -{ - extern const Metric DictCacheRequests; -} +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { - namespace ErrorCodes { - extern const int TYPE_MISMATCH; - extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; -} +extern const int TYPE_MISMATCH; +extern const int BAD_ARGUMENTS; +extern const int UNSUPPORTED_METHOD; +} // namespace ErrorCodes inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const @@ -64,13 +43,14 @@ inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const } -ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, - const size_t size) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - rnd_engine(randomSeed()) +ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const size_t size) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{ @@ -85,7 +65,9 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const ComplexKeyCacheDictio {} void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, ColumnString * out) const { dict_struct.validateKeyTypes(key_types); @@ -98,12 +80,15 @@ void ComplexKeyCacheDictionary::getString( const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); @@ -113,12 +98,15 @@ void ComplexKeyCacheDictionary::getString( 
name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, key_columns, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); @@ -128,7 +116,7 @@ void ComplexKeyCacheDictionary::getString( name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; }); } /// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag, @@ -190,8 +178,6 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows_num)) @@ -220,9 +206,6 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes } } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); query_count.fetch_add(rows_num, std::memory_order_relaxed); hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release); @@ -231,18 +214,18 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes return; std::vector required_rows(outdated_keys.size()); - std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), - [] (auto & pair) { return pair.getMapped().front(); }); + std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); /// request new values - update(key_columns, keys_array, required_rows, - [&] (const StringRef key, const auto) - { + update( + key_columns, + keys_array, + required_rows, + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = true; }, - [&] (const StringRef key, const auto) - { + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = false; }); @@ -297,8 +280,11 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Pool & pool) + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Pool & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; @@ -337,25 +323,32 @@ StringRef ComplexKeyCacheDictionary::placeKeysInPool( } } - 
return { place, sum_keys_size }; + return {place, sum_keys_size}; } /// Explicit instantiations. template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Arena & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Arena & pool); template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, ArenaWithFreeLists & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + ArenaWithFreeLists & pool); StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( - const size_t row, const Columns & key_columns) const + const size_t row, + const Columns & key_columns) const { - const auto res = fixed_size_keys_pool->alloc(); - auto place = res; + auto * const res = fixed_size_keys_pool->alloc(); + auto * place = res; for (const auto & key_column : key_columns) { @@ -364,36 +357,33 @@ StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( place += key.size; } - return { res, key_size }; + return {res, key_size}; } StringRef ComplexKeyCacheDictionary::copyIntoArena(StringRef src, Arena & arena) { char * allocated = arena.alloc(src.size); memcpy(allocated, src.data, src.size); - return { allocated, src.size }; + return {allocated, src.size}; } StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const { - const auto res = key_size_is_fixed ? fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size); + auto * const res = key_size_is_fixed ? fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size); memcpy(res, key.data, key.size); - return { res, key.size }; + return {res, key.size}; } bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const { - return (cells[idx].key == StringRef{} && (idx != zero_cell_idx - || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); + return (cells[idx].key == StringRef{} && (idx != zero_cell_idx || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); } BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const { std::vector keys; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - for (auto idx : ext::range(0, cells.size())) if (!isEmptyCell(idx) && !cells[idx].isDefault()) @@ -404,4 +394,4 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & return std::make_shared(shared_from_this(), max_block_size, keys, column_names); } -} +} // namespace DB diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index feb61261f1d..19fe5214fef 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -14,12 +14,6 @@ #pragma once -#include -#include -#include -#include -#include -#include #include #include #include @@ -29,24 +23,17 @@ #include #include #include + +#include +#include #include #include #include +#include #include - - -namespace ProfileEvents -{ -extern const Event DictCacheKeysRequested; -extern const Event DictCacheKeysRequestedMiss; -extern const Event DictCacheKeysRequestedFound; -extern const Event DictCacheKeysExpired; -extern const Event DictCacheKeysNotFound; -extern const Event DictCacheKeysHit; 
-extern const Event DictCacheRequestTimeNs; -extern const Event DictCacheLockWriteNs; -extern const Event DictCacheLockReadNs; -} +#include +#include +#include namespace DB { @@ -54,10 +41,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase { public: ComplexKeyCacheDictionary(const std::string & name, - const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime, - const size_t size); + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, + const size_t size); ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other); @@ -144,9 +131,12 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase /// In all functions below, key_columns must be full (non-constant) columns. /// See the requirement in IDataType.h for text-serialization functions. -#define DECLARE(TYPE) \ - void get##TYPE( \ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + PaddedPODArray & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -164,10 +154,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase #define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const PaddedPODArray & def, \ - PaddedPODArray & out) const; + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + PaddedPODArray & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -182,17 +172,17 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase #undef DECLARE void getString(const std::string & attribute_name, - const Columns & key_columns, - const DataTypes & key_types, - const ColumnString * const def, - ColumnString * const out) const; + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const; #define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const TYPE def, \ - PaddedPODArray & out) const; + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + PaddedPODArray & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -207,10 +197,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase #undef DECLARE void getString(const std::string & attribute_name, - const Columns & key_columns, - const DataTypes & key_types, - const String & def, - ColumnString * const out) const; + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const; void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; @@ -263,17 +253,17 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase AttributeUnderlyingType type; std::tuple null_values; std::tuple, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType> + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + 
ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType> arrays; }; @@ -283,7 +273,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase template void getItemsNumber( - Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const + Attribute & attribute, + const Columns & key_columns, + PaddedPODArray & out, + DefaultGetter && get_default) const { if (false) { @@ -308,7 +301,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase template void getItemsNumberImpl( - Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const + Attribute & attribute, + const Columns & key_columns, + PaddedPODArray & out, + DefaultGetter && get_default) const { /// Mapping: -> { all indices `i` of `key_columns` such that `key_columns[i]` = } MapType> outdated_keys; @@ -322,8 +318,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows_num)) @@ -354,9 +348,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase } } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); query_count.fetch_add(rows_num, std::memory_order_relaxed); hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release); @@ -365,19 +356,21 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase std::vector required_rows(outdated_keys.size()); std::transform( - std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); + std::begin(outdated_keys), + std::end(outdated_keys), + std::begin(required_rows), + [](auto & pair) { return pair.getMapped().front(); }); /// request new values - update(key_columns, + update( + key_columns, keys_array, required_rows, - [&](const StringRef key, const size_t cell_idx) - { + [&](const StringRef key, const size_t cell_idx) { for (const auto row : outdated_keys[key]) out[row] = static_cast(attribute_array[cell_idx]); }, - [&](const StringRef key, const size_t) - { + [&](const StringRef key, const size_t) { for (const auto row : outdated_keys[key]) out[row] = get_default(row); }); @@ -400,8 +393,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase /// perform optimistic version, fallback to pessimistic if failed { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, discard on fail for (const auto row : ext::range(0, rows_num)) @@ -446,8 +437,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase size_t total_length = 0; size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); for (const auto row : ext::range(0, rows_num)) { @@ -477,9 +466,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase } } } - 
ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); query_count.fetch_add(rows_num, std::memory_order_relaxed); hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release); @@ -488,16 +474,15 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase if (!outdated_keys.empty()) { std::vector required_rows(outdated_keys.size()); - std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) - { + std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); - update(key_columns, + update( + key_columns, keys_array, required_rows, - [&](const StringRef key, const size_t cell_idx) - { + [&](const StringRef key, const size_t cell_idx) { const StringRef attribute_value = attribute_array[cell_idx]; /// We must copy key and value to own memory, because it may be replaced with another @@ -508,8 +493,7 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase map[copied_key] = copied_value; total_length += (attribute_value.size + 1) * outdated_keys[key].size(); }, - [&](const StringRef key, const size_t) - { + [&](const StringRef key, const size_t) { for (const auto row : outdated_keys[key]) total_length += get_default(row).size + 1; }); @@ -521,17 +505,17 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase { const StringRef key = keys_array[row]; const auto it = map.find(key); - const auto string_ref = it != std::end(map) ? it->getMapped(): get_default(row); + const auto string_ref = it != std::end(map) ? 
it->getMapped() : get_default(row); out->insertData(string_ref.data, string_ref.size); } }; template void update(const Columns & in_key_columns, - const PODArray & in_keys, - const std::vector & in_requested_rows, - PresentKeyHandler && on_cell_updated, - AbsentKeyHandler && on_key_not_found) const + const PODArray & in_keys, + const std::vector & in_requested_rows, + PresentKeyHandler && on_cell_updated, + AbsentKeyHandler && on_key_not_found) const { MapType remaining_keys{in_requested_rows.size()}; for (const auto row : in_requested_rows) @@ -539,7 +523,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase std::uniform_int_distribution distribution(dict_lifetime.min_sec, dict_lifetime.max_sec); - const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; { Stopwatch watch; auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows); @@ -555,10 +538,11 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase { /// cache column pointers const auto key_columns = ext::map( - ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); + ext::range(0, keys_size), + [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); const auto attribute_columns = ext::map(ext::range(0, attributes_size), - [&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; }); + [&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; }); const auto rows_num = block.rows(); @@ -612,9 +596,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase } stream->readSuffix(); - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size()); - ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); } size_t found_num = 0; @@ -671,9 +652,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase /// inform caller that the cell has not been found on_key_not_found(key, cell_idx); } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, found_num); - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); }; UInt64 getCellIdx(const StringRef key) const; @@ -690,10 +668,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase template static StringRef placeKeysInPool(const size_t row, - const Columns & key_columns, - StringRefs & keys, - const std::vector & key_attributes, - Arena & pool); + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Arena & pool); StringRef placeKeysInFixedSizePool(const size_t row, const Columns & key_columns) const; @@ -752,4 +730,4 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase const std::chrono::time_point creation_time = std::chrono::system_clock::now(); }; -} +} // namespace DB diff --git a/dbms/src/Encryption/WriteBufferFromFileProvider.cpp b/dbms/src/Encryption/WriteBufferFromFileProvider.cpp index 4c99b8e24b1..a17dd85d379 100644 --- a/dbms/src/Encryption/WriteBufferFromFileProvider.cpp +++ b/dbms/src/Encryption/WriteBufferFromFileProvider.cpp @@ -19,7 +19,6 @@ namespace ProfileEvents { extern const Event WriteBufferFromFileDescriptorWrite; -extern const Event WriteBufferFromFileDescriptorWriteFailed; extern const Event WriteBufferFromFileDescriptorWriteBytes; } // namespace ProfileEvents @@ -72,8 +71,7 @@ void WriteBufferFromFileProvider::nextImpl() if ((-1 == res || 0 
== res) && errno != EINTR) { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - throwFromErrno("Cannot write to file " + getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + throwFromErrno("Cannot write to file " + getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); // NOLINT } if (res > 0) diff --git a/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp b/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp index b76d58c20cd..1858d474d60 100644 --- a/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp +++ b/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp @@ -20,10 +20,6 @@ #endif #include #include -namespace ProfileEvents -{ -extern const Event CreatedReadBufferOrdinary; -} namespace DB { @@ -46,7 +42,6 @@ std::unique_ptr createReadBufferFromFileBaseByFileProvid { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); return std::make_unique( file_provider, filename_, @@ -75,7 +70,6 @@ createReadBufferFromFileBaseByFileProvider( size_t checksum_frame_size, int flags_) { - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); auto file = file_provider->newRandomAccessFile(filename_, encryption_path_, read_limiter, flags_); auto allocation_size = std::min(estimated_size, checksum_frame_size); switch (checksum_algorithm) diff --git a/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp b/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp index 2f1a2cbaeb8..5e8a6940598 100644 --- a/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp +++ b/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp @@ -20,11 +20,6 @@ #include #include -namespace ProfileEvents -{ -extern const Event CreatedWriteBufferOrdinary; -} - namespace DB { namespace ErrorCodes @@ -49,7 +44,6 @@ createWriteBufferFromFileBaseByFileProvider( { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferOrdinary); return std::make_unique( file_provider, filename_, @@ -81,7 +75,6 @@ createWriteBufferFromFileBaseByFileProvider( int flags_, mode_t mode) { - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferOrdinary); auto file_ptr = file_provider->newWritableFile(filename_, encryption_path_, true, create_new_encryption_info_, write_limiter_, flags_, mode); switch (checksum_algorithm) diff --git a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp index a1c6061948a..1609c83b029 100644 --- a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp +++ b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -41,7 +40,7 @@ extern const int NOT_IMPLEMENTED; const IColumn * getNestedCol(const IColumn * flash_col) { if (flash_col->isColumnNullable()) - return dynamic_cast(flash_col)->getNestedColumnPtr().get(); + return static_cast(flash_col)->getNestedColumnPtr().get(); else return flash_col; } @@ -75,8 +74,8 @@ bool flashDecimalColToArrowColInternal( const IColumn * nested_col = getNestedCol(flash_col_untyped); if (checkColumn>(nested_col) && checkDataType>(data_type)) { - const ColumnDecimal * flash_col = checkAndGetColumn>(nested_col); - const DataTypeDecimal * type = checkAndGetDataType>(data_type); + const auto * flash_col = checkAndGetColumn>(nested_col); + const auto * type = 
checkAndGetDataType>(data_type); UInt32 scale = type->getScale(); for (size_t i = start_index; i < end_index; i++) { @@ -92,8 +91,8 @@ bool flashDecimalColToArrowColInternal( std::vector digits; digits.reserve(type->getPrec()); decimalToVector(dec.value, digits, scale); - TiDBDecimal tiDecimal(scale, digits, dec.value < 0); - dag_column.append(tiDecimal); + TiDBDecimal ti_decimal(scale, digits, dec.value < 0); + dag_column.append(ti_decimal); } return true; } @@ -121,7 +120,7 @@ template bool flashIntegerColToArrowColInternal(TiDBColumn & dag_column, const IColumn * flash_col_untyped, size_t start_index, size_t end_index) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - if (const ColumnVector * flash_col = checkAndGetColumn>(nested_col)) + if (const auto * flash_col = checkAndGetColumn>(nested_col)) { constexpr bool is_unsigned = std::is_unsigned_v; for (size_t i = start_index; i < end_index; i++) @@ -135,9 +134,9 @@ bool flashIntegerColToArrowColInternal(TiDBColumn & dag_column, const IColumn * } } if constexpr (is_unsigned) - dag_column.append((UInt64)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); else - dag_column.append((Int64)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); } return true; } @@ -148,7 +147,7 @@ template void flashDoubleColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col_untyped, size_t start_index, size_t end_index) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - if (const ColumnVector * flash_col = checkAndGetColumn>(nested_col)) + if (const auto * flash_col = checkAndGetColumn>(nested_col)) { for (size_t i = start_index; i < end_index; i++) { @@ -160,7 +159,7 @@ void flashDoubleColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col continue; } } - dag_column.append((T)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); } return; } @@ -196,7 +195,7 @@ void flashDateOrDateTimeColToArrowCol( { const IColumn * nested_col = getNestedCol(flash_col_untyped); using DateFieldType = DataTypeMyTimeBase::FieldType; - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); for (size_t i = start_index; i < end_index; i++) { if constexpr (is_nullable) @@ -217,7 +216,7 @@ void flashStringColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col { const IColumn * nested_col = getNestedCol(flash_col_untyped); // columnFixedString is not used so do not check it - auto * flash_col = checkAndGetColumn(nested_col); + const auto * flash_col = checkAndGetColumn(nested_col); for (size_t i = start_index; i < end_index; i++) { // todo check if we can convert flash_col to DAG col directly since the internal representation is almost the same @@ -242,7 +241,7 @@ void flashBitColToArrowCol( const tipb::FieldType & field_type) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); for (size_t i = start_index; i < end_index; i++) { if constexpr (is_nullable) @@ -267,7 +266,7 @@ void flashEnumColToArrowCol( const IDataType * data_type) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); const auto * enum_type = checkAndGetDataType(data_type); size_t enum_value_size = enum_type->getValues().size(); for (size_t i = start_index; i < 
end_index; i++) @@ -280,10 +279,10 @@ void flashEnumColToArrowCol( continue; } } - auto enum_value = (UInt64)flash_col->getElement(i); + auto enum_value = static_cast(flash_col->getElement(i)); if (enum_value == 0 || enum_value > enum_value_size) throw TiFlashException("number of enum overflow enum boundary", Errors::Coprocessor::Internal); - TiDBEnum ti_enum(enum_value, enum_type->getNameForValue((const DataTypeEnum16::FieldType)enum_value)); + TiDBEnum ti_enum(enum_value, enum_type->getNameForValue(static_cast(enum_value))); dag_column.append(ti_enum); } } @@ -300,7 +299,7 @@ void flashColToArrowCol(TiDBColumn & dag_column, const ColumnWithTypeAndName & f throw TiFlashException("Flash column and TiDB column has different not null flag", Errors::Coprocessor::Internal); } if (type->isNullable()) - type = dynamic_cast(type)->getNestedType().get(); + type = static_cast(type)->getNestedType().get(); switch (tidb_column_info.tp) { @@ -457,7 +456,7 @@ const char * arrowEnumColToFlashCol( { if (checkNull(i, null_count, null_bitmap, col)) continue; - const auto enum_value = (Int64)toLittleEndian(*(reinterpret_cast(pos + offsets[i]))); + const auto enum_value = static_cast(toLittleEndian(*(reinterpret_cast(pos + offsets[i])))); col.column->assumeMutable()->insert(Field(enum_value)); } return pos + offsets[length]; @@ -479,11 +478,11 @@ const char * arrowBitColToFlashCol( continue; const String value = String(pos + offsets[i], pos + offsets[i + 1]); if (value.length() == 0) - col.column->assumeMutable()->insert(Field(UInt64(0))); + col.column->assumeMutable()->insert(Field(static_cast(0))); UInt64 result = 0; - for (auto & c : value) + for (const auto & c : value) { - result = (result << 8u) | (UInt8)c; + result = (result << 8u) | static_cast(c); } col.column->assumeMutable()->insert(Field(result)); } @@ -500,7 +499,7 @@ T toCHDecimal(UInt8 digits_int, UInt8 digits_frac, bool negative, const Int32 * UInt8 tailing_digit = digits_frac % DIGITS_PER_WORD; typename T::NativeType value = 0; - const int word_max = int(1e9); + const int word_max = static_cast(1e9); for (int i = 0; i < word_int; i++) { value = value * word_max + word_buf[i]; @@ -552,28 +551,28 @@ const char * arrowDecimalColToFlashCol( pos += 1; Int32 word_buf[MAX_WORD_BUF_LEN]; const DataTypePtr decimal_type - = col.type->isNullable() ? dynamic_cast(col.type.get())->getNestedType() : col.type; - for (int j = 0; j < MAX_WORD_BUF_LEN; j++) + = col.type->isNullable() ? 
static_cast(col.type.get())->getNestedType() : col.type; + for (int & j : word_buf) { - word_buf[j] = toLittleEndian(*(reinterpret_cast(pos))); + j = toLittleEndian(*(reinterpret_cast(pos))); pos += 4; } - if (auto * type32 = checkDecimal(*decimal_type)) + if (const auto * type32 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type32->getScale())); } - else if (auto * type64 = checkDecimal(*decimal_type)) + else if (const auto * type64 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type64->getScale())); } - else if (auto * type128 = checkDecimal(*decimal_type)) + else if (const auto * type128 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type128->getScale())); } - else if (auto * type256 = checkDecimal(*decimal_type)) + else if (const auto * type256 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type256->getScale())); @@ -600,13 +599,13 @@ const char * arrowDateColToFlashCol( continue; } UInt64 chunk_time = toLittleEndian(*(reinterpret_cast(pos))); - UInt16 year = (UInt16)((chunk_time & MyTimeBase::YEAR_BIT_FIELD_MASK) >> MyTimeBase::YEAR_BIT_FIELD_OFFSET); - UInt8 month = (UInt8)((chunk_time & MyTimeBase::MONTH_BIT_FIELD_MASK) >> MyTimeBase::MONTH_BIT_FIELD_OFFSET); - UInt8 day = (UInt8)((chunk_time & MyTimeBase::DAY_BIT_FIELD_MASK) >> MyTimeBase::DAY_BIT_FIELD_OFFSET); - UInt16 hour = (UInt16)((chunk_time & MyTimeBase::HOUR_BIT_FIELD_MASK) >> MyTimeBase::HOUR_BIT_FIELD_OFFSET); - UInt8 minute = (UInt8)((chunk_time & MyTimeBase::MINUTE_BIT_FIELD_MASK) >> MyTimeBase::MINUTE_BIT_FIELD_OFFSET); - UInt8 second = (UInt8)((chunk_time & MyTimeBase::SECOND_BIT_FIELD_MASK) >> MyTimeBase::SECOND_BIT_FIELD_OFFSET); - UInt32 micro_second = (UInt32)((chunk_time & MyTimeBase::MICROSECOND_BIT_FIELD_MASK) >> MyTimeBase::MICROSECOND_BIT_FIELD_OFFSET); + auto year = static_cast((chunk_time & MyTimeBase::YEAR_BIT_FIELD_MASK) >> MyTimeBase::YEAR_BIT_FIELD_OFFSET); + auto month = static_cast((chunk_time & MyTimeBase::MONTH_BIT_FIELD_MASK) >> MyTimeBase::MONTH_BIT_FIELD_OFFSET); + auto day = static_cast((chunk_time & MyTimeBase::DAY_BIT_FIELD_MASK) >> MyTimeBase::DAY_BIT_FIELD_OFFSET); + auto hour = static_cast((chunk_time & MyTimeBase::HOUR_BIT_FIELD_MASK) >> MyTimeBase::HOUR_BIT_FIELD_OFFSET); + auto minute = static_cast((chunk_time & MyTimeBase::MINUTE_BIT_FIELD_MASK) >> MyTimeBase::MINUTE_BIT_FIELD_OFFSET); + auto second = static_cast((chunk_time & MyTimeBase::SECOND_BIT_FIELD_MASK) >> MyTimeBase::SECOND_BIT_FIELD_OFFSET); + auto micro_second = static_cast((chunk_time & MyTimeBase::MICROSECOND_BIT_FIELD_MASK) >> MyTimeBase::MICROSECOND_BIT_FIELD_OFFSET); MyDateTime mt(year, month, day, hour, minute, second, micro_second); pos += field_length; col.column->assumeMutable()->insert(Field(mt.toPackedUInt())); @@ -659,7 +658,7 @@ const char * arrowNumColToFlashCol( case TiDB::TypeFloat: u32 = toLittleEndian(*(reinterpret_cast(pos))); std::memcpy(&f32, &u32, sizeof(Float32)); - col.column->assumeMutable()->insert(Field((Float64)f32)); + col.column->assumeMutable()->insert(Field(static_cast(f32))); break; case TiDB::TypeDouble: u64 = toLittleEndian(*(reinterpret_cast(pos))); diff --git 
a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index 1736e0b6cec..ec0544c6ee4 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -205,60 +205,21 @@ void DAGContext::attachBlockIO(const BlockIO & io_) { io = io_; } -void DAGContext::initExchangeReceiverIfMPP(Context & context, size_t max_streams) -{ - if (isMPPTask()) - { - if (mpp_exchange_receiver_map_inited) - throw TiFlashException("Repeatedly initialize mpp_exchange_receiver_map", Errors::Coprocessor::Internal); - traverseExecutors(dag_request, [&](const tipb::Executor & executor) { - if (executor.tp() == tipb::ExecType::TypeExchangeReceiver) - { - assert(executor.has_executor_id()); - const auto & executor_id = executor.executor_id(); - // In order to distinguish different exchange receivers. - auto exchange_receiver = std::make_shared( - std::make_shared( - executor.exchange_receiver(), - getMPPTaskMeta(), - context.getTMTContext().getKVCluster(), - context.getTMTContext().getMPPTaskManager(), - context.getSettingsRef().enable_local_tunnel, - context.getSettingsRef().enable_async_grpc_client), - executor.exchange_receiver().encoded_task_meta_size(), - max_streams, - log->identifier(), - executor_id); - mpp_exchange_receiver_map[executor_id] = exchange_receiver; - new_thread_count_of_exchange_receiver += exchange_receiver->computeNewThreadCount(); - } - return true; - }); - mpp_exchange_receiver_map_inited = true; - } -} - -const std::unordered_map> & DAGContext::getMPPExchangeReceiverMap() const +ExchangeReceiverPtr DAGContext::getMPPExchangeReceiver(const String & executor_id) const { if (!isMPPTask()) throw TiFlashException("mpp_exchange_receiver_map is used in mpp only", Errors::Coprocessor::Internal); - if (!mpp_exchange_receiver_map_inited) - throw TiFlashException("mpp_exchange_receiver_map has not been initialized", Errors::Coprocessor::Internal); - return mpp_exchange_receiver_map; + RUNTIME_ASSERT(mpp_receiver_set != nullptr, log, "MPPTask without receiver set"); + return mpp_receiver_set->getExchangeReceiver(executor_id); } -void DAGContext::cancelAllExchangeReceiver() +void DAGContext::addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader) { - for (auto & it : mpp_exchange_receiver_map) - { - it.second->cancel(); - } -} - -int DAGContext::getNewThreadCountOfExchangeReceiver() const -{ - return new_thread_count_of_exchange_receiver; + if (!isMPPTask()) + return; + RUNTIME_ASSERT(mpp_receiver_set != nullptr, log, "MPPTask without receiver set"); + return mpp_receiver_set->addCoprocessorReader(coprocessor_reader); } bool DAGContext::containsRegionsInfoForTable(Int64 table_id) const diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index bc00eee12e1..a50a4d4007b 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -37,6 +37,13 @@ namespace DB class Context; class MPPTunnelSet; class ExchangeReceiver; +using ExchangeReceiverPtr = std::shared_ptr; +/// key: executor_id of ExchangeReceiver nodes in dag. 
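+/// value: the ExchangeReceiver created for that executor (registered via MPPReceiverSet::addExchangeReceiver).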
+using ExchangeReceiverMap = std::unordered_map; +class MPPReceiverSet; +using MPPReceiverSetPtr = std::shared_ptr; +class CoprocessorReader; +using CoprocessorReaderPtr = std::shared_ptr; class Join; using JoinPtr = std::shared_ptr; @@ -254,7 +261,6 @@ class DAGContext return io; } - int getNewThreadCountOfExchangeReceiver() const; UInt64 getFlags() const { return flags; @@ -303,10 +309,12 @@ class DAGContext bool columnsForTestEmpty() { return columns_for_test_map.empty(); } - void cancelAllExchangeReceiver(); - - void initExchangeReceiverIfMPP(Context & context, size_t max_streams); - const std::unordered_map> & getMPPExchangeReceiverMap() const; + ExchangeReceiverPtr getMPPExchangeReceiver(const String & executor_id) const; + void setMPPReceiverSet(const MPPReceiverSetPtr & receiver_set) + { + mpp_receiver_set = receiver_set; + } + void addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader); void addSubquery(const String & subquery_id, SubqueryForSet && subquery); bool hasSubquery() const { return !subqueries.empty(); } @@ -371,10 +379,8 @@ class DAGContext ConcurrentBoundedQueue warnings; /// warning_count is the actual warning count during the entire execution std::atomic warning_count; - int new_thread_count_of_exchange_receiver = 0; - /// key: executor_id of ExchangeReceiver nodes in dag. - std::unordered_map> mpp_exchange_receiver_map; - bool mpp_exchange_receiver_map_inited = false; + + MPPReceiverSetPtr mpp_receiver_set; /// vector of SubqueriesForSets(such as join build subquery). /// The order of the vector is also the order of the subquery. std::vector subqueries; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 9f201006a88..046088ab2b2 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -153,7 +153,9 @@ class DAGExpressionAnalyzer : private boost::noncopyable const tipb::Window & window, size_t window_columns_start_index); +#ifndef DBMS_PUBLIC_GTEST private: +#endif NamesAndTypes buildOrderColumns( const ExpressionActionsPtr & actions, const ::google::protobuf::RepeatedPtrField & order_by); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 86d6428c92a..e322a830744 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -481,14 +481,14 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) { - auto it = dagContext().getMPPExchangeReceiverMap().find(query_block.source_name); - if (unlikely(it == dagContext().getMPPExchangeReceiverMap().end())) + auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); + if (unlikely(exchange_receiver == nullptr)) throw Exception("Can not find exchange receiver for " + query_block.source_name, ErrorCodes::LOGICAL_ERROR); // todo choose a more reasonable stream number auto & exchange_receiver_io_input_streams = dagContext().getInBoundIOInputStreamsMap()[query_block.source_name]; for (size_t i = 0; i < max_streams; ++i) { - BlockInputStreamPtr stream = std::make_shared(it->second, log->identifier(), query_block.source_name); + BlockInputStreamPtr stream = std::make_shared(exchange_receiver, log->identifier(), query_block.source_name); exchange_receiver_io_input_streams.push_back(stream); stream = 
std::make_shared(stream, 8192, 0, log->identifier()); stream->setExtraInfo("squashing after exchange receiver"); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 0b3b2db9623..e68c4f91cee 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -54,7 +54,9 @@ class DAGQueryBlockInterpreter BlockInputStreams execute(); +#ifndef DBMS_PUBLIC_GTEST private: +#endif void executeImpl(DAGPipeline & pipeline); void handleMockTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); void handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index 14cddd94730..ad2de7217e0 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -486,6 +486,7 @@ void DAGStorageInterpreter::buildRemoteStreams(std::vector && rem std::vector tasks(all_tasks.begin() + task_start, all_tasks.begin() + task_end); auto coprocessor_reader = std::make_shared(schema, cluster, tasks, has_enforce_encode_type, 1); + context.getDAGContext()->addCoprocessorReader(coprocessor_reader); BlockInputStreamPtr input = std::make_shared(coprocessor_reader, log->identifier(), table_scan.getTableScanExecutorID()); pipeline.streams.push_back(input); task_start = task_end; diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index bea26fe9f99..9ffa29cd14d 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -1432,6 +1432,7 @@ tipb::EncodeType analyzeDAGEncodeType(DAGContext & dag_context) return tipb::EncodeType::TypeDefault; return encode_type; } + tipb::ScalarFuncSig reverseGetFuncSigByFuncName(const String & name) { static std::unordered_map func_name_sig_map = getFuncNameToSigMap(); diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index a67ebf20aa5..0e767d65d77 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -24,19 +24,8 @@ namespace DB InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) : context(context_) , dag(dag_) + , max_streams(context.getMaxStreams()) { - const Settings & settings = context.getSettingsRef(); - if (dagContext().isBatchCop() || (dagContext().isMPPTask() && !dagContext().isTest())) - max_streams = settings.max_threads; - else if (dagContext().isTest()) - max_streams = dagContext().initialize_concurrency; - else - max_streams = 1; - - if (max_streams > 1) - { - max_streams *= settings.max_streams_to_max_threads_ratio; - } } void setRestorePipelineConcurrency(DAGQueryBlock & query_block) @@ -75,10 +64,6 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) BlockIO InterpreterDAG::execute() { - /// Due to learner read, DAGQueryBlockInterpreter may take a long time to build - /// the query plan, so we init mpp exchange receiver before executeQueryBlock - dagContext().initExchangeReceiverIfMPP(context, max_streams); - BlockInputStreams streams = executeQueryBlock(*dag.getRootQueryBlock()); DAGPipeline pipeline; pipeline.streams = streams; diff --git a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp index 7183374a5c1..eef89696d3a 100644 --- 
a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp +++ b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp @@ -28,7 +28,7 @@ template void encodeLittleEndian(const T & value, WriteBuffer & ss) { auto v = toLittleEndian(value); - ss.write(reinterpret_cast(&v), sizeof(v)); + ss.template writeFixed(&v); } TiDBColumn::TiDBColumn(Int8 element_len_) @@ -141,10 +141,10 @@ void TiDBColumn::append(const TiDBDecimal & decimal) encodeLittleEndian(decimal.digits_int, *data); encodeLittleEndian(decimal.digits_frac, *data); encodeLittleEndian(decimal.result_frac, *data); - encodeLittleEndian((UInt8)decimal.negative, *data); - for (int i = 0; i < MAX_WORD_BUF_LEN; i++) + encodeLittleEndian(static_cast(decimal.negative), *data); + for (int i : decimal.word_buf) { - encodeLittleEndian(decimal.word_buf[i], *data); + encodeLittleEndian(i, *data); } finishAppendFixed(); } diff --git a/dbms/src/Flash/EstablishCall.cpp b/dbms/src/Flash/EstablishCall.cpp index 8af81e30962..89857a2407e 100644 --- a/dbms/src/Flash/EstablishCall.cpp +++ b/dbms/src/Flash/EstablishCall.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -19,6 +20,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_tunnel_init_rpc_failure_failpoint[]; +} // namespace FailPoints + EstablishCallData::EstablishCallData(AsyncFlashService * service, grpc::ServerCompletionQueue * cq, grpc::ServerCompletionQueue * notify_cq, const std::shared_ptr> & is_shutdown) : service(service) , cq(cq) @@ -71,6 +77,7 @@ void EstablishCallData::initRpc() std::exception_ptr eptr = nullptr; try { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_tunnel_init_rpc_failure_failpoint); service->establishMPPConnectionSyncOrAsync(&ctx, &request, nullptr, this); } catch (...) diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index f194afee31f..3b36adf2c40 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -22,6 +23,12 @@ namespace DB { +namespace FailPoints +{ +extern const char random_receiver_sync_msg_push_failure_failpoint[]; +extern const char random_receiver_async_msg_push_failure_failpoint[]; +} // namespace FailPoints + namespace { String getReceiverStateStr(const ExchangeReceiverState & s) @@ -257,7 +264,9 @@ class AsyncRequestHandler : public UnaryCallback recv_msg->packet = std::move(packet); recv_msg->source_index = request->source_index; recv_msg->req_info = req_info; - if (!msg_channel->push(std::move(recv_msg))) + bool push_success = msg_channel->push(std::move(recv_msg)); + fiu_do_on(FailPoints::random_receiver_async_msg_push_failure_failpoint, push_success = false;); + if (!push_success) return false; // can't reuse packet since it is sent to readers. 
packet = std::make_shared(); @@ -349,7 +358,7 @@ template void ExchangeReceiverBase::cancel() { setEndState(ExchangeReceiverState::CANCELED); - msg_channel.finish(); + msg_channel.cancel(); } template @@ -483,7 +492,9 @@ void ExchangeReceiverBase::readLoop(const Request & req) if (recv_msg->packet->has_error()) throw Exception("Exchange receiver meet error : " + recv_msg->packet->error().msg()); - if (!msg_channel.push(std::move(recv_msg))) + bool push_success = msg_channel.push(std::move(recv_msg)); + fiu_do_on(FailPoints::random_receiver_sync_msg_push_failure_failpoint, push_success = false;); + if (!push_success) { meet_error = true; auto local_state = getState(); diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp index a3096aaa644..7f97a1dd698 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.cpp +++ b/dbms/src/Flash/Mpp/MPPHandler.cpp @@ -31,7 +31,7 @@ void MPPHandler::handleError(const MPPTaskPtr & task, String error) try { if (task) - task->cancel(error); + task->handleError(error); } catch (...) { diff --git a/dbms/src/Flash/Mpp/MPPReceiverSet.cpp b/dbms/src/Flash/Mpp/MPPReceiverSet.cpp new file mode 100644 index 00000000000..60cca308c18 --- /dev/null +++ b/dbms/src/Flash/Mpp/MPPReceiverSet.cpp @@ -0,0 +1,48 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +void MPPReceiverSet::addExchangeReceiver(const String & executor_id, const ExchangeReceiverPtr & exchange_receiver) +{ + RUNTIME_ASSERT(exchange_receiver_map.find(executor_id) == exchange_receiver_map.end(), log, "Duplicate executor_id: {} in DAGRequest", executor_id); + exchange_receiver_map[executor_id] = exchange_receiver; +} + +void MPPReceiverSet::addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader) +{ + coprocessor_readers.push_back(coprocessor_reader); +} + +ExchangeReceiverPtr MPPReceiverSet::getExchangeReceiver(const String & executor_id) const +{ + auto it = exchange_receiver_map.find(executor_id); + if (unlikely(it == exchange_receiver_map.end())) + return nullptr; + return it->second; +} + +void MPPReceiverSet::cancel() +{ + for (auto & it : exchange_receiver_map) + { + it.second->cancel(); + } + for (auto & cop_reader : coprocessor_readers) + cop_reader->cancel(); +} +} // namespace DB diff --git a/dbms/src/Flash/Mpp/MPPReceiverSet.h b/dbms/src/Flash/Mpp/MPPReceiverSet.h new file mode 100644 index 00000000000..44274cb3ce8 --- /dev/null +++ b/dbms/src/Flash/Mpp/MPPReceiverSet.h @@ -0,0 +1,44 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace DB +{ +class MPPReceiverSet +{ +public: + explicit MPPReceiverSet(const String & req_id) + : log(Logger::get("MPPReceiverSet", req_id)) + {} + void addExchangeReceiver(const String & executor_id, const ExchangeReceiverPtr & exchange_receiver); + void addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader); + ExchangeReceiverPtr getExchangeReceiver(const String & executor_id) const; + void cancel(); + +private: + /// Two kinds of receivers in MPP: + /// ExchangeReceiver: receives data from other MPPTasks + /// CoprocessorReader: used for remote reads + ExchangeReceiverMap exchange_receiver_map; + std::vector coprocessor_readers; + const LoggerPtr log; +}; + +using MPPReceiverSetPtr = std::shared_ptr; + +} // namespace DB diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 8f9ca8e55e5..c2d5e6f49f8 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -22,11 +22,14 @@ #include #include #include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -48,6 +51,7 @@ extern const char exception_before_mpp_register_tunnel_for_root_mpp_task[]; extern const char exception_during_mpp_register_tunnel_for_non_root_mpp_task[]; extern const char exception_during_mpp_write_err_to_tunnel[]; extern const char force_no_local_region_for_mpp_task[]; +extern const char random_task_lifecycle_failpoint[]; } // namespace FailPoints MPPTask::MPPTask(const mpp::TaskMeta & meta_, const ContextPtr & context_) @@ -77,6 +81,34 @@ MPPTask::~MPPTask() LOG_FMT_DEBUG(log, "finish MPPTask: {}", id.toString()); } +void MPPTask::abortTunnels(const String & message, AbortType abort_type) +{ + if (abort_type == AbortType::ONCANCELLATION) + { + closeAllTunnels(message); + } + else + { + RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); + tunnel_set->writeError(message); + } +} + +void MPPTask::abortReceivers() +{ + if (likely(receiver_set != nullptr)) + { + receiver_set->cancel(); + } +} + +void MPPTask::abortDataStreams(AbortType abort_type) +{ + /// When the abort type is ONERROR, the MPPTask already knows it has met an error, so let the remaining tasks stop silently to avoid emitting too many useless error messages + bool is_kill = abort_type == AbortType::ONCANCELLATION; + context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, is_kill); +} + void MPPTask::closeAllTunnels(const String & reason) { if (likely(tunnel_set)) @@ -94,13 +126,62 @@ void MPPTask::run() newThreadManager()->scheduleThenDetach(true, "MPPTask", [self = shared_from_this()] { self->runImpl(); }); } -void MPPTask::registerTunnel(const MPPTaskId & task_id, MPPTunnelPtr tunnel) +void MPPTask::registerTunnels(const mpp::DispatchTaskRequest & task_request) { - if (status == CANCELLED) - throw Exception("the tunnel " + tunnel->id() + " can not been registered, because the task is cancelled"); + tunnel_set = std::make_shared(log->identifier()); + std::chrono::seconds timeout(task_request.timeout()); + const auto & exchange_sender = dag_req.root_executor().exchange_sender(); - RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); - tunnel_set->registerTunnel(task_id, tunnel); + for (int i = 0; i < exchange_sender.encoded_task_meta_size(); ++i) + { + // exchange sender will register the tunnels and wait
receiver to found a connection. + mpp::TaskMeta task_meta; + if (unlikely(!task_meta.ParseFromString(exchange_sender.encoded_task_meta(i)))) + throw TiFlashException("Failed to decode task meta info in ExchangeSender", Errors::Coprocessor::BadRequest); + bool is_local = context->getSettingsRef().enable_local_tunnel && meta.address() == task_meta.address(); + bool is_async = !is_local && context->getSettingsRef().enable_async_server; + MPPTunnelPtr tunnel = std::make_shared(task_meta, task_request.meta(), timeout, context->getSettingsRef().max_threads, is_local, is_async, log->identifier()); + LOG_FMT_DEBUG(log, "begin to register the tunnel {}", tunnel->id()); + if (status != INITIALIZING) + throw Exception(fmt::format("The tunnel {} can not be registered, because the task is not in initializing state", tunnel->id())); + tunnel_set->registerTunnel(MPPTaskId{task_meta.start_ts(), task_meta.task_id()}, tunnel); + if (!dag_context->isRootMPPTask()) + { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_register_tunnel_for_non_root_mpp_task); + } + } +} + +void MPPTask::initExchangeReceivers() +{ + receiver_set = std::make_shared(log->identifier()); + traverseExecutors(&dag_req, [&](const tipb::Executor & executor) { + if (executor.tp() == tipb::ExecType::TypeExchangeReceiver) + { + assert(executor.has_executor_id()); + const auto & executor_id = executor.executor_id(); + // In order to distinguish different exchange receivers. + auto exchange_receiver = std::make_shared( + std::make_shared( + executor.exchange_receiver(), + dag_context->getMPPTaskMeta(), + context->getTMTContext().getKVCluster(), + context->getTMTContext().getMPPTaskManager(), + context->getSettingsRef().enable_local_tunnel, + context->getSettingsRef().enable_async_grpc_client), + executor.exchange_receiver().encoded_task_meta_size(), + context->getMaxStreams(), + log->identifier(), + executor_id); + if (status != RUNNING) + throw Exception("exchange receiver map can not be initialized, because the task is not in running state"); + + receiver_set->addExchangeReceiver(executor_id, exchange_receiver); + new_thread_count_of_exchange_receiver += exchange_receiver->computeNewThreadCount(); + } + return true; + }); + dag_context->setMPPReceiverSet(receiver_set); } std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConnectionRequest * request) @@ -116,7 +197,7 @@ std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConn MPPTaskId receiver_id{request->receiver_meta().start_ts(), request->receiver_meta().task_id()}; RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); - auto tunnel_ptr = tunnel_set->getTunnelById(receiver_id); + auto tunnel_ptr = tunnel_set->getTunnelByReceiverTaskId(receiver_id); if (tunnel_ptr == nullptr) { auto err_msg = fmt::format( @@ -207,25 +288,8 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) } // register tunnels - tunnel_set = std::make_shared(log->identifier()); - std::chrono::seconds timeout(task_request.timeout()); + registerTunnels(task_request); - for (int i = 0; i < exchange_sender.encoded_task_meta_size(); i++) - { - // exchange sender will register the tunnels and wait receiver to found a connection. 
- mpp::TaskMeta task_meta; - if (!task_meta.ParseFromString(exchange_sender.encoded_task_meta(i))) - throw TiFlashException("Failed to decode task meta info in ExchangeSender", Errors::Coprocessor::BadRequest); - bool is_local = context->getSettingsRef().enable_local_tunnel && meta.address() == task_meta.address(); - bool is_async = !is_local && context->getSettingsRef().enable_async_server; - MPPTunnelPtr tunnel = std::make_shared(task_meta, task_request.meta(), timeout, context->getSettingsRef().max_threads, is_local, is_async, log->identifier()); - LOG_FMT_DEBUG(log, "begin to register the tunnel {}", tunnel->id()); - registerTunnel(MPPTaskId{task_meta.start_ts(), task_meta.task_id()}, tunnel); - if (!dag_context->isRootMPPTask()) - { - FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_register_tunnel_for_non_root_mpp_task); - } - } dag_context->tunnel_set = tunnel_set; // register task. auto task_manager = tmt_context.getMPPTaskManager(); @@ -251,6 +315,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { auto start_time = Clock::now(); + initExchangeReceivers(); DAGQuerySource dag(*context); executeQuery(dag, *context, false, QueryProcessingStage::Complete); auto end_time = Clock::now(); @@ -280,7 +345,7 @@ void MPPTask::runImpl() LOG_FMT_INFO(log, "task starts preprocessing"); preprocess(); needed_threads = estimateCountOfNewThreads(); - LOG_FMT_DEBUG(log, "Estimate new thread count of query :{} including tunnel_threads: {} , receiver_threads: {}", needed_threads, dag_context->tunnel_set->getRemoteTunnelCnt(), dag_context->getNewThreadCountOfExchangeReceiver()); + LOG_FMT_DEBUG(log, "Estimate new thread count of query :{} including tunnel_threads: {} , receiver_threads: {}", needed_threads, dag_context->tunnel_set->getRemoteTunnelCnt(), new_thread_count_of_exchange_receiver); scheduleOrWait(); @@ -312,93 +377,122 @@ void MPPTask::runImpl() return_statistics.blocks, return_statistics.bytes); } - catch (Exception & e) - { - err_msg = e.displayText(); - LOG_FMT_ERROR(log, "task running meets error: {} Stack Trace : {}", err_msg, e.getStackTrace().toString()); - } - catch (pingcap::Exception & e) - { - err_msg = e.message(); - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); - } - catch (std::exception & e) - { - err_msg = e.what(); - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); - } catch (...) { - err_msg = "unrecovered error"; - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); + err_msg = getCurrentExceptionMessage(true); } + if (err_msg.empty()) { - // todo when error happens, should try to update the metrics if it is available - auto throughput = dag_context->getTableScanThroughput(); - if (throughput.first) - GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); - auto process_info = context->getProcessListElement()->getInfo(); - auto peak_memory = process_info.peak_memory_usage > 0 ? 
process_info.peak_memory_usage : 0; - GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_task).Observe(peak_memory); - mpp_task_statistics.setMemoryPeak(peak_memory); + if (switchStatus(RUNNING, FINISHED)) + LOG_INFO(log, "finish task"); + else + LOG_FMT_WARNING(log, "finish task which is in {} state", taskStatusToString(status)); + if (status == FINISHED) + { + // todo when error happens, should try to update the metrics if it is available + auto throughput = dag_context->getTableScanThroughput(); + if (throughput.first) + GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); + auto process_info = context->getProcessListElement()->getInfo(); + auto peak_memory = process_info.peak_memory_usage > 0 ? process_info.peak_memory_usage : 0; + GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_task).Observe(peak_memory); + mpp_task_statistics.setMemoryPeak(peak_memory); + } } else { - context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, true); - if (dag_context) - dag_context->cancelAllExchangeReceiver(); - writeErrToAllTunnels(err_msg); + if (status == RUNNING) + { + LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); + try + { + handleError(err_msg); + } + catch (...) + { + tryLogCurrentException(log, "Meet error while try to handle error in MPPTask"); + } + } } LOG_FMT_INFO(log, "task ends, time cost is {} ms.", stopwatch.elapsedMilliseconds()); - unregisterTask(); - - if (switchStatus(RUNNING, FINISHED)) - LOG_INFO(log, "finish task"); - else - LOG_WARNING(log, "finish task which was cancelled before"); + // unregister flag is only for FailPoint usage, to produce the situation that MPPTask is destructed + // by grpc CancelMPPTask thread; + bool unregister = true; + fiu_do_on(FailPoints::random_task_lifecycle_failpoint, { + if (!err_msg.empty()) + unregister = false; + }); + if (unregister) + unregisterTask(); - mpp_task_statistics.end(status.load(), err_msg); + mpp_task_statistics.end(status.load(), err_string); mpp_task_statistics.logTracingJson(); } -void MPPTask::writeErrToAllTunnels(const String & e) +void MPPTask::handleError(const String & error_msg) { - RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); - tunnel_set->writeError(e); + if (manager == nullptr || !manager->isTaskToBeCancelled(id)) + abort(error_msg, AbortType::ONERROR); } -void MPPTask::cancel(const String & reason) +void MPPTask::abort(const String & message, AbortType abort_type) { - CPUAffinityManager::getInstance().bindSelfQueryThread(); - LOG_FMT_WARNING(log, "Begin cancel task: {}", id.toString()); + String abort_type_string; + TaskStatus next_task_status; + switch (abort_type) + { + case AbortType::ONCANCELLATION: + abort_type_string = "ONCANCELLATION"; + next_task_status = CANCELLED; + break; + case AbortType::ONERROR: + abort_type_string = "ONERROR"; + next_task_status = FAILED; + break; + } + LOG_FMT_WARNING(log, "Begin abort task: {}, abort type: {}", id.toString(), abort_type_string); while (true) { auto previous_status = status.load(); - if (previous_status == FINISHED || previous_status == CANCELLED) + if (previous_status == FINISHED || previous_status == CANCELLED || previous_status == FAILED) { - LOG_FMT_WARNING(log, "task already {}", (previous_status == FINISHED ? 
"finished" : "cancelled")); + LOG_FMT_WARNING(log, "task already in {} state", taskStatusToString(previous_status)); return; } - else if (previous_status == INITIALIZING && switchStatus(INITIALIZING, CANCELLED)) + else if (previous_status == INITIALIZING && switchStatus(INITIALIZING, next_task_status)) { - closeAllTunnels(reason); + err_string = message; + /// if the task is in initializing state, mpp task can return error to TiDB directly, + /// so just close all tunnels here + closeAllTunnels(message); unregisterTask(); - LOG_WARNING(log, "Finish cancel task from uninitialized"); + LOG_WARNING(log, "Finish abort task from uninitialized"); return; } - else if (previous_status == RUNNING && switchStatus(RUNNING, CANCELLED)) + else if (previous_status == RUNNING && switchStatus(RUNNING, next_task_status)) { + /// abort the components from top to bottom because if bottom components are aborted + /// first, the top components may see an error caused by the abort, which is not + /// the original error + err_string = message; + abortTunnels(message, abort_type); + abortDataStreams(abort_type); + abortReceivers(); scheduleThisTask(ScheduleState::FAILED); - context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, true); - closeAllTunnels(reason); /// runImpl is running, leave remaining work to runImpl - LOG_WARNING(log, "Finish cancel task from running"); + LOG_WARNING(log, "Finish abort task from running"); return; } } } +void MPPTask::cancel(const String & reason) +{ + CPUAffinityManager::getInstance().bindSelfQueryThread(); + abort(reason, AbortType::ONCANCELLATION); +} + bool MPPTask::switchStatus(TaskStatus from, TaskStatus to) { return status.compare_exchange_strong(from, to); diff --git a/dbms/src/Flash/Mpp/MPPTask.h b/dbms/src/Flash/Mpp/MPPTask.h index ee434a2f2ff..a30150b26e8 100644 --- a/dbms/src/Flash/Mpp/MPPTask.h +++ b/dbms/src/Flash/Mpp/MPPTask.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -58,12 +59,12 @@ class MPPTask : public std::enable_shared_from_this void cancel(const String & reason); + void handleError(const String & error_msg); + void prepare(const mpp::DispatchTaskRequest & task_request); void run(); - void registerTunnel(const MPPTaskId & id, MPPTunnelPtr tunnel); - int getNeededThreads(); enum class ScheduleState @@ -91,12 +92,22 @@ class MPPTask : public std::enable_shared_from_this void unregisterTask(); - void writeErrToAllTunnels(const String & e); - /// Similar to `writeErrToAllTunnels`, but it just try to write the error message to tunnel /// without waiting the tunnel to be connected void closeAllTunnels(const String & reason); + enum class AbortType + { + /// todo add ONKILL to distinguish between silent cancellation and kill + ONCANCELLATION, + ONERROR, + }; + void abort(const String & message, AbortType abort_type); + + void abortTunnels(const String & message, AbortType abort_type); + void abortReceivers(); + void abortDataStreams(AbortType abort_type); + void finishWrite(); bool switchStatus(TaskStatus from, TaskStatus to); @@ -107,6 +118,10 @@ class MPPTask : public std::enable_shared_from_this int estimateCountOfNewThreads(); + void registerTunnels(const mpp::DispatchTaskRequest & task_request); + + void initExchangeReceivers(); + tipb::DAGRequest dag_req; ContextPtr context; @@ -116,6 +131,7 @@ class MPPTask : public std::enable_shared_from_this MemoryTracker * memory_tracker = nullptr; std::atomic status{INITIALIZING}; + String err_string; mpp::TaskMeta meta; 
@@ -123,14 +139,16 @@ class MPPTask : public std::enable_shared_from_this MPPTunnelSetPtr tunnel_set; + MPPReceiverSetPtr receiver_set; + + int new_thread_count_of_exchange_receiver = 0; + MPPTaskManager * manager = nullptr; const LoggerPtr log; MPPTaskStatistics mpp_task_statistics; - Exception err; - friend class MPPTaskManager; int needed_threads; diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.cpp b/dbms/src/Flash/Mpp/MPPTaskManager.cpp index 531f8f7a10d..c5499eda89d 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.cpp +++ b/dbms/src/Flash/Mpp/MPPTaskManager.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -22,6 +23,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_task_manager_find_task_failure_failpoint[]; +} // namespace FailPoints + MPPTaskManager::MPPTaskManager(MPPTaskSchedulerPtr scheduler_) : scheduler(std::move(scheduler_)) , log(&Poco::Logger::get("TaskManager")) @@ -50,6 +56,7 @@ MPPTaskPtr MPPTaskManager::findTaskWithTimeout(const mpp::TaskMeta & meta, std:: it = query_it->second->task_map.find(id); return it != query_it->second->task_map.end(); }); + fiu_do_on(FailPoints::random_task_manager_find_task_failure_failpoint, ret = false;); if (cancelled) { errMsg = fmt::format("Task [{},{}] has been cancelled.", meta.start_ts(), meta.task_id()); @@ -140,6 +147,17 @@ bool MPPTaskManager::registerTask(MPPTaskPtr task) return true; } +bool MPPTaskManager::isTaskToBeCancelled(const MPPTaskId & task_id) +{ + std::unique_lock lock(mu); + auto it = mpp_query_map.find(task_id.start_ts); + if (it != mpp_query_map.end() && it->second->to_be_cancelled) + { + return it->second->task_map.find(task_id) != it->second->task_map.end(); + } + return false; +} + void MPPTaskManager::unregisterTask(MPPTask * task) { std::unique_lock lock(mu); diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.h b/dbms/src/Flash/Mpp/MPPTaskManager.h index d7047804aca..770acea3853 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.h +++ b/dbms/src/Flash/Mpp/MPPTaskManager.h @@ -73,6 +73,8 @@ class MPPTaskManager : private boost::noncopyable void unregisterTask(MPPTask * task); + bool isTaskToBeCancelled(const MPPTaskId & task_id); + bool tryToScheduleTask(const MPPTaskPtr & task); void releaseThreadsFromScheduler(const int needed_threads); diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp index 826e7fea88a..13a7eaad95e 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp @@ -25,6 +25,7 @@ namespace DB namespace FailPoints { extern const char exception_during_mpp_close_tunnel[]; +extern const char random_tunnel_wait_timeout_failpoint[]; } // namespace FailPoints template @@ -322,6 +323,7 @@ void MPPTunnelBase::waitUntilConnectedOrFinished(std::unique_lock::writeError(const String & msg) } template -void MPPTunnelSetBase::registerTunnel(const MPPTaskId & id, const TunnelPtr & tunnel) +void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) { - if (id_to_index_map.find(id) != id_to_index_map.end()) - throw Exception("the tunnel " + tunnel->id() + " has been registered"); + if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) + throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); - id_to_index_map[id] = tunnels.size(); + receiver_task_id_to_index_map[receiver_task_id] = tunnels.size(); tunnels.push_back(tunnel); if 
(!tunnel->isLocal()) { @@ -163,10 +163,10 @@ void MPPTunnelSetBase::finishWrite() } template -typename MPPTunnelSetBase::TunnelPtr MPPTunnelSetBase::getTunnelById(const MPPTaskId & id) +typename MPPTunnelSetBase::TunnelPtr MPPTunnelSetBase::getTunnelByReceiverTaskId(const MPPTaskId & id) { - auto it = id_to_index_map.find(id); - if (it == id_to_index_map.end()) + auto it = receiver_task_id_to_index_map.find(id); + if (it == receiver_task_id_to_index_map.end()) { return nullptr; } diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.h b/dbms/src/Flash/Mpp/MPPTunnelSet.h index 021c609f516..e4123db1be5 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.h +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.h @@ -59,7 +59,7 @@ class MPPTunnelSetBase : private boost::noncopyable void finishWrite(); void registerTunnel(const MPPTaskId & id, const TunnelPtr & tunnel); - TunnelPtr getTunnelById(const MPPTaskId & id); + TunnelPtr getTunnelByReceiverTaskId(const MPPTaskId & id); uint16_t getPartitionNum() const { return tunnels.size(); } @@ -72,7 +72,7 @@ class MPPTunnelSetBase : private boost::noncopyable private: std::vector tunnels; - std::unordered_map id_to_index_map; + std::unordered_map receiver_task_id_to_index_map; const LoggerPtr log; int remote_tunnel_cnt = 0; diff --git a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp index af525bd1a55..967bfcecfa3 100644 --- a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp +++ b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp @@ -12,12 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include namespace DB { +namespace FailPoints +{ +extern const char random_min_tso_scheduler_failpoint[]; +} // namespace FailPoints + constexpr UInt64 MAX_UINT64 = std::numeric_limits::max(); constexpr UInt64 OS_THREAD_SOFT_LIMIT = 100000; @@ -193,7 +199,9 @@ bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & q } else { - if (tso <= min_tso) /// the min_tso query should fully run, otherwise throw errors here. + bool is_tso_min = tso <= min_tso; + fiu_do_on(FailPoints::random_min_tso_scheduler_failpoint, is_tso_min = true;); + if (is_tso_min) /// the min_tso query should fully run, otherwise throw errors here. { has_error = true; auto msg = fmt::format("threads are unavailable for the query {} ({} min_tso {}) {}, need {}, but used {} of the thread hard limit {}, {} active and {} waiting queries.", tso, tso == min_tso ? "is" : "is newer than", min_tso, isWaiting ? 
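The rename above makes explicit that tunnels are keyed by the receiver's task id, with duplicate registrations rejected and lookups returning null for unknown ids. A self-contained sketch of that bookkeeping, using plain standard containers rather than the real MPPTunnelSet types:

```cpp
// Sketch of registering tunnels keyed by receiver task id (illustrative types).
#include <cstdint>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

struct TunnelSketch { std::string id; };
using TunnelPtr = std::shared_ptr<TunnelSketch>;

class TunnelSetSketch
{
public:
    void registerTunnel(int64_t receiver_task_id, const TunnelPtr & tunnel)
    {
        if (receiver_task_id_to_index.count(receiver_task_id))
            throw std::runtime_error("the tunnel " + tunnel->id + " has been registered");
        receiver_task_id_to_index[receiver_task_id] = tunnels.size();
        tunnels.push_back(tunnel);
    }

    TunnelPtr getTunnelByReceiverTaskId(int64_t receiver_task_id) const
    {
        auto it = receiver_task_id_to_index.find(receiver_task_id);
        return it == receiver_task_id_to_index.end() ? nullptr : tunnels[it->second];
    }

private:
    std::vector<TunnelPtr> tunnels;
    std::map<int64_t, size_t> receiver_task_id_to_index;
};

int main()
{
    TunnelSetSketch set;
    set.registerTunnel(1, std::make_shared<TunnelSketch>(TunnelSketch{"tunnel_0_1"}));
    return set.getTunnelByReceiverTaskId(2) == nullptr ? 0 : 1; // unknown id -> nullptr
}
```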
"from the waiting set" : "when directly schedule it", needed_threads, estimated_thread_usage, thread_hard_limit, active_set.size(), waiting_set.size()); diff --git a/dbms/src/Flash/Mpp/TaskStatus.cpp b/dbms/src/Flash/Mpp/TaskStatus.cpp index 423b768faea..c87ae2b8eb4 100644 --- a/dbms/src/Flash/Mpp/TaskStatus.cpp +++ b/dbms/src/Flash/Mpp/TaskStatus.cpp @@ -29,6 +29,8 @@ StringRef taskStatusToString(const TaskStatus & status) return "FINISHED"; case CANCELLED: return "CANCELLED"; + case FAILED: + return "FAILED"; default: throw Exception("Unknown TaskStatus"); } diff --git a/dbms/src/Flash/Mpp/TaskStatus.h b/dbms/src/Flash/Mpp/TaskStatus.h index 999e30790bf..0997c8adc52 100644 --- a/dbms/src/Flash/Mpp/TaskStatus.h +++ b/dbms/src/Flash/Mpp/TaskStatus.h @@ -24,6 +24,7 @@ enum TaskStatus RUNNING, FINISHED, CANCELLED, + FAILED, }; StringRef taskStatusToString(const TaskStatus & status); diff --git a/dbms/src/Flash/tests/CMakeLists.txt b/dbms/src/Flash/tests/CMakeLists.txt index a34e4b23432..944908dcb25 100644 --- a/dbms/src/Flash/tests/CMakeLists.txt +++ b/dbms/src/Flash/tests/CMakeLists.txt @@ -13,14 +13,3 @@ # limitations under the License. include_directories (${CMAKE_CURRENT_BINARY_DIR}) - -add_executable (exchange_perftest - exchange_perftest.cpp - ${TiFlash_SOURCE_DIR}/dbms/src/Server/StorageConfigParser.cpp - ${TiFlash_SOURCE_DIR}/dbms/src/Functions/FunctionsConversion.cpp) -target_link_libraries (exchange_perftest - gtest_main - dbms - clickhouse_functions - clickhouse_aggregate_functions - tiflash-dttool-lib) diff --git a/dbms/src/Flash/tests/WindowTestUtil.h b/dbms/src/Flash/tests/WindowTestUtil.h new file mode 100644 index 00000000000..3f4cb7d595f --- /dev/null +++ b/dbms/src/Flash/tests/WindowTestUtil.h @@ -0,0 +1,81 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +namespace DB +{ +namespace tests +{ + +inline std::shared_ptr mockInterpreter(Context & context, const std::vector & source_columns, int concurrency) +{ + std::vector mock_input_streams_vec = {}; + DAGQueryBlock mock_query_block(0, static_cast>(nullptr)); + std::vector mock_subqueries_for_sets = {}; + std::shared_ptr mock_interpreter = std::make_shared(context, + mock_input_streams_vec, + mock_query_block, + concurrency); + mock_interpreter->analyzer = std::make_unique(std::move(source_columns), context); + return mock_interpreter; +} + +inline void mockExecuteProject(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, NamesWithAliases & final_project) +{ + mock_interpreter->executeProject(pipeline, final_project); +} + +inline void mockExecuteWindowOrder(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const tipb::Sort & sort) +{ + mock_interpreter->handleWindowOrder(pipeline, sort); + mock_interpreter->input_streams_vec[0] = pipeline.streams; + NamesWithAliases final_project; + for (const auto & column : (*mock_interpreter->analyzer).source_columns) + { + final_project.push_back({column.name, ""}); + } + mockExecuteProject(mock_interpreter, pipeline, final_project); +} + +inline void mockExecuteWindowOrder(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const String & sort_json) +{ + tipb::Sort sort; + ::google::protobuf::util::JsonStringToMessage(sort_json, &sort); + mockExecuteWindowOrder(mock_interpreter, pipeline, sort); +} + +inline void mockExecuteWindow(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const tipb::Window & window) +{ + mock_interpreter->handleWindow(pipeline, window); + mock_interpreter->input_streams_vec[0] = pipeline.streams; + NamesWithAliases final_project; + for (const auto & column : (*mock_interpreter->analyzer).source_columns) + { + final_project.push_back({column.name, ""}); + } + mockExecuteProject(mock_interpreter, pipeline, final_project); +} + +inline void mockExecuteWindow(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, std::string window_json_str) +{ + tipb::Window window; + google::protobuf::util::JsonStringToMessage(window_json_str, &window); + mockExecuteWindow(mock_interpreter, pipeline, window); +} + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/bench_exchange.cpp b/dbms/src/Flash/tests/bench_exchange.cpp new file mode 100644 index 00000000000..fbb53bfd4a4 --- /dev/null +++ b/dbms/src/Flash/tests/bench_exchange.cpp @@ -0,0 +1,407 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include // to include the implementation of StreamingDAGResponseWriter +#include // to include the implementation of ExchangeReceiver +#include // to include the implementation of MPPTunnel +#include // to include the implementation of MPPTunnelSet +#include +#include + + +namespace DB +{ +namespace tests +{ + +std::random_device rd; + +MockBlockInputStream::MockBlockInputStream(const std::vector & blocks_, StopFlag & stop_flag_) + : blocks(blocks_) + , header(blocks[0].cloneEmpty()) + , mt(rd()) + , dist(0, blocks.size() - 1) + , stop_flag(stop_flag_) +{} + +MockFixedRowsBlockInputStream::MockFixedRowsBlockInputStream(size_t total_rows_, const std::vector & blocks_) + : header(blocks_[0].cloneEmpty()) + , mt(rd()) + , dist(0, blocks_.size() - 1) + , current_rows(0) + , total_rows(total_rows_) + , blocks(blocks_) +{} + +Block makeBlock(int row_num) +{ + std::mt19937 mt(rd()); + std::uniform_int_distribution int64_dist; + std::uniform_int_distribution len_dist(10, 20); + std::uniform_int_distribution char_dist; + + InferredDataVector> int64_vec; + InferredDataVector> int64_vec2; + for (int i = 0; i < row_num; ++i) + { + int64_vec.emplace_back(int64_dist(mt)); + int64_vec2.emplace_back(int64_dist(mt)); + } + + InferredDataVector> string_vec; + for (int i = 0; i < row_num; ++i) + { + int len = len_dist(mt); + String s; + for (int j = 0; j < len; ++j) + s.push_back(char_dist(mt)); + string_vec.push_back(std::move(s)); + } + + auto int64_data_type = makeDataType>(); + ColumnWithTypeAndName int64_column(makeColumn>(int64_data_type, int64_vec), int64_data_type, "int64_1"); + ColumnWithTypeAndName int64_column2(makeColumn>(int64_data_type, int64_vec2), int64_data_type, "int64_2"); + + auto string_data_type = makeDataType>(); + ColumnWithTypeAndName string_column(makeColumn>(string_data_type, string_vec), string_data_type, "string"); + + return Block({int64_column, string_column, int64_column2}); +} + +std::vector makeBlocks(int block_num, int row_num) +{ + std::vector blocks; + for (int i = 0; i < block_num; ++i) + blocks.push_back(makeBlock(row_num)); + return blocks; +} + +mpp::MPPDataPacket makePacket(ChunkCodecStream & codec, int row_num) +{ + auto block = makeBlock(row_num); + codec.encode(block, 0, row_num); + + mpp::MPPDataPacket packet; + packet.add_chunks(codec.getString()); + codec.clear(); + + return packet; +} + +std::vector makePackets(ChunkCodecStream & codec, int packet_num, int row_num) +{ + std::vector packets; + for (int i = 0; i < packet_num; ++i) + packets.push_back(std::make_shared(makePacket(codec, row_num))); + return packets; +} + +std::vector makePacketQueues(int source_num, int queue_size) +{ + std::vector queues(source_num); + for (int i = 0; i < source_num; ++i) + queues[i] = std::make_shared(queue_size); + return queues; +} + +std::vector makeFields() +{ + std::vector fields(3); + fields[0].set_tp(TiDB::TypeLongLong); + fields[1].set_tp(TiDB::TypeString); + fields[2].set_tp(TiDB::TypeLongLong); + return fields; +} + +void printException(const Exception & e) +{ + std::string text = e.displayText(); + + auto embedded_stack_trace_pos = text.find("Stack trace"); + std::cerr << "Code: " << e.code() << ". 
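`makeBlock`/`makeBlocks` above feed the benchmark with randomly generated int64 and short string columns. A container-level analog of that data generation, assuming nothing about TiFlash's Block/Column types:

```cpp
// Self-contained analog of makeBlock/makeBlocks with plain std containers.
#include <cstdint>
#include <random>
#include <string>
#include <vector>

struct SketchBlock
{
    std::vector<int64_t> int64_1, int64_2;
    std::vector<std::string> str;
};

SketchBlock makeBlock(int row_num, std::mt19937 & mt)
{
    std::uniform_int_distribution<int64_t> int64_dist;
    std::uniform_int_distribution<int> len_dist(10, 20);
    std::uniform_int_distribution<int> char_dist('a', 'z');

    SketchBlock b;
    for (int i = 0; i < row_num; ++i)
    {
        b.int64_1.push_back(int64_dist(mt));
        b.int64_2.push_back(int64_dist(mt));
        std::string s;
        for (int j = 0, len = len_dist(mt); j < len; ++j)
            s.push_back(static_cast<char>(char_dist(mt)));
        b.str.push_back(std::move(s));
    }
    return b;
}

std::vector<SketchBlock> makeBlocks(int block_num, int row_num)
{
    std::random_device rd;
    std::mt19937 mt(rd());
    std::vector<SketchBlock> blocks;
    for (int i = 0; i < block_num; ++i)
        blocks.push_back(makeBlock(row_num, mt));
    return blocks;
}

int main()
{
    auto blocks = makeBlocks(/*block_num=*/100, /*row_num=*/1024);
    return blocks.empty();
}
```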
" << text << std::endl + << std::endl; + if (std::string::npos == embedded_stack_trace_pos) + std::cerr << "Stack trace:" << std::endl + << e.getStackTrace().toString() << std::endl; +} + +void sendPacket(const std::vector & packets, const PacketQueuePtr & queue, StopFlag & stop_flag) +{ + std::mt19937 mt(rd()); + std::uniform_int_distribution dist(0, packets.size() - 1); + + while (!stop_flag.load()) + { + int i = dist(mt); + queue->tryPush(packets[i], std::chrono::milliseconds(10)); + } + queue->finish(); +} + +void receivePacket(const PacketQueuePtr & queue) +{ + while (true) + { + PacketPtr packet; + if (!queue->pop(packet)) + break; + } +} + +ReceiverHelper::ReceiverHelper(int concurrency_, int source_num_) + : concurrency(concurrency_) + , source_num(source_num_) +{ + pb_exchange_receiver.set_tp(tipb::Hash); + for (int i = 0; i < source_num; ++i) + { + mpp::TaskMeta task; + task.set_start_ts(0); + task.set_task_id(i); + task.set_partition_id(i); + task.set_address(""); + + String encoded_task; + task.SerializeToString(&encoded_task); + + pb_exchange_receiver.add_encoded_task_meta(encoded_task); + } + + fields = makeFields(); + *pb_exchange_receiver.add_field_types() = fields[0]; + *pb_exchange_receiver.add_field_types() = fields[1]; + *pb_exchange_receiver.add_field_types() = fields[2]; + + task_meta.set_task_id(100); + + queues = makePacketQueues(source_num, 10); +} + +MockExchangeReceiverPtr ReceiverHelper::buildReceiver() +{ + return std::make_shared( + std::make_shared(queues, fields), + source_num, + concurrency, + "mock_req_id", + "mock_exchange_receiver_id"); +} + +std::vector ReceiverHelper::buildExchangeReceiverStream() +{ + auto receiver = buildReceiver(); + std::vector streams(concurrency); + for (int i = 0; i < concurrency; ++i) + { + streams[i] = std::make_shared(receiver, "mock_req_id", "mock_executor_id" + std::to_string(i)); + } + return streams; +} + +BlockInputStreamPtr ReceiverHelper::buildUnionStream() +{ + auto streams = buildExchangeReceiverStream(); + return std::make_shared>(streams, nullptr, concurrency, /*req_id=*/""); +} + +void ReceiverHelper::finish() +{ + if (join_ptr) + { + join_ptr->setBuildTableState(Join::BuildTableState::SUCCEED); + std::cout << fmt::format("Hash table size: {} bytes", join_ptr->getTotalByteCount()) << std::endl; + } +} + +SenderHelper::SenderHelper( + int source_num_, + int concurrency_, + const std::vector & queues_, + const std::vector & fields) + : source_num(source_num_) + , concurrency(concurrency_) + , queues(queues_) +{ + mpp::TaskMeta task_meta; + tunnel_set = std::make_shared("mock_req_id"); + for (int i = 0; i < source_num; ++i) + { + auto writer = std::make_shared(queues[i]); + mock_writers.push_back(writer); + + auto tunnel = std::make_shared( + task_meta, + task_meta, + std::chrono::seconds(60), + concurrency, + false, + false, + "mock_req_id"); + tunnel->connect(writer.get()); + tunnels.push_back(tunnel); + MPPTaskId id(0, i); + tunnel_set->registerTunnel(id, tunnel); + } + + tipb::DAGRequest dag_request; + tipb::Executor root_executor; + root_executor.set_executor_id("ExchangeSender_100"); + *dag_request.mutable_root_executor() = root_executor; + + dag_context = std::make_unique(dag_request); + dag_context->is_mpp_task = true; + dag_context->is_root_mpp_task = false; + dag_context->encode_type = tipb::EncodeType::TypeCHBlock; + dag_context->result_field_types = fields; +} + +BlockInputStreamPtr SenderHelper::buildUnionStream( + StopFlag & stop_flag, + const std::vector & blocks) +{ + std::vector send_streams; + for 
(int i = 0; i < concurrency; ++i) + { + BlockInputStreamPtr stream = std::make_shared(blocks, stop_flag); + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + + return std::make_shared>(send_streams, nullptr, concurrency, /*req_id=*/""); +} + +BlockInputStreamPtr SenderHelper::buildUnionStream(size_t total_rows, const std::vector & blocks) +{ + std::vector send_streams; + for (int i = 0; i < concurrency; ++i) + { + BlockInputStreamPtr stream = std::make_shared(total_rows / concurrency, blocks); + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + + return std::make_shared>(send_streams, nullptr, concurrency, /*req_id=*/""); +} + +void SenderHelper::finish() +{ + for (size_t i = 0; i < tunnels.size(); ++i) + { + tunnels[i]->writeDone(); + tunnels[i]->waitForFinish(); + mock_writers[i]->finish(); + } +} + +void ExchangeBench::SetUp(const benchmark::State &) +{ + Poco::Logger::root().setLevel("error"); + + DynamicThreadPool::global_instance = std::make_unique( + /*fixed_thread_num=*/300, + std::chrono::milliseconds(100000)); + + input_blocks = makeBlocks(/*block_num=*/100, /*row_num=*/1024); + + try + { + DB::registerWindowFunctions(); + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } +} + +void ExchangeBench::TearDown(const benchmark::State &) +{ + input_blocks.clear(); + // NOTE: Must reset here, otherwise DynamicThreadPool::fixedWork() may core because metrics already destroyed. 
+ DynamicThreadPool::global_instance.reset(); +} + +void ExchangeBench::runAndWait(std::shared_ptr receiver_helper, + BlockInputStreamPtr receiver_stream, + std::shared_ptr & sender_helper, + BlockInputStreamPtr sender_stream) +{ + std::future sender_future = DynamicThreadPool::global_instance->schedule(/*memory_tracker=*/false, + [sender_stream, sender_helper] { + sender_stream->readPrefix(); + while (const auto & block = sender_stream->read()) {} + sender_stream->readSuffix(); + sender_helper->finish(); + }); + std::future receiver_future = DynamicThreadPool::global_instance->schedule(/*memory_tracker=*/false, + [receiver_stream, receiver_helper] { + receiver_stream->readPrefix(); + while (const auto & block = receiver_stream->read()) {} + receiver_stream->readSuffix(); + receiver_helper->finish(); + }); + sender_future.get(); + receiver_future.get(); +} + +BENCHMARK_DEFINE_F(ExchangeBench, basic_send_receive) +(benchmark::State & state) +try +{ + const int concurrency = state.range(0); + const int source_num = state.range(1); + const int total_rows = state.range(2); + Context context = TiFlashTestEnv::getContext(); + + for (auto _ : state) + { + std::shared_ptr receiver_helper = std::make_shared(concurrency, source_num); + BlockInputStreamPtr receiver_stream = receiver_helper->buildUnionStream(); + + std::shared_ptr sender_helper = std::make_shared(source_num, + concurrency, + receiver_helper->queues, + receiver_helper->fields); + BlockInputStreamPtr sender_stream = sender_helper->buildUnionStream(total_rows, input_blocks); + + runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream); + } +} +CATCH +BENCHMARK_REGISTER_F(ExchangeBench, basic_send_receive) + ->Args({8, 1, 1024 * 1000}); + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/bench_exchange.h b/dbms/src/Flash/tests/bench_exchange.h new file mode 100644 index 00000000000..6b09e319613 --- /dev/null +++ b/dbms/src/Flash/tests/bench_exchange.h @@ -0,0 +1,291 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
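The benchmark above uses Google Benchmark's fixture registration with runtime arguments: `BENCHMARK_DEFINE_F` attaches a body to the fixture and `BENCHMARK_REGISTER_F(...)->Args({...})` supplies the `{concurrency, source_num, total_rows}` triple read back through `state.range(i)`. A minimal compilable sketch of that wiring (the fixture and body here are placeholders, not the real exchange benchmark):

```cpp
// Fixture-based Google Benchmark with runtime arguments.
#include <benchmark/benchmark.h>

class ExchangeLikeBench : public benchmark::Fixture
{
public:
    void SetUp(const benchmark::State &) override { /* build shared inputs once per run */ }
    void TearDown(const benchmark::State &) override { /* release shared resources */ }
};

BENCHMARK_DEFINE_F(ExchangeLikeBench, basic_send_receive)
(benchmark::State & state)
{
    const int concurrency = state.range(0);
    const int source_num = state.range(1);
    const int total_rows = state.range(2);
    for (auto _ : state)
    {
        // one iteration: push total_rows rows through source_num queues with
        // `concurrency` sender/receiver streams, then wait for both sides
        benchmark::DoNotOptimize(concurrency + source_num + total_rows);
    }
}
// Same shape of argument triple as the real benchmark registration.
BENCHMARK_REGISTER_F(ExchangeLikeBench, basic_send_receive)->Args({8, 1, 1024 * 1000});

BENCHMARK_MAIN();
```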
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace tests +{ + + +using Packet = mpp::MPPDataPacket; +using PacketPtr = std::shared_ptr; +using PacketQueue = MPMCQueue; +using PacketQueuePtr = std::shared_ptr; +using StopFlag = std::atomic; + +// NOLINTBEGIN(readability-convert-member-functions-to-static) +struct MockReceiverContext +{ + using Status = ::grpc::Status; + struct Request + { + String debugString() const + { + return "{Request}"; + } + + int source_index = 0; + int send_task_id = 0; + int recv_task_id = -1; + }; + + struct Reader + { + explicit Reader(const PacketQueuePtr & queue_) + : queue(queue_) + {} + + void initialize() const {} + + bool read(PacketPtr & packet [[maybe_unused]]) const + { + PacketPtr res; + if (queue->pop(res)) + { + *packet = *res; // avoid change shared packets + return true; + } + return false; + } + + Status finish() const + { + return ::grpc::Status(); + } + + PacketQueuePtr queue; + }; + + struct MockAsyncGrpcExchangePacketReader + { + // Not implement benchmark for Async GRPC for now. + void init(UnaryCallback *) { assert(0); } + void read(MPPDataPacketPtr &, UnaryCallback *) { assert(0); } + void finish(::grpc::Status &, UnaryCallback *) { assert(0); } + }; + + using AsyncReader = MockAsyncGrpcExchangePacketReader; + + MockReceiverContext( + const std::vector & queues_, + const std::vector & field_types_) + : queues(queues_) + , field_types(field_types_) + {} + + void fillSchema(DAGSchema & schema) const + { + schema.clear(); + for (size_t i = 0; i < field_types.size(); ++i) + { + String name = "exchange_receiver_" + std::to_string(i); + ColumnInfo info = TiDB::fieldTypeToColumnInfo(field_types[i]); + schema.emplace_back(std::move(name), std::move(info)); + } + } + + Request makeRequest(int index) const + { + return {index, index, -1}; + } + + std::shared_ptr makeReader(const Request & request) + { + return std::make_shared(queues[request.send_task_id]); + } + + static Status getStatusOK() + { + return ::grpc::Status(); + } + + bool supportAsync(const Request &) const { return false; } + void makeAsyncReader( + const Request &, + std::shared_ptr &, + UnaryCallback *) const {} + + std::vector queues; + std::vector field_types; +}; +// NOLINTEND(readability-convert-member-functions-to-static) + +using MockExchangeReceiver = ExchangeReceiverBase; +using MockExchangeReceiverPtr = std::shared_ptr; +using MockExchangeReceiverInputStream = TiRemoteBlockInputStream; + +struct MockWriter : public PacketWriter +{ + explicit MockWriter(PacketQueuePtr queue_) + : queue(std::move(queue_)) + {} + + bool write(const Packet & packet) override + { + queue->push(std::make_shared(packet)); + return true; + } + + void finish() + { + queue->finish(); + } + + PacketQueuePtr queue; +}; + +using MockWriterPtr = std::shared_ptr; +using MockTunnel = MPPTunnelBase; +using MockTunnelPtr = std::shared_ptr; +using MockTunnelSet = MPPTunnelSetBase; +using MockTunnelSetPtr = std::shared_ptr; + +struct MockBlockInputStream : public IProfilingBlockInputStream +{ + const std::vector & blocks; + Block header; + std::mt19937 mt; + std::uniform_int_distribution dist; + StopFlag & stop_flag; + + MockBlockInputStream(const std::vector & blocks_, StopFlag & stop_flag_); + + String getName() const override { return "MockBlockInputStream"; } + Block getHeader() const override { return header; } + + Block 
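The mock writer and reader above communicate through a bounded MPMC queue whose `pop()` keeps returning packets until the producer calls `finish()` and the queue drains. A minimal sketch of that contract using a mutex and condition variables; TiFlash's MPMCQueue is more general than this.

```cpp
// Bounded queue sketch: pop() returns false only after finish() and drain.
#include <condition_variable>
#include <cstddef>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

template <typename T>
class BoundedQueueSketch
{
public:
    explicit BoundedQueueSketch(size_t cap) : capacity(cap) {}

    void push(T v)
    {
        std::unique_lock<std::mutex> lock(mu);
        not_full.wait(lock, [&] { return q.size() < capacity || finished; });
        if (finished)
            return; // writes after finish() are dropped
        q.push(std::move(v));
        not_empty.notify_one();
    }

    bool pop(T & out)
    {
        std::unique_lock<std::mutex> lock(mu);
        not_empty.wait(lock, [&] { return !q.empty() || finished; });
        if (q.empty())
            return false; // finished and drained: end of stream
        out = std::move(q.front());
        q.pop();
        not_full.notify_one();
        return true;
    }

    void finish()
    {
        std::lock_guard<std::mutex> lock(mu);
        finished = true;
        not_empty.notify_all();
        not_full.notify_all();
    }

private:
    std::mutex mu;
    std::condition_variable not_empty, not_full;
    std::queue<T> q;
    size_t capacity;
    bool finished = false;
};

int main()
{
    BoundedQueueSketch<int> queue(10);
    std::thread sender([&] { for (int i = 0; i < 100; ++i) queue.push(i); queue.finish(); });
    std::thread receiver([&] { int v; size_t n = 0; while (queue.pop(v)) ++n; std::cout << n << " packets\n"; });
    sender.join();
    receiver.join();
}
```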
readImpl() override + { + if (stop_flag.load(std::memory_order_relaxed)) + return Block{}; + return blocks[dist(mt)]; + } +}; + +// Similar to MockBlockInputStream, but return fixed count of rows. +struct MockFixedRowsBlockInputStream : public IProfilingBlockInputStream +{ + Block header; + std::mt19937 mt; + std::uniform_int_distribution dist; + size_t current_rows; + size_t total_rows; + const std::vector & blocks; + + MockFixedRowsBlockInputStream(size_t total_rows_, const std::vector & blocks_); + + String getName() const override { return "MockBlockInputStream"; } + Block getHeader() const override { return header; } + + Block readImpl() override + { + if (current_rows >= total_rows) + return Block{}; + Block res = blocks[dist(mt)]; + current_rows += res.rows(); + return res; + } +}; + +Block makeBlock(int row_num); +std::vector makeBlocks(int block_num, int row_num); +mpp::MPPDataPacket makePacket(ChunkCodecStream & codec, int row_num); +std::vector makePackets(ChunkCodecStream & codec, int packet_num, int row_num); +std::vector makePacketQueues(int source_num, int queue_size); +std::vector makeFields(); +void printException(const Exception & e); +void sendPacket(const std::vector & packets, const PacketQueuePtr & queue, StopFlag & stop_flag); +void receivePacket(const PacketQueuePtr & queue); + +struct ReceiverHelper +{ + const int concurrency; + const int source_num; + tipb::ExchangeReceiver pb_exchange_receiver; + std::vector fields; + mpp::TaskMeta task_meta; + std::vector queues; + std::shared_ptr join_ptr; + + explicit ReceiverHelper(int concurrency_, int source_num_); + MockExchangeReceiverPtr buildReceiver(); + std::vector buildExchangeReceiverStream(); + BlockInputStreamPtr buildUnionStream(); + BlockInputStreamPtr buildUnionStreamWithHashJoinBuildStream(); + void finish(); +}; + +struct SenderHelper +{ + const int source_num; + const int concurrency; + + std::vector queues; + std::vector mock_writers; + std::vector tunnels; + MockTunnelSetPtr tunnel_set; + std::unique_ptr dag_context; + + SenderHelper( + int source_num_, + int concurrency_, + const std::vector & queues_, + const std::vector & fields); + + // Using MockBlockInputStream to build streams. + BlockInputStreamPtr buildUnionStream(StopFlag & stop_flag, const std::vector & blocks); + // Using MockFixedRowsBlockInputStream to build streams. + BlockInputStreamPtr buildUnionStream(size_t total_rows, const std::vector & blocks); + + void finish(); +}; + +class ExchangeBench : public benchmark::Fixture +{ +public: + void SetUp(const benchmark::State &) override; + void TearDown(const benchmark::State &) override; + void runAndWait(std::shared_ptr receiver_helper, + BlockInputStreamPtr receiver_stream, + std::shared_ptr & sender_helper, + BlockInputStreamPtr sender_stream); + + std::vector input_blocks; +}; + + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/bench_window.cpp b/dbms/src/Flash/tests/bench_window.cpp new file mode 100644 index 00000000000..da9df20fdf3 --- /dev/null +++ b/dbms/src/Flash/tests/bench_window.cpp @@ -0,0 +1,107 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
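`MockFixedRowsBlockInputStream` above serves pre-built blocks until a total row budget is reached, then returns an empty block to signal end-of-stream; the budget is a lower bound, since the last block may overshoot it. A plain-container sketch of that behavior:

```cpp
// Fixed-rows source sketch: empty result marks end-of-stream.
#include <cstddef>
#include <iostream>
#include <vector>

using BlockSketch = std::vector<int>; // size() stands in for rows()

class FixedRowsSource
{
public:
    FixedRowsSource(size_t total_rows_, std::vector<BlockSketch> blocks_)
        : total_rows(total_rows_), blocks(std::move(blocks_)) {}

    BlockSketch read()
    {
        if (current_rows >= total_rows)
            return {}; // empty block signals end-of-stream
        BlockSketch res = blocks[next_block++ % blocks.size()];
        current_rows += res.size();
        return res;
    }

private:
    size_t total_rows;
    size_t current_rows = 0;
    size_t next_block = 0;
    std::vector<BlockSketch> blocks;
};

int main()
{
    FixedRowsSource source(/*total_rows=*/10, {{1, 2, 3, 4}});
    size_t produced = 0;
    while (true)
    {
        auto block = source.read();
        if (block.empty())
            break;
        produced += block.size();
    }
    std::cout << produced << " rows\n"; // 12: the budget is a lower bound, as in the mock stream
}
```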
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ +class WindowFunctionBench : public ExchangeBench +{ +public: + void SetUp(const benchmark::State & state) override + { + // build tipb::Window and tipb::Sort. + // select row_number() over w1 from t1 window w1 as (partition by c1, c2, c3 order by c1, c2, c3); + ExchangeBench::SetUp(state); + MockColumnInfos columns{ + {"c1", TiDB::TP::TypeLongLong}, + {"c2", TiDB::TP::TypeString}, + {"c3", TiDB::TP::TypeLongLong}, + }; + size_t executor_index = 0; + DAGRequestBuilder builder(executor_index); + builder + .mockTable("test", "t1", columns) + .sort({{"c1", false}, {"c2", false}, {"c3", false}}, true) + .window(RowNumber(), + {{"c1", false}, {"c2", false}, {"c3", false}}, + {{"c1", false}, {"c2", false}, {"c3", false}}, + buildDefaultRowsFrame()); + tipb::DAGRequest req; + MPPInfo mpp_info(0, -1, -1, {}, std::unordered_map>{}); + builder.getRoot()->toTiPBExecutor(req.mutable_root_executor(), /*collator_id=*/0, mpp_info, TiFlashTestEnv::getContext()); + assert(req.root_executor().tp() == tipb::TypeWindow); + window = req.root_executor().window(); + assert(window.child().tp() == tipb::TypeSort); + sort = window.child().sort(); + } + + void prepareWindowStream(Context & context, int concurrency, int source_num, int total_rows, const std::vector & blocks, BlockInputStreamPtr & sender_stream, BlockInputStreamPtr & receiver_stream, std::shared_ptr & sender_helper, std::shared_ptr & receiver_helper) const + { + DAGPipeline pipeline; + receiver_helper = std::make_shared(concurrency, source_num); + pipeline.streams = receiver_helper->buildExchangeReceiverStream(); + + sender_helper = std::make_shared(source_num, concurrency, receiver_helper->queues, receiver_helper->fields); + sender_stream = sender_helper->buildUnionStream(total_rows, blocks); + + context.setDAGContext(sender_helper->dag_context.get()); + std::vector source_columns{ + NameAndTypePair("c1", makeNullable(std::make_shared())), + NameAndTypePair("c2", makeNullable(std::make_shared())), + NameAndTypePair("c3", makeNullable(std::make_shared()))}; + auto mock_interpreter = mockInterpreter(context, source_columns, concurrency); + mock_interpreter->input_streams_vec.push_back(pipeline.streams); + mockExecuteWindowOrder(mock_interpreter, pipeline, sort); + mockExecuteWindow(mock_interpreter, pipeline, window); + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, 8192, 0, "mock_executor_id_squashing"); + }); + receiver_stream = std::make_shared>(pipeline.streams, nullptr, concurrency, /*req_id=*/""); + } + + tipb::Window window; + tipb::Sort sort; +}; + +BENCHMARK_DEFINE_F(WindowFunctionBench, basic_row_number) +(benchmark::State & state) +try +{ + const int concurrency = state.range(0); + const int source_num = state.range(1); + const int total_rows = state.range(2); + Context context = TiFlashTestEnv::getContext(); + + for (auto _ : state) + { + std::shared_ptr sender_helper; + std::shared_ptr receiver_helper; + BlockInputStreamPtr sender_stream; + BlockInputStreamPtr receiver_stream; + + prepareWindowStream(context, concurrency, source_num, 
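The window benchmark above mocks `select row_number() over w1 from t1 window w1 as (partition by c1, c2, c3 order by c1, c2, c3)`. For reference, this is what that plan computes: rows are sorted by the key, and `row_number()` restarts at 1 whenever the partition key changes. A toy illustration over plain tuples (since partition and order columns coincide in the query, rows within one partition are identical keys):

```cpp
// row_number() over sorted, partitioned rows -- semantics only, no TiFlash types.
#include <algorithm>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

using Row = std::tuple<long, std::string, long>; // (c1, c2, c3)

std::vector<long> rowNumberOverPartitions(std::vector<Row> rows)
{
    std::sort(rows.begin(), rows.end()); // "order by c1, c2, c3"
    std::vector<long> row_numbers(rows.size());
    long rn = 0;
    for (size_t i = 0; i < rows.size(); ++i)
    {
        // "partition by c1, c2, c3": restart numbering when the key changes
        rn = (i > 0 && rows[i] == rows[i - 1]) ? rn + 1 : 1;
        row_numbers[i] = rn;
    }
    return row_numbers;
}

int main()
{
    std::vector<Row> rows{{1, "a", 1}, {1, "a", 1}, {2, "b", 2}};
    for (long rn : rowNumberOverPartitions(rows))
        std::cout << rn << ' '; // 1 2 1
    std::cout << '\n';
}
```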
total_rows, input_blocks, sender_stream, receiver_stream, sender_helper, receiver_helper); + + runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream); + } +} +CATCH +BENCHMARK_REGISTER_F(WindowFunctionBench, basic_row_number) + ->Args({8, 1, 1024 * 1000}); + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/exchange_perftest.cpp b/dbms/src/Flash/tests/exchange_perftest.cpp deleted file mode 100644 index c2e047bec62..00000000000 --- a/dbms/src/Flash/tests/exchange_perftest.cpp +++ /dev/null @@ -1,699 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include // to include the implementation of StreamingDAGResponseWriter -#include // to include the implementation of ExchangeReceiver -#include // to include the implementation of MPPTunnel -#include // to include the implementation of MPPTunnelSet -#include -#include -#include - -namespace DB::tests -{ -namespace -{ -std::random_device rd; - -using Packet = mpp::MPPDataPacket; -using PacketPtr = std::shared_ptr; -using PacketQueue = MPMCQueue; -using PacketQueuePtr = std::shared_ptr; -using StopFlag = std::atomic; - -std::atomic received_data_size{0}; - -struct MockReceiverContext -{ - struct Status - { - int status_code = 0; - String error_msg; - - bool ok() const - { - return status_code == 0; - } - - const String & error_message() const - { - return error_msg; - } - - int error_code() const - { - return status_code; - } - }; - - struct Request - { - String debugString() const - { - return "{Request}"; - } - - int source_index = 0; - int send_task_id = 0; - int recv_task_id = -1; - }; - - struct Reader - { - explicit Reader(const PacketQueuePtr & queue_) - : queue(queue_) - {} - - void initialize() const - { - } - - bool read(PacketPtr & packet [[maybe_unused]]) const - { - PacketPtr res; - if (queue->pop(res)) - { - received_data_size.fetch_add(res->ByteSizeLong()); - *packet = *res; // avoid change shared packets - return true; - } - return false; - } - - Status finish() const - { - return {0, ""}; - } - - PacketQueuePtr queue; - }; - - MockReceiverContext( - const std::vector & queues_, - const std::vector & field_types_) - : queues(queues_) - , field_types(field_types_) - { - } - - void fillSchema(DAGSchema & schema) const - { - schema.clear(); - for (size_t i = 0; i < field_types.size(); ++i) - { - String name = "exchange_receiver_" + std::to_string(i); - ColumnInfo info = TiDB::fieldTypeToColumnInfo(field_types[i]); - schema.emplace_back(std::move(name), std::move(info)); - } - } - - Request makeRequest(int index) const - { - return {index, index, -1}; - } - - std::shared_ptr makeReader(const Request & request) - { - return std::make_shared(queues[request.send_task_id]); - } - - static Status getStatusOK() - { - return {0, ""}; - } - - std::vector queues; - std::vector field_types; -}; - -using MockExchangeReceiver = 
ExchangeReceiverBase; -using MockExchangeReceiverPtr = std::shared_ptr; -using MockExchangeReceiverInputStream = TiRemoteBlockInputStream; - -struct MockWriter -{ - explicit MockWriter(PacketQueuePtr queue_) - : queue(std::move(queue_)) - {} - - bool Write(const Packet & packet) - { - queue->push(std::make_shared(packet)); - return true; - } - - void finish() - { - queue->finish(); - } - - PacketQueuePtr queue; -}; - -using MockWriterPtr = std::shared_ptr; -using MockTunnel = MPPTunnelBase; -using MockTunnelPtr = std::shared_ptr; -using MockTunnelSet = MPPTunnelSetBase; -using MockTunnelSetPtr = std::shared_ptr; - -struct MockBlockInputStream : public IProfilingBlockInputStream -{ - const std::vector & blocks; - Block header; - std::mt19937 mt; - std::uniform_int_distribution dist; - StopFlag & stop_flag; - - MockBlockInputStream(const std::vector & blocks_, StopFlag & stop_flag_) - : blocks(blocks_) - , header(blocks[0].cloneEmpty()) - , mt(rd()) - , dist(0, blocks.size() - 1) - , stop_flag(stop_flag_) - {} - - String getName() const override { return "MockBlockInputStream"; } - Block getHeader() const override { return header; } - - Block readImpl() override - { - if (stop_flag.load(std::memory_order_relaxed)) - return Block{}; - return blocks[dist(mt)]; - } -}; - -Block makeBlock(int row_num) -{ - std::mt19937 mt(rd()); - std::uniform_int_distribution int64_dist; - std::uniform_int_distribution len_dist(10, 20); - std::uniform_int_distribution char_dist; - - InferredDataVector> int64_vec; - InferredDataVector> int64_vec2; - for (int i = 0; i < row_num; ++i) - { - int64_vec.emplace_back(int64_dist(mt)); - int64_vec2.emplace_back(int64_dist(mt)); - } - - InferredDataVector> string_vec; - for (int i = 0; i < row_num; ++i) - { - int len = len_dist(mt); - String s; - for (int j = 0; j < len; ++j) - s.push_back(char_dist(mt)); - string_vec.push_back(std::move(s)); - } - - auto int64_data_type = makeDataType>(); - ColumnWithTypeAndName int64_column(makeColumn>(int64_data_type, int64_vec), int64_data_type, "int64_1"); - ColumnWithTypeAndName int64_column2(makeColumn>(int64_data_type, int64_vec2), int64_data_type, "int64_2"); - - auto string_data_type = makeDataType>(); - ColumnWithTypeAndName string_column(makeColumn>(string_data_type, string_vec), string_data_type, "string"); - - return Block({int64_column, string_column, int64_column2}); -} - -std::vector makeBlocks(int block_num, int row_num) -{ - std::vector blocks; - for (int i = 0; i < block_num; ++i) - blocks.push_back(makeBlock(row_num)); - return blocks; -} - -mpp::MPPDataPacket makePacket(ChunkCodecStream & codec, int row_num) -{ - auto block = makeBlock(row_num); - codec.encode(block, 0, row_num); - - mpp::MPPDataPacket packet; - packet.add_chunks(codec.getString()); - codec.clear(); - - return packet; -} - -std::vector makePackets(ChunkCodecStream & codec, int packet_num, int row_num) -{ - std::vector packets; - for (int i = 0; i < packet_num; ++i) - packets.push_back(std::make_shared(makePacket(codec, row_num))); - return packets; -} - -std::vector makePacketQueues(int source_num, int queue_size) -{ - std::vector queues; - for (int i = 0; i < source_num; ++i) - queues.push_back(std::make_shared(queue_size)); - return queues; -} - -std::vector makeFields() -{ - std::vector fields(3); - fields[0].set_tp(TiDB::TypeLongLong); - fields[1].set_tp(TiDB::TypeString); - fields[2].set_tp(TiDB::TypeLongLong); - return fields; -} - -void printException(const Exception & e) -{ - std::string text = e.displayText(); - - auto 
embedded_stack_trace_pos = text.find("Stack trace"); - std::cerr << "Code: " << e.code() << ". " << text << std::endl - << std::endl; - if (std::string::npos == embedded_stack_trace_pos) - std::cerr << "Stack trace:" << std::endl - << e.getStackTrace().toString() << std::endl; -} - -void sendPacket(const std::vector & packets, const PacketQueuePtr & queue, StopFlag & stop_flag) -{ - std::mt19937 mt(rd()); - std::uniform_int_distribution dist(0, packets.size() - 1); - - while (!stop_flag.load()) - { - int i = dist(mt); - queue->tryPush(packets[i], std::chrono::milliseconds(10)); - } - queue->finish(); -} - -void receivePacket(const PacketQueuePtr & queue) -{ - while (true) - { - PacketPtr packet; - if (queue->pop(packet)) - received_data_size.fetch_add(packet->ByteSizeLong()); - else - break; - } -} - -template -void readBlock(BlockInputStreamPtr stream) -{ - [[maybe_unused]] auto get_rate = [](auto count, auto duration) { - return count * 1000 / duration.count(); - }; - - [[maybe_unused]] auto get_mib = [](auto v) { - return v / 1024 / 1024; - }; - - [[maybe_unused]] auto start = std::chrono::high_resolution_clock::now(); - [[maybe_unused]] auto second_ago = start; - [[maybe_unused]] Int64 block_count = 0; - [[maybe_unused]] Int64 last_block_count = 0; - [[maybe_unused]] Int64 last_data_size = received_data_size.load(); - try - { - stream->readPrefix(); - while (auto block = stream->read()) - { - if constexpr (print_progress) - { - ++block_count; - auto cur = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(cur - second_ago); - if (duration.count() >= 1000) - { - Int64 data_size = received_data_size.load(); - std::cout - << fmt::format( - "Blocks: {:<10} Data(MiB): {:<8} Block/s: {:<6} Data/s(MiB): {:<6}", - block_count, - get_mib(data_size), - get_rate(block_count - last_block_count, duration), - get_mib(get_rate(data_size - last_data_size, duration))) - << std::endl; - second_ago = cur; - last_block_count = block_count; - last_data_size = data_size; - } - } - } - stream->readSuffix(); - - if constexpr (print_progress) - { - auto cur = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(cur - start); - Int64 data_size = received_data_size.load(); - std::cout - << fmt::format( - "End. 
Blocks: {:<10} Data(MiB): {:<8} Block/s: {:<6} Data/s(MiB): {:<6}", - block_count, - get_mib(data_size), - get_rate(block_count, duration), - get_mib(get_rate(data_size, duration))) - << std::endl; - } - } - catch (const Exception & e) - { - printException(e); - throw; - } -} - -struct ReceiverHelper -{ - const int source_num; - tipb::ExchangeReceiver pb_exchange_receiver; - std::vector fields; - mpp::TaskMeta task_meta; - std::vector queues; - std::shared_ptr join_ptr; - - explicit ReceiverHelper(int source_num_) - : source_num(source_num_) - { - pb_exchange_receiver.set_tp(tipb::Hash); - for (int i = 0; i < source_num; ++i) - { - mpp::TaskMeta task; - task.set_start_ts(0); - task.set_task_id(i); - task.set_partition_id(i); - task.set_address(""); - - String encoded_task; - task.SerializeToString(&encoded_task); - - pb_exchange_receiver.add_encoded_task_meta(encoded_task); - } - - fields = makeFields(); - *pb_exchange_receiver.add_field_types() = fields[0]; - *pb_exchange_receiver.add_field_types() = fields[1]; - *pb_exchange_receiver.add_field_types() = fields[2]; - - task_meta.set_task_id(100); - - queues = makePacketQueues(source_num, 10); - } - - MockExchangeReceiverPtr buildReceiver() - { - return std::make_shared( - std::make_shared(queues, fields), - source_num, - source_num * 5, - nullptr); - } - - BlockInputStreamPtr buildUnionStream(int concurrency) - { - auto receiver = buildReceiver(); - std::vector streams; - for (int i = 0; i < concurrency; ++i) - streams.push_back(std::make_shared(receiver, nullptr)); - return std::make_shared>(streams, nullptr, concurrency, /*req_id=*/""); - } - - BlockInputStreamPtr buildUnionStreamWithHashJoinBuildStream(int concurrency) - { - auto receiver = buildReceiver(); - std::vector streams; - for (int i = 0; i < concurrency; ++i) - streams.push_back(std::make_shared(receiver, nullptr)); - - auto receiver_header = streams.front()->getHeader(); - auto key_name = receiver_header.getByPosition(0).name; - - join_ptr = std::make_shared( - Names{key_name}, - Names{key_name}, - true, - SizeLimits(0, 0, OverflowMode::THROW), - ASTTableJoin::Kind::Inner, - ASTTableJoin::Strictness::All, - /*req_id=*/"", - TiDB::TiDBCollators{nullptr}, - "", - "", - "", - "", - nullptr, - 65536); - - join_ptr->init(receiver_header, concurrency); - - for (int i = 0; i < concurrency; ++i) - streams[i] = std::make_shared(streams[i], join_ptr, i, /*req_id=*/""); - - return std::make_shared>(streams, nullptr, concurrency, /*req_id=*/""); - } - - void finish() - { - if (join_ptr) - { - join_ptr->setBuildTableState(Join::BuildTableState::SUCCEED); - std::cout << fmt::format("Hash table size: {} bytes", join_ptr->getTotalByteCount()) << std::endl; - } - } -}; - -struct SenderHelper -{ - const int source_num; - const int concurrency; - - std::vector queues; - std::vector mock_writers; - std::vector tunnels; - MockTunnelSetPtr tunnel_set; - std::unique_ptr dag_context; - - SenderHelper( - int source_num_, - int concurrency_, - const std::vector & queues_, - const std::vector & fields) - : source_num(source_num_) - , concurrency(concurrency_) - , queues(queues_) - { - mpp::TaskMeta task_meta; - tunnel_set = std::make_shared(); - for (int i = 0; i < source_num; ++i) - { - auto writer = std::make_shared(queues[i]); - mock_writers.push_back(writer); - - auto tunnel = std::make_shared( - task_meta, - task_meta, - std::chrono::seconds(60), - concurrency, - false); - tunnel->connect(writer.get()); - tunnels.push_back(tunnel); - tunnel_set->addTunnel(tunnel); - } - - tipb::DAGRequest 
dag_request; - tipb::Executor root_executor; - root_executor.set_executor_id("ExchangeSender_100"); - *dag_request.mutable_root_executor() = root_executor; - - dag_context = std::make_unique(dag_request); - dag_context->is_mpp_task = true; - dag_context->is_root_mpp_task = false; - dag_context->encode_type = tipb::EncodeType::TypeCHBlock; - dag_context->result_field_types = fields; - } - - BlockInputStreamPtr buildUnionStream( - StopFlag & stop_flag, - const std::vector & blocks) - { - std::vector send_streams; - for (int i = 0; i < concurrency; ++i) - { - BlockInputStreamPtr stream = std::make_shared(blocks, stop_flag); - std::unique_ptr response_writer( - new StreamingDAGResponseWriter( - tunnel_set, - {0, 1, 2}, - TiDB::TiDBCollators(3), - tipb::Hash, - -1, - -1, - true, - *dag_context)); - send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); - } - - return std::make_shared>(send_streams, nullptr, concurrency, /*req_id=*/""); - } - - void finish() - { - for (size_t i = 0; i < tunnels.size(); ++i) - { - tunnels[i]->writeDone(); - tunnels[i]->waitForFinish(); - mock_writers[i]->finish(); - } - } -}; - -void testOnlyReceiver(int concurrency, int source_num, int block_rows, int seconds) -{ - ReceiverHelper receiver_helper(source_num); - auto union_input_stream = receiver_helper.buildUnionStream(concurrency); - - auto chunk_codec_stream = CHBlockChunkCodec().newCodecStream(receiver_helper.fields); - auto packets = makePackets(*chunk_codec_stream, 100, block_rows); - - StopFlag stop_flag(false); - - std::vector threads; - for (const auto & queue : receiver_helper.queues) - threads.emplace_back(sendPacket, std::cref(packets), queue, std::ref(stop_flag)); - threads.emplace_back(readBlock, union_input_stream); - - std::this_thread::sleep_for(std::chrono::seconds(seconds)); - stop_flag.store(true); - for (auto & thread : threads) - thread.join(); - - receiver_helper.finish(); -} - -template -void testSenderReceiver(int concurrency, int source_num, int block_rows, int seconds) -{ - ReceiverHelper receiver_helper(source_num); - BlockInputStreamPtr union_receive_stream; - if constexpr (with_join) - union_receive_stream = receiver_helper.buildUnionStreamWithHashJoinBuildStream(concurrency); - else - union_receive_stream = receiver_helper.buildUnionStream(concurrency); - - StopFlag stop_flag(false); - auto blocks = makeBlocks(100, block_rows); - - SenderHelper sender_helper(source_num, concurrency, receiver_helper.queues, receiver_helper.fields); - auto union_send_stream = sender_helper.buildUnionStream(stop_flag, blocks); - - auto write_thread = std::thread(readBlock, union_send_stream); - auto read_thread = std::thread(readBlock, union_receive_stream); - - std::this_thread::sleep_for(std::chrono::seconds(seconds)); - stop_flag.store(true); - - write_thread.join(); - sender_helper.finish(); - - read_thread.join(); - receiver_helper.finish(); -} - -void testOnlySender(int concurrency, int source_num, int block_rows, int seconds) -{ - auto queues = makePacketQueues(source_num, 10); - auto fields = makeFields(); - - StopFlag stop_flag(false); - auto blocks = makeBlocks(100, block_rows); - - SenderHelper sender_helper(source_num, concurrency, queues, fields); - auto union_send_stream = sender_helper.buildUnionStream(stop_flag, blocks); - - auto write_thread = std::thread(readBlock, union_send_stream); - std::vector read_threads; - for (int i = 0; i < source_num; ++i) - read_threads.emplace_back(receivePacket, queues[i]); - - 
std::this_thread::sleep_for(std::chrono::seconds(seconds)); - stop_flag.store(true); - - write_thread.join(); - sender_helper.finish(); - - for (auto & t : read_threads) - t.join(); -} - -} // namespace -} // namespace DB::tests - -int main(int argc [[maybe_unused]], char ** argv [[maybe_unused]]) -{ - if (argc < 2 || argc > 6) - { - std::cerr << fmt::format("Usage: {} [receiver|sender|sender_receiver|sender_receiver_join] ", argv[0]) << std::endl; - exit(1); - } - - String method = argv[1]; - int concurrency = argc >= 3 ? atoi(argv[2]) : 5; - int source_num = argc >= 4 ? atoi(argv[3]) : 2; - int block_rows = argc >= 5 ? atoi(argv[4]) : 5000; - int seconds = argc >= 6 ? atoi(argv[5]) : 10; - - using TestHandler = std::function; - std::unordered_map handlers = { - {"receiver", DB::tests::testOnlyReceiver}, - {"sender", DB::tests::testOnlySender}, - {"sender_receiver", DB::tests::testSenderReceiver}, - {"sender_receiver_join", DB::tests::testSenderReceiver}, - }; - - auto it = handlers.find(method); - if (it != handlers.end()) - { - std::cout - << fmt::format( - "{}. concurrency = {}. source_num = {}. block_rows = {}. seconds = {}", - method, - concurrency, - source_num, - block_rows, - seconds) - << std::endl; - it->second(concurrency, source_num, block_rows, seconds); - } - else - { - std::cerr << "Unknown method: " << method << std::endl; - exit(1); - } -} diff --git a/dbms/src/Flash/tests/gtest_executor.cpp b/dbms/src/Flash/tests/gtest_executor.cpp index 64c60f14bb6..b4ba1a75563 100644 --- a/dbms/src/Flash/tests/gtest_executor.cpp +++ b/dbms/src/Flash/tests/gtest_executor.cpp @@ -227,4 +227,4 @@ try CATCH } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_limit_executor.cpp b/dbms/src/Flash/tests/gtest_limit_executor.cpp new file mode 100644 index 00000000000..e4a3aa5db5e --- /dev/null +++ b/dbms/src/Flash/tests/gtest_limit_executor.cpp @@ -0,0 +1,77 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorLimitTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColDataType = std::optional::FieldType>; + using ColumnWithData = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_name}, + {{col_name, TiDB::TP::TypeString}}, + {toNullableVec(col_name, col0)}); + } + + std::shared_ptr buildDAGRequest(size_t limit_num) + { + return context.scan(db_name, table_name).limit(limit_num).build(context); + } + + /// Prepare some names + const String db_name{"test_db"}; + const String table_name{"projection_test_table"}; + const String col_name{"limit_col"}; + const ColumnWithData col0{"col0-0", {}, "col0-2", "col0-3", {}, "col0-5", "col0-6", "col0-7"}; +}; + +TEST_F(ExecutorLimitTestRunner, Limit) +try +{ + std::shared_ptr request; + ColumnsWithTypeAndName expect_cols; + + /// Check limit result with various parameters + const size_t col_data_num = col0.size(); + for (size_t limit_num = 0; limit_num <= col_data_num + 3; ++limit_num) + { + if (limit_num == col_data_num + 3) + limit_num = INT_MAX; + request = buildDAGRequest(limit_num); + + if (limit_num == 0) + expect_cols = {}; + else if (limit_num > col_data_num) + expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.end()))}; + else + expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.begin() + limit_num))}; + + executeStreams(request, expect_cols); + } +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_projection_executor.cpp b/dbms/src/Flash/tests/gtest_projection_executor.cpp new file mode 100644 index 00000000000..4f6401eb483 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_projection_executor.cpp @@ -0,0 +1,225 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorProjectionTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColDataString = std::vector::FieldType>>; + using ColDataInt32 = std::vector::FieldType>>; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_name}, + {{col_names[0], TiDB::TP::TypeString}, + {col_names[1], TiDB::TP::TypeString}, + {col_names[2], TiDB::TP::TypeString}, + {col_names[3], TiDB::TP::TypeLong}, + {col_names[4], TiDB::TP::TypeLong}}, + {toNullableVec(col_names[0], col0), + toNullableVec(col_names[1], col1), + toNullableVec(col_names[2], col2), + toNullableVec(col_names[3], col3), + toNullableVec(col_names[4], col4)}); + } + + template + std::shared_ptr buildDAGRequest(T param, const String & sort_col) + { + /// topN is introduced, so that we can get stable results in concurrency environment. 
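The limit test above encodes a simple expectation rule: `LIMIT 0` yields nothing, a limit at or beyond the row count yields every row, and anything in between yields the first `limit` rows. A container-level sketch of that rule, separate from the test framework:

```cpp
// Expected-result rule for the limit test, over plain vectors.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

std::vector<std::string> expectedAfterLimit(const std::vector<std::string> & rows, size_t limit)
{
    if (limit == 0)
        return {};
    const size_t n = std::min(limit, rows.size());
    return {rows.begin(), rows.begin() + n};
}

int main()
{
    std::vector<std::string> rows{"r0", "r1", "r2"};
    assert(expectedAfterLimit(rows, 0).empty());      // LIMIT 0 -> empty result
    assert(expectedAfterLimit(rows, 2).size() == 2);  // prefix of the data
    assert(expectedAfterLimit(rows, 100).size() == 3); // limit beyond row count -> all rows
    return 0;
}
```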
+ return context.scan(db_name, table_name).project(param).topN(sort_col, false, 100).build(context); + }; + + void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) + { + for (size_t i = 1; i < 10; i += 2) + { + executeStreams(request, expect_columns, i); + } + } + + /// Prepare column data + const ColDataString col0{"col0-0", "col0-1", "", "col0-2", {}, "col0-3", ""}; + const ColDataString col1{"col1-0", {}, "", "col1-1", "", "col1-2", "col1-3"}; + const ColDataString col2{"", "col2-0", "col2-1", {}, "col2-3", {}, "col2-4"}; + const ColDataInt32 col3{1, {}, 0, -111111, {}, 0, 9999}; + + /** Each value in col4 should be different from each other so that topn + * could sort the columns into an unique result, or multi-results could + * be right. + */ + const ColDataInt32 col4{0, 5, -123, -234, {}, 24353, 9999}; + + /// Results after sorted by col4 + const ColDataString col0_sorted_asc{{}, "col0-2", "", "col0-0", "col0-1", "", "col0-3"}; + const ColDataString col1_sorted_asc{"", "col1-1", "", "col1-0", {}, "col1-3", "col1-2"}; + const ColDataString col2_sorted_asc{"col2-3", {}, "col2-1", "", "col2-0", "col2-4", {}}; + const ColDataInt32 col3_sorted_asc{{}, -111111, 0, 1, {}, 9999, 0}; + const ColDataInt32 col4_sorted_asc{{}, -234, -123, 0, 5, 9999, 24353}; + + /// Prepare some names + std::vector col_names{"col0", "col1", "col2", "col3", "col4"}; + const String db_name{"test_db"}; + const String table_name{"projection_test_table"}; +}; + +TEST_F(ExecutorProjectionTestRunner, Projection) +try +{ + /// Check single column + auto request = buildDAGRequest({col_names[4]}, col_names[4]); + executeWithConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[4]}, col_names[4]); + executeWithConcurrency(request, + { + toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + }); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[1], col_names[4]}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[1], col1_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check duplicate columns + request = buildDAGRequest({col_names[4], col_names[4], col_names[4]}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + + { + /// Check large number of columns + const size_t col_num = 100; + MockColumnNamesVec projection_input; + ColumnsWithTypeAndName columns; + auto expect_column = toNullableVec(col_names[4], col4_sorted_asc); + + for (size_t i = 0; i < col_num; ++i) + { + projection_input.push_back(col_names[4]); + columns.push_back(expect_column); + } + + request = buildDAGRequest(projection_input, col_names[4]); + executeWithConcurrency(request, columns); + } +} +CATCH + +TEST_F(ExecutorProjectionTestRunner, ProjectionFunction) +try +{ + std::shared_ptr request; + + /// Test "equal" function + + /// Data type: TypeString + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[0])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[1])), col(col_names[4])}, col_names[4]); + 
executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({eq(col(col_names[3]), col(col_names[4])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "greater" function + + /// Data type: TypeString + request = buildDAGRequest({gt(col(col_names[0]), col(col_names[1])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[1]), col(col_names[0])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({gt(col(col_names[3]), col(col_names[4])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[4]), col(col_names[3])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "and" function + + /// Data type: TypeString + request = buildDAGRequest({And(col(col_names[0]), col(col_names[0])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({And(col(col_names[0]), col(col_names[1])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({0, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({And(col(col_names[3]), col(col_names[4])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Test "not" function + + /// Data type: TypeString + request = buildDAGRequest({NOT(col(col_names[0])), NOT(col(col_names[1])), NOT(col(col_names[2])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, {}, 1, 1}), + toNullableVec({1, {}, 1, 1, 1, 1, {}}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({NOT(col(col_names[3])), NOT(col(col_names[4])), col(col_names[4])}, col_names[4]); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), + toNullableVec({{}, 0, 0, 1, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// TODO more functions... +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_topn_executor.cpp b/dbms/src/Flash/tests/gtest_topn_executor.cpp new file mode 100644 index 00000000000..0e55702795d --- /dev/null +++ b/dbms/src/Flash/tests/gtest_topn_executor.cpp @@ -0,0 +1,221 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
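The expected vectors in the `and`/`not` cases above follow MySQL-style three-valued logic over nullable columns: a definite FALSE makes AND false regardless of NULLs, any remaining NULL makes the result NULL, and NOT of NULL is NULL. A small sketch of those rules with `std::optional` standing in for nullable values:

```cpp
// Three-valued AND/NOT over nullable booleans.
#include <iostream>
#include <optional>

using NullableBool = std::optional<bool>;

NullableBool logicalAnd(NullableBool a, NullableBool b)
{
    if ((a && !*a) || (b && !*b))
        return false; // a definite FALSE wins regardless of NULLs
    if (!a || !b)
        return std::nullopt; // otherwise any NULL makes the result NULL
    return true;
}

NullableBool logicalNot(NullableBool a)
{
    if (!a)
        return std::nullopt; // NOT NULL is NULL
    return !*a;
}

void print(NullableBool v)
{
    std::cout << (v ? (*v ? "1" : "0") : "NULL") << '\n';
}

int main()
{
    print(logicalAnd(std::nullopt, false)); // 0
    print(logicalAnd(std::nullopt, true));  // NULL
    print(logicalNot(std::nullopt));        // NULL
}
```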
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorTopNTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColStringType = std::optional::FieldType>; + using ColInt32Type = std::optional::FieldType>; + using ColumnWithString = std::vector; + using ColumnWithInt32 = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_single_name}, + {{single_col_name, TiDB::TP::TypeString}}, + {toNullableVec(single_col_name, col0)}); + + context.addMockTable({db_name, table_name}, + {{col_name[0], TiDB::TP::TypeLong}, + {col_name[1], TiDB::TP::TypeString}, + {col_name[2], TiDB::TP::TypeString}, + {col_name[3], TiDB::TP::TypeLong}}, + {toNullableVec(col_name[0], col_age), + toNullableVec(col_name[1], col_gender), + toNullableVec(col_name[2], col_country), + toNullableVec(col_name[3], c0l_salary)}); + } + + std::shared_ptr buildDAGRequest(const String & table_name, const String & col_name, bool is_desc, int limit_num) + { + return context.scan(db_name, table_name).topN(col_name, is_desc, limit_num).build(context); + } + + std::shared_ptr buildDAGRequest(const String & table_name, MockOrderByItems order_by_items, int limit, MockAsts func_proj_ast = {}, MockColumnNames out_proj_ast = {}) + { + if (func_proj_ast.size() == 0) + return context.scan(db_name, table_name).topN(order_by_items, limit).build(context); + else + return context.scan(db_name, table_name).project(func_proj_ast).topN(order_by_items, limit).project(out_proj_ast).build(context); + } + + /// Prepare some names + const String db_name{"test_db"}; + + const String table_single_name{"topn_single_table"}; /// For single column test + const String single_col_name{"single_col"}; + ColumnWithString col0{"col0-0", "col0-1", "col0-2", {}, "col0-4", {}, "col0-6", "col0-7"}; + + const String table_name{"clerk"}; + const std::vector col_name{"age", "gender", "country", "salary"}; + ColumnWithInt32 col_age{{}, 27, 32, 36, {}, 34}; + ColumnWithString col_gender{"female", "female", "male", "female", "male", "male"}; + ColumnWithString col_country{"korea", "usa", "usa", "china", "china", "china"}; + ColumnWithInt32 c0l_salary{1300, 0, {}, 900, {}, -300}; +}; + +TEST_F(ExecutorTopNTestRunner, TopN) +try +{ + std::shared_ptr request; + std::vector expect_cols; + + { + /// Test single column + size_t col_data_num = col0.size(); + for (size_t i = 1; i <= 1; ++i) + { + bool is_desc; + is_desc = static_cast(i); /// Set descent or ascent + if (is_desc) + sort(col0.begin(), col0.end(), std::greater()); /// Sort col0 for the following comparison + else + sort(col0.begin(), col0.end()); + + for (size_t limit_num = 0; limit_num <= col_data_num + 5; ++limit_num) + { + request = buildDAGRequest(table_single_name, single_col_name, is_desc, limit_num); + + expect_cols.clear(); + if (limit_num == 0 || limit_num > col_data_num) + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.end()))}); + else + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.begin() + 
limit_num))}); + + executeStreams(request, expect_cols[0]); + executeStreams(request, expect_cols[0], 2); + executeStreams(request, expect_cols[0], 4); + executeStreams(request, expect_cols[0], 8); + } + } + } + + { + /// Test multi-columns + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{36, 34, 32, 27, {}, {}}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "male", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{900, -300, {}, 0, {}, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{32, {}, 34, 27, 36, {}}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{{}, {}, -300, 0, 900, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{34, {}, 32, 36, {}, 27}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "china", "korea", "usa"}), + toNullableVec(col_name[3], ColumnWithInt32{-300, {}, {}, 900, 1300, 0})}}; + + std::vector order_by_items{ + /// select * from clerk order by age DESC, gender DESC; + {MockOrderByItem(col_name[0], true), MockOrderByItem(col_name[1], true)}, + /// select * from clerk order by gender DESC, salary ASC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[3], false)}, + /// select * from clerk order by gender DESC, country ASC, salary DESC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[2], false), MockOrderByItem(col_name[3], true)}}; + + size_t test_num = expect_cols.size(); + + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(table_name, order_by_items[i], 100); + executeStreams(request, expect_cols[i]); + } + } +} +CATCH + +TEST_F(ExecutorTopNTestRunner, TopNFunction) +try +{ + std::shared_ptr request; + std::vector expect_cols; + MockColumnNames output_projection{col_name[0], col_name[1], col_name[2], col_name[3]}; + MockAsts func_projection; // Do function operation for topn + MockOrderByItems order_by_items; + ASTPtr col0_ast = col(col_name[0]); + ASTPtr col1_ast = col(col_name[1]); + ASTPtr col2_ast = col(col_name[2]); + ASTPtr col3_ast = col(col_name[3]); + ASTPtr func_ast; + + { + /// "and" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, {}, 32, 27, 36, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "china", "usa", "usa", "china", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 0, 900, -300})}}; + + { + /// select * from clerk order by age and salary ASC limit 100; + order_by_items = {MockOrderByItem("and(age, salary)", false)}; + func_ast = And(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + executeStreams(request, expect_cols[0]); + } + } + + { + /// "equal" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{27, 36, 34, 32, {}, {}}), + toNullableVec(col_name[1], ColumnWithString{"female", "female", "male", "male", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"usa", 
"china", "china", "usa", "korea", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{0, 900, -300, {}, 1300, {}})}}; + + { + /// select age, salary from clerk order by age = salary DESC limit 100; + order_by_items = {MockOrderByItem("equals(age, salary)", true)}; + func_ast = eq(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + executeStreams(request, expect_cols[0]); + } + } + + { + /// "greater" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, 32, {}, 36, 27, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "usa", "china", "china", "usa", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 900, 0, -300})}}; + + { + /// select age, gender, country, salary from clerk order by age > salary ASC limit 100; + order_by_items = {MockOrderByItem("greater(age, salary)", false)}; + func_ast = gt(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + executeStreams(request, expect_cols[0]); + } + } + + /// TODO more functions... +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/FunctionsGeo.cpp index a6fd2ff522e..02e11b66d77 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/FunctionsGeo.cpp @@ -28,13 +28,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event PolygonsAddedToPool; -extern const Event PolygonsInPoolAllocatedBytes; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -60,9 +53,6 @@ ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, P /// To allocate memory. ptr->init(); - ProfileEvents::increment(ProfileEvents::PolygonsAddedToPool); - ProfileEvents::increment(ProfileEvents::PolygonsInPoolAllocatedBytes, ptr->getAllocatedBytes()); - return ptr.release(); }; @@ -121,30 +111,30 @@ class FunctionPointInPolygon : public IFunction throw Exception("Too few arguments", ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION); } - auto getMsgPrefix = [this](size_t i) { + auto get_msg_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); }; for (size_t i = 1; i < arguments.size(); ++i) { - auto * array = checkAndGetDataType(arguments[i].get()); + const auto * array = checkAndGetDataType(arguments[i].get()); if (array == nullptr && i != 1) - throw Exception(getMsgPrefix(i) + " must be array of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(get_msg_prefix(i) + " must be array of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - auto * tuple = checkAndGetDataType(array ? array->getNestedType().get() : arguments[i].get()); + const auto * tuple = checkAndGetDataType(array ? 
array->getNestedType().get() : arguments[i].get()); if (tuple == nullptr) - throw Exception(getMsgPrefix(i) + " must contains tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(get_msg_prefix(i) + " must contains tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const DataTypes & elements = tuple->getElements(); if (elements.size() != 2) - throw Exception(getMsgPrefix(i) + " must have exactly two elements.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(get_msg_prefix(i) + " must have exactly two elements.", ErrorCodes::BAD_ARGUMENTS); for (auto j : ext::range(0, elements.size())) { if (!elements[j]->isNumber()) { - throw Exception(getMsgPrefix(i) + " must contains numeric tuple at position " + toString(j + 1), + throw Exception(get_msg_prefix(i) + " must contains numeric tuple at position " + toString(j + 1), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } @@ -156,10 +146,10 @@ class FunctionPointInPolygon : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * point_col = block.getByPosition(arguments[0]).column.get(); - auto const_tuple_col = checkAndGetColumn(point_col); + const auto * const_tuple_col = checkAndGetColumn(point_col); if (const_tuple_col) point_col = &const_tuple_col->getDataColumn(); - auto tuple_col = checkAndGetColumn(point_col); + const auto * tuple_col = checkAndGetColumn(point_col); if (!tuple_col) { @@ -207,18 +197,18 @@ class FunctionPointInPolygon : public IFunction { Polygon polygon; - auto getMsgPrefix = [this](size_t i) { + auto get_msg_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); }; for (size_t i = 1; i < arguments.size(); ++i) { - auto const_col = checkAndGetColumn(block.getByPosition(arguments[i]).column.get()); - auto array_col = const_col ? checkAndGetColumn(&const_col->getDataColumn()) : nullptr; - auto tuple_col = array_col ? checkAndGetColumn(&array_col->getData()) : nullptr; + const auto * const_col = checkAndGetColumn(block.getByPosition(arguments[i]).column.get()); + const auto * array_col = const_col ? checkAndGetColumn(&const_col->getDataColumn()) : nullptr; + const auto * tuple_col = array_col ? checkAndGetColumn(&array_col->getData()) : nullptr; if (!tuple_col) - throw Exception(getMsgPrefix(i) + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(get_msg_prefix(i) + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN); const auto & tuple_columns = tuple_col->getColumns(); const auto & column_x = tuple_columns[0]; @@ -232,7 +222,7 @@ class FunctionPointInPolygon : public IFunction auto size = column_x->size(); if (size == 0) - throw Exception(getMsgPrefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(get_msg_prefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN); for (auto j : ext::range(0, size)) { @@ -246,11 +236,11 @@ class FunctionPointInPolygon : public IFunction container.push_back(container.front()); } - auto callImpl = use_object_pool + auto call_impl = use_object_pool ? 
FunctionPointInPolygonDetail::callPointInPolygonImplWithPool, PointInPolygonImpl> : FunctionPointInPolygonDetail::callPointInPolygonImpl, PointInPolygonImpl>; - return callImpl(x, y, polygon); + return call_impl(x, y, polygon); } }; diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index 119169be8b5..3eddd383cfb 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -18,13 +18,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event RegexpCreated; -} - - namespace DB { namespace Regexps @@ -54,7 +47,6 @@ inline Pool::Pointer get(const std::string & pattern, int flags) if (no_capture) flags |= OptimizedRegularExpression::RE_NO_CAPTURE; - ProfileEvents::increment(ProfileEvents::RegexpCreated); return new Regexp{createRegexp(pattern, flags)}; }); } diff --git a/dbms/src/IO/BufferWithOwnMemory.h b/dbms/src/IO/BufferWithOwnMemory.h index 272f4fc5c01..babe2541b33 100644 --- a/dbms/src/IO/BufferWithOwnMemory.h +++ b/dbms/src/IO/BufferWithOwnMemory.h @@ -21,14 +21,6 @@ #include - -namespace ProfileEvents -{ -extern const Event IOBufferAllocs; -extern const Event IOBufferAllocBytes; -} // namespace ProfileEvents - - namespace DB { /** Replacement for std::vector to use in buffers. @@ -119,9 +111,6 @@ struct Memory return; } - ProfileEvents::increment(ProfileEvents::IOBufferAllocs); - ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, m_capacity); - size_t new_capacity = align(m_capacity, alignment); m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; diff --git a/dbms/src/IO/ChecksumBuffer.h b/dbms/src/IO/ChecksumBuffer.h index f6d60677a12..b095545ea6e 100644 --- a/dbms/src/IO/ChecksumBuffer.h +++ b/dbms/src/IO/ChecksumBuffer.h @@ -27,7 +27,6 @@ namespace ProfileEvents { // no need to update sync, since write buffers inherit that directly from `WriteBufferFromFileDescriptor` extern const Event WriteBufferFromFileDescriptorWrite; -extern const Event WriteBufferFromFileDescriptorWriteFailed; extern const Event WriteBufferFromFileDescriptorWriteBytes; extern const Event ReadBufferFromFileDescriptorRead; extern const Event ReadBufferFromFileDescriptorReadBytes; @@ -107,7 +106,6 @@ class FramedChecksumWriteBuffer : public WriteBufferFromFileDescriptor } if (unlikely(count == -1)) { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); if (errno == EINTR) continue; else @@ -386,8 +384,6 @@ class FramedChecksumReadBuffer : public ReadBufferFromFileDescriptor off_t doSeek(off_t offset, int whence) override { - ProfileEvents::increment(ProfileEvents::Seek); - auto & frame = reinterpret_cast &>( *(this->working_buffer.begin() - sizeof(ChecksumFrame))); // align should not fail diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index dd54c1b47a8..58bf47a9298 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -28,14 +28,6 @@ #include - -namespace ProfileEvents -{ -extern const Event ReadCompressedBytes; -extern const Event CompressedReadBufferBlocks; -extern const Event CompressedReadBufferBytes; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -83,8 +75,6 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_ if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) throw Exception("Too large size_compressed. 
Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); - ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed + sizeof(checksum)); - /// Is whole compressed block located in 'compressed_in' buffer? if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE && compressed_in->position() + size_compressed - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end()) @@ -115,9 +105,6 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_ template void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum) { - ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks); - ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed); - UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h if (method == static_cast(CompressionMethodByte::LZ4)) diff --git a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp index 90cc6e3ca76..4b3d52f3741 100644 --- a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp @@ -77,7 +77,7 @@ bool ReadBufferFromFileDescriptor::nextImpl() if (profile_callback) { - ProfileInfo info; + ProfileInfo info; // NOLINT info.bytes_requested = internal_buffer.size(); info.bytes_read = res; info.nanoseconds = watch->elapsed(); @@ -120,8 +120,6 @@ off_t ReadBufferFromFileDescriptor::doSeek(off_t offset, int whence) } else { - ProfileEvents::increment(ProfileEvents::Seek); - pos = working_buffer.end(); off_t res = doSeekInFile(new_pos, SEEK_SET); if (-1 == res) @@ -145,7 +143,7 @@ bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) FD_SET(fd, &fds); timeval timeout = {time_t(timeout_microseconds / 1000000), suseconds_t(timeout_microseconds % 1000000)}; - int res = select(1, &fds, 0, 0, &timeout); + int res = select(1, &fds, nullptr, nullptr, &timeout); if (-1 == res) throwFromErrno("Cannot select", ErrorCodes::CANNOT_SELECT); diff --git a/dbms/src/IO/WriteBuffer.h b/dbms/src/IO/WriteBuffer.h index 361081d1176..0c0fa2cb545 100644 --- a/dbms/src/IO/WriteBuffer.h +++ b/dbms/src/IO/WriteBuffer.h @@ -96,6 +96,24 @@ class WriteBuffer : public BufferBase } } + template + __attribute__((always_inline)) void writeFixed(const T * __restrict from) + { + if (likely(working_buffer.end() - pos >= static_cast(sizeof(T)))) + { + tiflash_compiler_builtin_memcpy(pos, from, sizeof(T)); + pos += sizeof(T); + } + else + { + [&]() __attribute__((noinline)) + { + write(reinterpret_cast(from), sizeof(T)); + } + (); + } + } + inline void write(char x) { diff --git a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp index c18337497b7..49b6d871870 100644 --- a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp @@ -24,7 +24,6 @@ namespace ProfileEvents { extern const Event FileFSync; extern const Event WriteBufferFromFileDescriptorWrite; -extern const Event WriteBufferFromFileDescriptorWriteFailed; extern const Event WriteBufferFromFileDescriptorWriteBytes; } // namespace ProfileEvents @@ -57,7 +56,6 @@ void WriteBufferFromFileDescriptor::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); throwFromErrno("Cannot write to file " + getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } diff --git a/dbms/src/IO/createReadBufferFromFileBase.cpp 
b/dbms/src/IO/createReadBufferFromFileBase.cpp index 24c9dfb204c..0d129d03a1a 100644 --- a/dbms/src/IO/createReadBufferFromFileBase.cpp +++ b/dbms/src/IO/createReadBufferFromFileBase.cpp @@ -20,13 +20,6 @@ #endif #include - -namespace ProfileEvents -{ -extern const Event CreatedReadBufferOrdinary; -extern const Event CreatedReadBufferAIO; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -46,13 +39,11 @@ createReadBufferFromFileBase( { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); return std::make_unique(filename_, buffer_size_, flags_, existing_memory_, alignment); } else { #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) - ProfileEvents::increment(ProfileEvents::CreatedReadBufferAIO); return std::make_unique(filename_, buffer_size_, flags_, existing_memory_); #else throw Exception("AIO is not implemented yet on MacOS X", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/IO/createWriteBufferFromFileBase.cpp b/dbms/src/IO/createWriteBufferFromFileBase.cpp index 96bf3e65558..0e741eb3e5d 100644 --- a/dbms/src/IO/createWriteBufferFromFileBase.cpp +++ b/dbms/src/IO/createWriteBufferFromFileBase.cpp @@ -19,13 +19,6 @@ #endif #include - -namespace ProfileEvents -{ -extern const Event CreatedWriteBufferOrdinary; -extern const Event CreatedWriteBufferAIO; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -45,13 +38,11 @@ WriteBufferFromFileBase * createWriteBufferFromFileBase( { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferOrdinary); return new WriteBufferFromFile(filename_, buffer_size_, flags_, mode, existing_memory_, alignment); } else { #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferAIO); return new WriteBufferAIO(filename_, buffer_size_, flags_, mode, existing_memory_); #else throw Exception("AIO is not implemented yet on MacOS X", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 6e067b88d81..6cb947a1bfa 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -37,19 +38,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event ExternalAggregationWritePart; -extern const Event ExternalAggregationCompressedBytes; -extern const Event ExternalAggregationUncompressedBytes; -} // namespace ProfileEvents - -namespace CurrentMetrics -{ -extern const Metric QueryThread; -} - namespace DB { namespace ErrorCodes @@ -61,6 +49,11 @@ extern const int CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS; extern const int LOGICAL_ERROR; } // namespace ErrorCodes +namespace FailPoints +{ +extern const char random_aggregate_create_state_failpoint[]; +extern const char random_aggregate_merge_failpoint[]; +} // namespace FailPoints AggregatedDataVariants::~AggregatedDataVariants() { @@ -330,6 +323,7 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const * In order that then everything is properly destroyed, we "roll back" some of the created states. * The code is not very convenient. */ + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_aggregate_create_state_failpoint); aggregate_functions[j]->create(aggregate_data + offsets_of_aggregate_states[j]); } catch (...) 
@@ -658,7 +652,6 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, co NativeBlockOutputStream block_out(compressed_buf, ClickHouseRevision::get(), getHeader(false)); LOG_FMT_DEBUG(log, "Writing part of aggregation data into temporary file {}.", path); - ProfileEvents::increment(ProfileEvents::ExternalAggregationWritePart); /// Flush only two-level data and possibly overflow data. @@ -694,9 +687,6 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, co temporary_files.sum_size_compressed += compressed_bytes; } - ProfileEvents::increment(ProfileEvents::ExternalAggregationCompressedBytes, compressed_bytes); - ProfileEvents::increment(ProfileEvents::ExternalAggregationUncompressedBytes, uncompressed_bytes); - LOG_FMT_TRACE( log, "Written part in {:.3f} sec., {} rows, " @@ -1016,7 +1006,7 @@ Block Aggregator::prepareBlockAndFill( aggregate_columns[i] = header.getByName(aggregate_column_name).type->createColumn(); /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. - ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); + auto & column_aggregate_func = assert_cast(*aggregate_columns[i]); for (auto & pool : data_variants.aggregates_pools) column_aggregate_func.addArena(pool); @@ -1502,7 +1492,7 @@ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream Block getHeader() const override { return aggregator.getHeader(final); } - ~MergingAndConvertingBlockInputStream() + ~MergingAndConvertingBlockInputStream() override { LOG_FMT_TRACE(&Poco::Logger::get(__PRETTY_FUNCTION__), "Waiting for threads to finish"); @@ -1521,6 +1511,8 @@ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream if (current_bucket_num >= NUM_BUCKETS) return {}; + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_aggregate_merge_failpoint); + AggregatedDataVariantsPtr & first = data[0]; if (current_bucket_num == -1) @@ -1636,8 +1628,6 @@ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream void thread(Int32 bucket_num) { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - try { /// TODO: add no_more_keys support maybe diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index a0adef5b50d..44699a324f4 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -77,10 +78,10 @@ namespace ProfileEvents extern const Event ContextLock; } +#include + namespace CurrentMetrics { -extern const Metric ContextLockWait; -extern const Metric MemoryTrackingForMerges; extern const Metric GlobalStorageRunMode; } // namespace CurrentMetrics @@ -308,8 +309,6 @@ Context::~Context() std::unique_lock Context::getLock() const { - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; return std::unique_lock(shared->mutex); } @@ -1879,6 +1878,30 @@ SharedQueriesPtr Context::getSharedQueries() return shared->shared_queries; } +size_t Context::getMaxStreams() const +{ + size_t max_streams = settings.max_threads; + bool is_cop_request = false; + if (dag_context != nullptr) + { + if (dag_context->isTest()) + max_streams = dag_context->initialize_concurrency; + else if (!dag_context->isBatchCop() && !dag_context->isMPPTask()) + { + is_cop_request = true; + max_streams = 1; + } + } + if (max_streams > 
1) + max_streams *= settings.max_streams_to_max_threads_ratio; + if (max_streams == 0) + max_streams = 1; + if (unlikely(max_streams != 1 && is_cop_request)) + /// for cop request, the max_streams should be 1 + throw Exception("Cop request only support running with max_streams = 1"); + return max_streams; +} + SessionCleaner::~SessionCleaner() { try diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 5d5c39263c6..b6e759e364b 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -459,6 +459,8 @@ class Context void reloadDeltaTreeConfig(const Poco::Util::AbstractConfiguration & config); + size_t getMaxStreams() const; + private: /** Check if the current client has access to the specified database. * If access is denied, throw an exception. diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 8e75a64427c..0ab8519e4d0 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -27,12 +27,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event FunctionExecute; -} - namespace DB { namespace ErrorCodes @@ -339,7 +333,6 @@ void ExpressionAction::execute(Block & block) const size_t num_columns_without_result = block.columns(); block.insert({nullptr, result_type, result_name}); - ProfileEvents::increment(ProfileEvents::FunctionExecute); function->execute(block, arguments, num_columns_without_result); break; diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index aa64cf8ca94..782a254925a 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -30,12 +30,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event InsertQuery; -} - namespace DB { namespace ErrorCodes @@ -54,7 +48,6 @@ InterpreterInsertQuery::InterpreterInsertQuery( , context(context_) , allow_materialized(allow_materialized_) { - ProfileEvents::increment(ProfileEvents::InsertQuery); } @@ -62,7 +55,7 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) { if (query.table_function) { - auto table_function = typeid_cast(query.table_function.get()); + const auto * table_function = typeid_cast(query.table_function.get()); const auto & factory = TableFunctionFactory::instance(); return factory.get(table_function->name, context)->execute(query.table_function, context); } @@ -71,7 +64,7 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) return context.getTable(query.database, query.table); } -Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) +Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) // NOLINT { Block table_sample_non_materialized; if (query.is_import) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 51b55f65bd4..fe8f04427a0 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
 #include
+#include
 #include
 #include
 #include
@@ -93,6 +94,12 @@ extern const int SCHEMA_VERSION_ERROR;
 extern const int UNKNOWN_EXCEPTION;
 } // namespace ErrorCodes
 
+
+namespace FailPoints
+{
+extern const char pause_query_init[];
+} // namespace FailPoints
+
 InterpreterSelectQuery::InterpreterSelectQuery(
     const ASTPtr & query_ptr_,
     const Context & context_,
@@ -131,7 +138,14 @@ InterpreterSelectQuery::~InterpreterSelectQuery() = default;
 
 void InterpreterSelectQuery::init(const Names & required_result_column_names)
 {
-    ProfileEvents::increment(ProfileEvents::SelectQuery);
+    /// The failpoint pause_query_init should be used together with the failpoint
+    /// unblock_query_init_after_write: it blocks a select query before its init stage
+    /// until the corresponding write action has finished.
+    /// In tests, enable unblock_query_init_after_write in the test code, and enable
+    /// pause_query_init before each write statement takes effect. When the write action
+    /// finishes, pause_query_init is disabled automatically and the select query continues.
+    /// See multi_alter_with_write.test for an example.
+    FAIL_POINT_PAUSE(FailPoints::pause_query_init);
 
     if (!context.hasQueryContext())
         context.setQueryContext(context);
diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp
index 820618a6e8b..181ebcaaa64 100644
--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -26,9 +27,17 @@
 #include
 #include
 #include
+#include
+
 
 namespace DB
 {
+namespace FailPoints
+{
+extern const char random_join_build_failpoint[];
+extern const char random_join_prob_failpoint[];
+} // namespace FailPoints
+
 namespace ErrorCodes
 {
 extern const int UNKNOWN_SET_DATA_VARIANT;
@@ -621,6 +630,7 @@ void NO_INLINE insertFromBlockImplTypeCaseWithLock(
     }
     for (size_t insert_index = 0; insert_index < segment_index_info.size(); insert_index++)
     {
+        FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_build_failpoint);
         size_t segment_index = (insert_index + stream_index) % segment_index_info.size();
         if (segment_index == segment_size)
         {
@@ -1513,7 +1523,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const
     default:
         throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
     }
-
+    FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint);
     for (size_t i = 0; i < num_columns_to_add; ++i)
     {
         const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i);
diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h
index fdc009237aa..5ed586c263d 100644
--- a/dbms/src/Interpreters/ProcessList.h
+++ b/dbms/src/Interpreters/ProcessList.h
@@ -31,12 +31,6 @@
 #include
 #include
 
-
-namespace CurrentMetrics
-{
-extern const Metric Query;
-}
-
 namespace DB
 {
 class IStorage;
@@ -90,8 +84,6 @@ class ProcessListElement
     QueryPriorities::Handle priority_handle;
 
-    CurrentMetrics::Increment num_queries{CurrentMetrics::Query};
-
     std::atomic is_killed{false};
 
     /// Be careful using it. For example, queries field could be modified concurrently.
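The comment added to `InterpreterSelectQuery::init()` above describes how the two failpoints cooperate. As a rough sketch (not part of this patch) of how a test could drive them, assuming the `FailPointHelper::enableFailPoint` / `disableFailPoint` helpers declared in `dbms/src/Common/FailPoint.h`; the scenario function, the string failpoint names, and the cleanup calls are illustrative only:

```cpp
// Illustrative sketch only -- not part of the patch. Requires a FIU_ENABLE build.
#include <Common/FailPoint.h>

namespace DB::tests
{
void writeThenSelectScenario()
{
    // Let the write path clear pause_query_init automatically once the write takes effect.
    DB::FailPointHelper::enableFailPoint("unblock_query_init_after_write");

    // Block any SELECT inside InterpreterSelectQuery::init() until the write finishes.
    DB::FailPointHelper::enableFailPoint("pause_query_init");

    // ... issue the write statement here; when it finishes, pause_query_init is
    // disabled and the pending SELECT continues ...

    // Defensive cleanup in case the scenario aborts before the write completes.
    DB::FailPointHelper::disableFailPoint("pause_query_init");
    DB::FailPointHelper::disableFailPoint("unblock_query_init_after_write");
}
} // namespace DB::tests
```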
diff --git a/dbms/src/Interpreters/QueryPriorities.h b/dbms/src/Interpreters/QueryPriorities.h index 5f34ae616c7..ca01e4f0a6c 100644 --- a/dbms/src/Interpreters/QueryPriorities.h +++ b/dbms/src/Interpreters/QueryPriorities.h @@ -23,13 +23,6 @@ #include #include - -namespace CurrentMetrics -{ -extern const Metric QueryPreempted; -} - - namespace DB { /** Implements query priorities in very primitive way. @@ -95,7 +88,6 @@ class QueryPriorities if (!found) return true; - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryPreempted}; if (std::cv_status::timeout == condvar.wait_for(lock, cur_timeout)) return false; else diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 96cfc0a58ae..78ad4b41ce6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -53,7 +54,10 @@ extern const int LOGICAL_ERROR; extern const int QUERY_IS_TOO_LARGE; extern const int INTO_OUTFILE_NOT_ALLOWED; } // namespace ErrorCodes - +namespace FailPoints +{ +extern const char random_interpreter_failpoint[]; +} // namespace FailPoints namespace { void checkASTSizeLimits(const IAST & ast, const Settings & settings) @@ -226,6 +230,7 @@ std::tuple executeQueryImpl( context.setProcessListElement(&process_list_entry->get()); } + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_interpreter_failpoint); auto interpreter = query_src.interpreter(context, stage); res = interpreter->execute(); diff --git a/dbms/src/Server/HTTPHandler.h b/dbms/src/Server/HTTPHandler.h index bd06d56bd4e..74b5cc4b4c7 100644 --- a/dbms/src/Server/HTTPHandler.h +++ b/dbms/src/Server/HTTPHandler.h @@ -14,24 +14,20 @@ #pragma once -#include "IServer.h" - -#include - #include #include +#include +#include "IServer.h" -namespace CurrentMetrics + +namespace Poco { - extern const Metric HTTPConnection; +class Logger; } -namespace Poco { class Logger; } - namespace DB { - class WriteBufferFromHTTPServerResponse; @@ -69,11 +65,9 @@ class HTTPHandler : public Poco::Net::HTTPRequestHandler IServer & server; Poco::Logger * log; - /// It is the name of the server that will be sent in an http-header X-ClickHouse-Server-Display-Name. + /// It is the name of the server that will be sent in an http-header X-ClickHouse-Server-Display-Name. String server_display_name; - CurrentMetrics::Increment metric_increment{CurrentMetrics::HTTPConnection}; - /// Also initializes 'used_output'. 
void processQuery( Poco::Net::HTTPServerRequest & request, @@ -91,4 +85,4 @@ class HTTPHandler : public Poco::Net::HTTPRequestHandler void pushDelayedResults(Output & used_output); }; -} +} // namespace DB diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 3e2c29de76c..3358ae2a60e 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -151,6 +152,7 @@ void loadMiConfig(Logger * log) } #undef TRY_LOAD_CONF #endif + namespace { [[maybe_unused]] void tryLoadBoolConfigFromEnv(Poco::Logger * log, bool & target, const char * name) @@ -176,11 +178,6 @@ namespace } } // namespace -namespace CurrentMetrics -{ -extern const Metric Revision; -} - namespace DB { namespace ErrorCodes @@ -189,6 +186,7 @@ extern const int NO_ELEMENTS_IN_CONFIG; extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int INVALID_CONFIG_PARAMETER; +extern const int IP_ADDRESS_NOT_ALLOWED; } // namespace ErrorCodes namespace Debug @@ -626,6 +624,10 @@ class Server::FlashGrpcServerHolder } } flash_grpc_server = builder.BuildAndStart(); + if (!flash_grpc_server) + { + throw Exception("Exception happens when start grpc server, the flash.service_addr may be invalid, flash.service_addr is " + raft_config.flash_server_addr, ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + } LOG_FMT_INFO(log, "Flash grpc server listening on [{}]", raft_config.flash_server_addr); Debug::setServiceAddr(raft_config.flash_server_addr); if (enable_async_server) @@ -966,7 +968,10 @@ class Server::TcpHttpServersHolder LOG_DEBUG(log, debug_msg); } - const std::vector> & getServers() const { return servers; } + const std::vector> & getServers() const + { + return servers; + } private: Server & server; @@ -982,6 +987,7 @@ int Server::main(const std::vector & /*args*/) Poco::Logger * log = &logger(); #ifdef FIU_ENABLE fiu_init(0); // init failpoint + FailPointHelper::initRandomFailPoints(config(), log); #endif UpdateMallocConfig(log); @@ -1001,7 +1007,6 @@ int Server::main(const std::vector & /*args*/) #ifdef TIFLASH_ENABLE_SVE_SUPPORT tryLoadBoolConfigFromEnv(log, simd_option::ENABLE_SVE, "TIFLASH_ENABLE_SVE"); #endif - registerFunctions(); registerAggregateFunctions(); registerWindowFunctions(); @@ -1065,8 +1070,6 @@ int Server::main(const std::vector & /*args*/) LOG_FMT_INFO(log, "TiFlashRaftProxyHelper is null, failed to get server info"); } - CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get()); - // print necessary grpc log. grpc_log = &Poco::Logger::get("grpc"); gpr_set_log_verbosity(GPR_LOG_SEVERITY_DEBUG); diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h index ed0af52dc98..2fde0b11d9b 100644 --- a/dbms/src/Server/TCPHandler.h +++ b/dbms/src/Server/TCPHandler.h @@ -29,11 +29,6 @@ #include "IServer.h" -namespace CurrentMetrics -{ -extern const Metric TCPConnection; -} - namespace Poco { class Logger; @@ -131,8 +126,6 @@ class TCPHandler : public Poco::Net::TCPServerConnection /// At the moment, only one ongoing query in the connection is supported at a time. QueryState state; - CurrentMetrics::Increment metric_increment{CurrentMetrics::TCPConnection}; - /// It is the name of the server that will be sent to the client. 
String server_display_name; diff --git a/dbms/src/Storages/BackgroundProcessingPool.cpp b/dbms/src/Storages/BackgroundProcessingPool.cpp index 96c2c6cc622..45ba032bf53 100644 --- a/dbms/src/Storages/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/BackgroundProcessingPool.cpp @@ -42,7 +42,6 @@ inline static pid_t getTid() namespace CurrentMetrics { -extern const Metric BackgroundPoolTask; extern const Metric MemoryTrackingInBackgroundProcessingPool; } // namespace CurrentMetrics @@ -215,8 +214,6 @@ void BackgroundProcessingPool::threadFunction() continue; { - CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundPoolTask}; - bool done_work = false; if (!task->multi) { diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.h b/dbms/src/Storages/DeltaMerge/DeltaTree.h index 47674ab2cfc..29e127fe35f 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaTree.h +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -810,6 +811,20 @@ class DeltaTree template InternPtr afterNodeUpdated(T * node); +#ifdef __x86_64__ + template + InternPtr afterNodeUpdatedGeneric(T * node); + + template + InternPtr afterNodeUpdatedAVX512(T * node); + + template + InternPtr afterNodeUpdatedAVX(T * node); + + template + InternPtr afterNodeUpdatedSSE4(T * node); +#endif + inline void afterLeafUpdated(LeafPtr leaf) { if (leaf->count == 0 && isRootOnly()) @@ -1348,158 +1363,86 @@ typename DT_CLASS::InterAndSid DT_CLASS::submitMinSid(T * node, UInt64 subtree_m } } -DT_TEMPLATE -template -typename DT_CLASS::InternPtr DT_CLASS::afterNodeUpdated(T * node) +#ifndef __x86_64__ +#define TIFLASH_DT_IMPL_NAME afterNodeUpdated +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +#else + +// generic implementation +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedGeneric +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME + +// avx512 implementation +TIFLASH_BEGIN_AVX512_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedAVX512 +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +// avx implementation +TIFLASH_BEGIN_AVX_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedAVX +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +// sse4 implementation +TIFLASH_BEGIN_SSE4_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedSSE4 +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +namespace Impl { - if (!node) - return {}; - - constexpr bool is_leaf = std::is_same::value; +enum class DeltaTreeVariant +{ + Generic, + SSE4, + AVX, + AVX512 +}; - if (root == asNode(node) && !isLeaf(root) && node->count == 1) +static inline DeltaTreeVariant resolveDeltaTreeVariant() +{ + if (DB::TargetSpecific::AVX512Checker::runtimeSupport()) { - /// Decrease tree height. - root = as(Intern, root)->children[0]; - - --(node->count); - freeNode(node); - - if (isLeaf(root)) - as(Leaf, root)->parent = nullptr; - else - as(Intern, root)->parent = nullptr; - --height; - - LOG_FMT_TRACE(log, "height {} -> {}", (height + 1), height); - - return {}; + return DeltaTreeVariant::AVX512; } - - auto parent = node->parent; - bool parent_updated = false; - - if (T::overflow(node->count)) // split + if (DB::TargetSpecific::AVXChecker::runtimeSupport()) { - if (!parent) - { - /// Increase tree height. 
- parent = createNode(); - root = asNode(parent); - - parent->deltas[0] = checkDelta(node->getDelta()); - parent->children[0] = asNode(node); - ++(parent->count); - parent->refreshChildParent(); - - ++height; - - LOG_FMT_TRACE(log, "height {} -> {}", (height - 1), height); - } - - auto pos = parent->searchChild(asNode(node)); - - T * next_n = createNode(); - - UInt64 sep_sid = node->split(next_n); - - // handle parent update - parent->shiftEntries(pos + 1, 1); - // for current node - parent->deltas[pos] = checkDelta(node->getDelta()); - // for next node - parent->sids[pos] = sep_sid; - parent->deltas[pos + 1] = checkDelta(next_n->getDelta()); - parent->children[pos + 1] = asNode(next_n); - - ++(parent->count); - - if constexpr (is_leaf) - { - if (as(Leaf, node) == right_leaf) - right_leaf = as(Leaf, next_n); - } - - parent_updated = true; + return DeltaTreeVariant::AVX; } - else if (T::underflow(node->count) && root != asNode(node)) // adopt or merge + if (DB::TargetSpecific::SSE4Checker::runtimeSupport()) { - auto pos = parent->searchChild(asNode(node)); - - // currently we always adopt from the right one if possible - bool is_sibling_left; - size_t sibling_pos; - T * sibling; - - if (unlikely(parent->count <= 1)) - throw Exception("Unexpected parent entry count: " + DB::toString(parent->count)); - - if (pos == parent->count - 1) - { - is_sibling_left = true; - sibling_pos = pos - 1; - sibling = as(T, parent->children[sibling_pos]); - } - else - { - is_sibling_left = false; - sibling_pos = pos + 1; - sibling = as(T, parent->children[sibling_pos]); - } - - if (unlikely(sibling->parent != node->parent)) - throw Exception("parent not the same"); - - auto after_adopt = (node->count + sibling->count) / 2; - if (T::underflow(after_adopt)) - { - // Do merge. - // adoption won't work because the sibling doesn't have enough entries. - - node->merge(sibling, is_sibling_left, pos); - freeNode(sibling); - - pos = std::min(pos, sibling_pos); - parent->deltas[pos] = checkDelta(node->getDelta()); - parent->children[pos] = asNode(node); - parent->shiftEntries(pos + 2, -1); - - if constexpr (is_leaf) - { - if (is_sibling_left && (as(Leaf, sibling) == left_leaf)) - left_leaf = as(Leaf, node); - else if (!is_sibling_left && as(Leaf, sibling) == right_leaf) - right_leaf = as(Leaf, node); - } - --(parent->count); - } - else - { - // Do adoption. 
- - auto adopt_count = after_adopt - node->count; - auto new_sep_sid = node->adopt(sibling, is_sibling_left, adopt_count, pos); + return DeltaTreeVariant::SSE4; + } + return DeltaTreeVariant::Generic; +} - parent->sids[std::min(pos, sibling_pos)] = new_sep_sid; - parent->deltas[pos] = checkDelta(node->getDelta()); - parent->deltas[sibling_pos] = checkDelta(sibling->getDelta()); - } +static inline DeltaTreeVariant DELTA_TREE_VARIANT = resolveDeltaTreeVariant(); +} // namespace Impl - parent_updated = true; - } - else if (parent) +DT_TEMPLATE +template +typename DT_CLASS::InternPtr DT_CLASS::afterNodeUpdated(T * node) +{ + switch (Impl::DELTA_TREE_VARIANT) { - auto pos = parent->searchChild(asNode(node)); - auto delta = node->getDelta(); - parent_updated = parent->deltas[pos] != delta; - parent->deltas[pos] = checkDelta(delta); + case Impl::DeltaTreeVariant::Generic: + return afterNodeUpdatedGeneric(node); + case Impl::DeltaTreeVariant::SSE4: + return afterNodeUpdatedSSE4(node); + case Impl::DeltaTreeVariant::AVX: + return afterNodeUpdatedAVX(node); + case Impl::DeltaTreeVariant::AVX512: + return afterNodeUpdatedAVX512(node); } - - if (parent_updated) - return parent; - else - return {}; } +#endif + #undef as #undef asNode diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.ipp b/dbms/src/Storages/DeltaMerge/DeltaTree.ipp new file mode 100644 index 00000000000..27b8a3b96f1 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.ipp @@ -0,0 +1,165 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +DT_TEMPLATE +template +__attribute__((noinline, flatten)) typename DT_CLASS::InternPtr DT_CLASS::TIFLASH_DT_IMPL_NAME(T * node) +{ + if (!node) + return {}; + + constexpr bool is_leaf = std::is_same::value; + + if (root == asNode(node) && !isLeaf(root) && node->count == 1) + { + /// Decrease tree height. + root = as(Intern, root)->children[0]; + + --(node->count); + freeNode(node); + + if (isLeaf(root)) + as(Leaf, root)->parent = nullptr; + else + as(Intern, root)->parent = nullptr; + --height; + + LOG_FMT_TRACE(log, "height {} -> {}", (height + 1), height); + + return {}; + } + + auto parent = node->parent; + bool parent_updated = false; + + if (T::overflow(node->count)) // split + { + if (!parent) + { + /// Increase tree height. 
+ parent = createNode(); + root = asNode(parent); + + parent->deltas[0] = checkDelta(node->getDelta()); + parent->children[0] = asNode(node); + ++(parent->count); + parent->refreshChildParent(); + + ++height; + + LOG_FMT_TRACE(log, "height {} -> {}", (height - 1), height); + } + + auto pos = parent->searchChild(asNode(node)); + + T * next_n = createNode(); + + UInt64 sep_sid = node->split(next_n); + + // handle parent update + parent->shiftEntries(pos + 1, 1); + // for current node + parent->deltas[pos] = checkDelta(node->getDelta()); + // for next node + parent->sids[pos] = sep_sid; + parent->deltas[pos + 1] = checkDelta(next_n->getDelta()); + parent->children[pos + 1] = asNode(next_n); + + ++(parent->count); + + if constexpr (is_leaf) + { + if (as(Leaf, node) == right_leaf) + right_leaf = as(Leaf, next_n); + } + + parent_updated = true; + } + else if (T::underflow(node->count) && root != asNode(node)) // adopt or merge + { + auto pos = parent->searchChild(asNode(node)); + + // currently we always adopt from the right one if possible + bool is_sibling_left; + size_t sibling_pos; + T * sibling; + + if (unlikely(parent->count <= 1)) + throw Exception("Unexpected parent entry count: " + DB::toString(parent->count)); + + if (pos == parent->count - 1) + { + is_sibling_left = true; + sibling_pos = pos - 1; + sibling = as(T, parent->children[sibling_pos]); + } + else + { + is_sibling_left = false; + sibling_pos = pos + 1; + sibling = as(T, parent->children[sibling_pos]); + } + + if (unlikely(sibling->parent != node->parent)) + throw Exception("parent not the same"); + + auto after_adopt = (node->count + sibling->count) / 2; + if (T::underflow(after_adopt)) + { + // Do merge. + // adoption won't work because the sibling doesn't have enough entries. + + node->merge(sibling, is_sibling_left, pos); + freeNode(sibling); + + pos = std::min(pos, sibling_pos); + parent->deltas[pos] = checkDelta(node->getDelta()); + parent->children[pos] = asNode(node); + parent->shiftEntries(pos + 2, -1); + + if constexpr (is_leaf) + { + if (is_sibling_left && (as(Leaf, sibling) == left_leaf)) + left_leaf = as(Leaf, node); + else if (!is_sibling_left && as(Leaf, sibling) == right_leaf) + right_leaf = as(Leaf, node); + } + --(parent->count); + } + else + { + // Do adoption. + + auto adopt_count = after_adopt - node->count; + auto new_sep_sid = node->adopt(sibling, is_sibling_left, adopt_count, pos); + + parent->sids[std::min(pos, sibling_pos)] = new_sep_sid; + parent->deltas[pos] = checkDelta(node->getDelta()); + parent->deltas[sibling_pos] = checkDelta(sibling->getDelta()); + } + + parent_updated = true; + } + else if (parent) + { + auto pos = parent->searchChild(asNode(node)); + auto delta = node->getDelta(); + parent_updated = parent->deltas[pos] != delta; + parent->deltas[pos] = checkDelta(delta); + } + + if (parent_updated) + return parent; + else + return {}; +} \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp index 3bff05ef19f..272d548eee1 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp @@ -72,10 +72,9 @@ DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, for (auto & cd : write_columns) { // TODO: currently we only generate index for Integers, Date, DateTime types, and this should be configurable by user. 
- // TODO: If column type is nullable, we won't generate index for it /// for handle column always generate index - bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || cd.type->isInteger() || cd.type->isDateOrDateTime(); - + auto type = removeNullable(cd.type); + bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || type->isInteger() || type->isDateOrDateTime(); if (options.flags.isSingleFile()) { if (do_index) @@ -122,7 +121,7 @@ void DMFileWriter::addStreams(ColId col_id, DataTypePtr type, bool do_index) void DMFileWriter::write(const Block & block, const BlockProperty & block_property) { is_empty_file = false; - DMFile::PackStat stat; + DMFile::PackStat stat{}; stat.rows = block.rows(); stat.not_clean = block_property.not_clean_rows; stat.bytes = block.bytes(); // This is bytes of pack data in memory. @@ -219,7 +218,7 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu "Type shouldn be nullable when substream_path's type is NullMap.", Errors::DeltaTree::Internal); - const ColumnNullable & col = static_cast(column); + const auto & col = static_cast(column); col.checkConsistency(); DataTypeUInt8().serializeBinaryBulk(col.getNullMapColumn(), single_file_stream->original_layer, 0, rows); } @@ -230,8 +229,8 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu "Type shouldn be nullable when substream_path's type is NullableElements.", Errors::DeltaTree::Internal); - const DataTypeNullable & nullable_type = static_cast(type); - const ColumnNullable & col = static_cast(column); + const auto & nullable_type = static_cast(type); + const auto & col = static_cast(column); nullable_type.getNestedType()->serializeBinaryBulk(col.getNestedColumn(), single_file_stream->original_layer, 0, rows); } else diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 2681284948c..6229d54c169 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -61,7 +61,6 @@ inline std::pair minmax(const IColumn & column, const ColumnVect void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * del_mark) { - const IColumn * column_ptr = &column; auto size = column.size(); bool has_null = false; if (column.isColumnNullable()) @@ -70,7 +69,6 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de const auto & nullable_column = static_cast(column); const auto & null_mark_data = nullable_column.getNullMapColumn().getData(); - column_ptr = &nullable_column.getNestedColumn(); for (size_t i = 0; i < size; ++i) { @@ -82,14 +80,13 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de } } - const IColumn & updated_column = *column_ptr; - auto [min_index, max_index] = details::minmax(updated_column, del_mark, 0, updated_column.size()); + auto [min_index, max_index] = details::minmax(column, del_mark, 0, column.size()); if (min_index != NONE_EXIST) { has_null_marks->push_back(has_null); has_value_marks->push_back(1); - minmaxes->insertFrom(updated_column, min_index); - minmaxes->insertFrom(updated_column, max_index); + minmaxes->insertFrom(column, min_index); + minmaxes->insertFrom(column, max_index); } else { @@ -158,6 +155,62 @@ std::pair MinMaxIndex::getUInt64MinMax(size_t pack_index) return {minmaxes->get64(pack_index * 2), minmaxes->get64(pack_index * 2 + 1)}; } +RSResult MinMaxIndex::checkNullableEqual(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const auto & 
column_nullable = static_cast(*minmaxes); + + const auto * raw_type = type.get(); + +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkEqual(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + return RoughCheck::checkEqual(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const DataTypePtr & type) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -165,7 +218,13 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D if (!(*has_value_marks)[pack_index]) return RSResult::None; + // if minmaxes_data has null value, the value of minmaxes_data[i] is meaningless and maybe just some random value. + // But we have checked the has_null_marks above and ensured that there is no null value in MinMax Indexes. 
const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableEqual(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ @@ -215,6 +274,62 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D } return RSResult::Some; } + +RSResult MinMaxIndex::checkNullableGreater(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const auto & column_nullable = static_cast(*minmaxes); + const auto * raw_type = type.get(); + +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkGreater(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(chars[prev_offset], offsets[pos] - prev_offset - 1); + return RoughCheck::checkGreater(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const DataTypePtr & type, int /*nan_direction_hint*/) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -223,6 +338,10 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const return RSResult::None; const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableGreater(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ @@ -272,6 +391,62 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const } return RSResult::Some; } + +RSResult MinMaxIndex::checkNullableGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const auto & column_nullable = static_cast(*minmaxes); + + const auto * raw_type = type.get(); +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkGreaterEqual(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type, int /*nan_direction_hint*/) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -280,6 +455,10 @@ RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, return RSResult::None; const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableGreaterEqual(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h index 7efd37fafa4..73284333c73 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h @@ -81,6 +81,9 @@ class MinMaxIndex RSResult checkGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type, int nan_direction); static String toString(); + RSResult checkNullableEqual(size_t pack_index, const Field & value, const DataTypePtr & type); + RSResult checkNullableGreater(size_t pack_index, const Field & value, const DataTypePtr & type); + RSResult checkNullableGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type); }; diff --git a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h index b35dae0cbe2..84fafbc46ef 100644 --- a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h +++ b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h @@ -273,7 +273,8 @@ class DMTestEnv DataTypePtr pk_type = EXTRA_HANDLE_COLUMN_INT_TYPE, bool is_common_handle = false, size_t rowkey_column_size = 1, - bool with_internal_columns = true) + bool with_internal_columns = true, + bool is_deleted = false) { Block block; const size_t num_rows = (end - beg); @@ -324,7 +325,7 @@ class DMTestEnv VERSION_COLUMN_ID)); // tag_col block.insert(DB::tests::createColumn( - std::vector(num_rows, 0), + std::vector(num_rows, is_deleted), TAG_COLUMN_NAME, TAG_COLUMN_ID)); } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 96c0070b73b..bb31b687186 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -214,14 +214,6 @@ try ASSERT_EQ(true, checkMatch(case_name, *context, "MyDateTime", "2020-09-27", createLessEqual(attr("MyDateTime"), parseMyDateTime("2020-09-27"), 0))); ASSERT_EQ(false, checkMatch(case_name, *context, "MyDateTime", "2020-09-27", createLessEqual(attr("MyDateTime"), parseMyDateTime("2020-09-26"), 0))); - /// Currently we don't do filtering for null values. i.e. if a pack contains any null values, then the pack will pass the filter. 
- ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); - ASSERT_EQ(false, checkDelMatch(case_name, *context, "Int64", "100", createEqual(attr("Int64"), Field((Int64)100)))); ASSERT_EQ(true, checkPkMatch(case_name, *context, "Int64", "100", createEqual(pkAttr(), Field((Int64)100)), true)); ASSERT_EQ(true, checkPkMatch(case_name, *context, "Int64", "100", createGreater(pkAttr(), Field((Int64)99), 0), true)); @@ -236,6 +228,80 @@ try } CATCH +TEST_F(DMMinMaxIndexTest, NullableToNullable) +try +{ + const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + // clang-format off + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createEqual(attr("Nullable(Int64)"), Field((Int64)100)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createIn(attr("Nullable(Int64)"), {Field((Int64)100)}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreater(attr("Nullable(Int64)"), Field((Int64)99), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLess(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLessEqual(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createEqual(attr("Nullable(Date)"), Field((String) "2020-09-27")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", 
createEqual(attr("Nullable(Date)"), Field((String) "2020-09-28")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createIn(attr("Nullable(Date)"), {Field((String) "2020-09-27")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createIn(attr("Nullable(Date)"), {Field((String) "2020-09-28")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreater(attr("Nullable(Date)"), Field((String) "2020-09-26"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreater(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreaterEqual(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreaterEqual(attr("Nullable(Date)"), Field((String) "2020-09-28"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLess(attr("Nullable(Date)"), Field((String) "2020-09-28"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLess(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLessEqual(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLessEqual(attr("Nullable(Date)"), Field((String) "2020-09-26"), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createIn(attr("Nullable(DateTime)"), {Field((String) "2020-01-01 05:00:01")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createIn(attr("Nullable(DateTime)"), {Field((String) "2020-01-01 05:00:02")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreater(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:00"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreater(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreaterEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreaterEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLess(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLess(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", 
createLessEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLessEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:00"), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createIn(attr("Nullable(MyDateTime)"), {parseMyDateTime("2020-09-27")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createIn(attr("Nullable(MyDateTime)"), {parseMyDateTime("2020-09-28")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreater(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-26"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreater(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreaterEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreaterEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLess(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLess(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLessEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLessEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-26"), 0))); + + // has null + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), 
Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); +} +CATCH + TEST_F(DMMinMaxIndexTest, Logical) try { diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp new file mode 100644 index 00000000000..1c68ba3bb2a --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp @@ -0,0 +1,86 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace DM +{ +namespace tests +{ +class SegmentOperationTest : public SegmentTestBasic +{ +protected: + static void SetUpTestCase() {} +}; + +TEST_F(SegmentOperationTest, Issue4956) +try +{ + SegmentTestOptions options; + reloadWithOptions(options); + + // flush data, make the segment can be split. 
+ writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + // write data to cache, reproduce the https://github.com/pingcap/tiflash/issues/4956 + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + deleteRangeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto segment_id = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_TRUE(segment_id.has_value()); + + mergeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, *segment_id); +} +CATCH + +TEST_F(SegmentOperationTest, TestSegment) +try +{ + SegmentTestOptions options; + reloadWithOptions(options); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + auto segment_id = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_TRUE(segment_id.has_value()); + + size_t origin_rows = getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(*segment_id); + flushSegmentCache(*segment_id); + deleteRangeSegment(*segment_id); + writeSegmentWithDeletedPack(*segment_id); + mergeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, *segment_id); + + EXPECT_EQ(getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID), origin_rows); +} +CATCH + +TEST_F(SegmentOperationTest, TestSegmentRandom) +try +{ + SegmentTestOptions options; + options.is_common_handle = true; + reloadWithOptions(options); + randomSegmentTest(100); +} +CATCH +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp new file mode 100644 index 00000000000..c676f2e08d5 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -0,0 +1,430 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace DM +{ +namespace tests +{ +void SegmentTestBasic::reloadWithOptions(SegmentTestOptions config) +{ + TiFlashStorageTestBasic::SetUp(); + options = config; + table_columns = std::make_shared(); + + root_segment = reload(config.is_common_handle); + ASSERT_EQ(root_segment->segmentId(), DELTA_MERGE_FIRST_SEGMENT_ID); + segments.clear(); + segments[DELTA_MERGE_FIRST_SEGMENT_ID] = root_segment; +} + +PageId SegmentTestBasic::createNewSegmentWithSomeData() +{ + SegmentPtr new_segment; + std::tie(root_segment, new_segment) = root_segment->split(dmContext(), tableColumns()); + + const size_t num_rows_write_per_batch = 100; + { + // write to segment and flush + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write_per_batch, false); + new_segment->write(dmContext(), std::move(block), true); + } + { + // write to segment and don't flush + Block block = DMTestEnv::prepareSimpleWriteBlock(num_rows_write_per_batch, 2 * num_rows_write_per_batch, false); + new_segment->write(dmContext(), std::move(block), false); + } + return new_segment->segmentId(); +} + +size_t SegmentTestBasic::getSegmentRowNumWithoutMVCC(PageId segment_id) +{ + auto segment = segments[segment_id]; + auto in = segment->getInputStreamRaw(dmContext(), *tableColumns()); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + return num_rows_read; +} + +size_t SegmentTestBasic::getSegmentRowNum(PageId segment_id) +{ + auto segment = segments[segment_id]; + auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + return num_rows_read; +} + +void SegmentTestBasic::checkSegmentRow(PageId segment_id, size_t expected_row_num) +{ + auto segment = segments[segment_id]; + // read written data + auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, expected_row_num); +} + +std::optional SegmentTestBasic::splitSegment(PageId segment_id) +{ + auto origin_segment = segments[segment_id]; + size_t origin_segment_row_num = getSegmentRowNum(segment_id); + SegmentPtr segment, new_segment; + std::tie(segment, new_segment) = origin_segment->split(dmContext(), tableColumns()); + if (new_segment) + { + segments[new_segment->segmentId()] = new_segment; + segments[segment_id] = segment; + + EXPECT_EQ(origin_segment_row_num, getSegmentRowNum(segment_id) + getSegmentRowNum(new_segment->segmentId())); + return new_segment->segmentId(); + } + return std::nullopt; +} + +void SegmentTestBasic::mergeSegment(PageId left_segment_id, PageId right_segment_id) +{ + auto left_segment = segments[left_segment_id]; + auto right_segment = segments[right_segment_id]; + + size_t left_segment_row_num = getSegmentRowNum(left_segment_id); + size_t right_segment_row_num = getSegmentRowNum(right_segment_id); + LOG_FMT_TRACE(&Poco::Logger::root(), "merge in segment:{}:{} and {}:{}", left_segment->segmentId(), left_segment_row_num, right_segment->segmentId(), right_segment_row_num); + + SegmentPtr merged_segment = Segment::merge(dmContext(), tableColumns(), 
left_segment, right_segment); + segments[merged_segment->segmentId()] = merged_segment; + auto it = segments.find(right_segment->segmentId()); + if (it != segments.end()) + { + segments.erase(it); + } + EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), left_segment_row_num + right_segment_row_num); +} + +void SegmentTestBasic::mergeSegmentDelta(PageId segment_id) +{ + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNum(segment_id); + SegmentPtr merged_segment = segment->mergeDelta(dmContext(), tableColumns()); + segments[merged_segment->segmentId()] = merged_segment; + EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), segment_row_num); +} + +void SegmentTestBasic::flushSegmentCache(PageId segment_id) +{ + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNum(segment_id); + segment->flushCache(dmContext()); + EXPECT_EQ(getSegmentRowNum(segment_id), segment_row_num); +} + +std::pair SegmentTestBasic::getSegmentKeyRange(SegmentPtr segment) +{ + Int64 start_key, end_key; + if (!options.is_common_handle) + { + start_key = segment->getRowKeyRange().getStart().int_value; + end_key = segment->getRowKeyRange().getEnd().int_value; + return {start_key, end_key}; + } + EXPECT_EQ(segment->getRowKeyRange().getStart().data[0], TiDB::CodecFlagInt); + EXPECT_EQ(segment->getRowKeyRange().getEnd().data[0], TiDB::CodecFlagInt); + { + size_t cursor = 1; + start_key = DecodeInt64(cursor, String(segment->getRowKeyRange().getStart().data, segment->getRowKeyRange().getStart().size)); + } + { + size_t cursor = 1; + end_key = DecodeInt64(cursor, String(segment->getRowKeyRange().getEnd().data, segment->getRowKeyRange().getEnd().size)); + } + return {start_key, end_key}; +} + +void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows) +{ + if (write_rows == 0) + { + return; + } + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); + std::pair keys = getSegmentKeyRange(segment); + Int64 start_key = keys.first; + Int64 end_key = keys.second; + UInt64 remain_row_num = 0; + if (static_cast(end_key - start_key) > write_rows) + { + end_key = start_key + write_rows; + } + else + { + remain_row_num = write_rows - static_cast(end_key - start_key); + } + { + // write to segment and not flush + Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); + segment->write(dmContext(), std::move(block), false); + LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, end_key); + version++; + } + while (remain_row_num > 0) + { + UInt64 write_num = std::min(remain_row_num, static_cast(end_key - start_key)); + Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); + segment->write(dmContext(), std::move(block), false); + remain_row_num -= write_num; + LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, write_num + start_key); + version++; + } + EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); +} + +void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id) +{ + UInt64 write_rows = DEFAULT_MERGE_BLOCK_SIZE; + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); + std::pair keys = getSegmentKeyRange(segment); + Int64 start_key = keys.first; + Int64 end_key = keys.second; + UInt64 remain_row_num = 0; + if (static_cast(end_key - start_key) > write_rows) + { + end_key = start_key + write_rows; + } + else + { + remain_row_num = write_rows - static_cast(end_key - start_key); + } + { + // write to segment and not flush + Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true); + segment->write(dmContext(), std::move(block), true); + LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, end_key); + version++; + } + while (remain_row_num > 0) + { + UInt64 write_num = std::min(remain_row_num, static_cast(end_key - start_key)); + Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true); + segment->write(dmContext(), std::move(block), true); + remain_row_num -= write_num; + LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, write_num + start_key); + version++; + } + EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); +} + +void SegmentTestBasic::deleteRangeSegment(PageId segment_id) +{ + auto segment = segments[segment_id]; + segment->write(dmContext(), /*delete_range*/ segment->getRowKeyRange()); + EXPECT_EQ(getSegmentRowNum(segment_id), 0); +} + +void SegmentTestBasic::writeRandomSegment() +{ + if (segments.empty()) + { + return; + } + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_TRACE(&Poco::Logger::root(), "start write segment:{}", random_segment_id); + writeSegment(random_segment_id); +} +void SegmentTestBasic::writeRandomSegmentWithDeletedPack() +{ + if (segments.empty()) + { + return; + } + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_TRACE(&Poco::Logger::root(), "start write segment with deleted pack:{}", random_segment_id); + writeSegmentWithDeletedPack(random_segment_id); +} + +void SegmentTestBasic::deleteRangeRandomSegment() +{ + if (segments.empty()) + { + return; + } + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_TRACE(&Poco::Logger::root(), "start delete range segment:{}", random_segment_id); + deleteRangeSegment(random_segment_id); +} + +void SegmentTestBasic::splitRandomSegment() +{ + if (segments.empty()) + { + return; + } + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_TRACE(&Poco::Logger::root(), "start split segment:{}", random_segment_id); + splitSegment(random_segment_id); +} + +void SegmentTestBasic::mergeRandomSegment() +{ + if (segments.empty() || segments.size() == 1) + { + return; + 
} + std::pair segment_pair; + segment_pair = getRandomMergeablePair(); + LOG_FMT_TRACE(&Poco::Logger::root(), "start merge segment:{} and {}", segment_pair.first, segment_pair.second); + mergeSegment(segment_pair.first, segment_pair.second); +} + +void SegmentTestBasic::mergeDeltaRandomSegment() +{ + if (segments.empty()) + { + return; + } + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_TRACE(&Poco::Logger::root(), "start merge delta in segment:{}", random_segment_id); + mergeSegmentDelta(random_segment_id); +} + +void SegmentTestBasic::flushCacheRandomSegment() +{ + if (segments.empty()) + { + return; + } + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_TRACE(&Poco::Logger::root(), "start flush cache in segment:{}", random_segment_id); + flushSegmentCache(random_segment_id); +} + +void SegmentTestBasic::randomSegmentTest(size_t operator_count) +{ + for (size_t i = 0; i < operator_count; i++) + { + auto op = static_cast(random() % SegmentOperaterMax); + segment_operator_entries[op](); + } +} + +PageId SegmentTestBasic::getRandomSegmentId() +{ + auto max_segment_id = segments.rbegin()->first; + PageId random_segment_id = random() % (max_segment_id + 1); + auto it = segments.find(random_segment_id); + while (it == segments.end()) + { + random_segment_id = random() % (max_segment_id + 1); + it = segments.find(random_segment_id); + } + return random_segment_id; +} + +std::pair SegmentTestBasic::getRandomMergeablePair() +{ + while (true) + { + PageId random_left_segment_id = getRandomSegmentId(); + PageId random_right_segment_id = random_left_segment_id; + while (random_right_segment_id == random_left_segment_id) + { + random_right_segment_id = getRandomSegmentId(); + } + auto left_segment = segments[random_left_segment_id]; + auto right_segment = segments[random_right_segment_id]; + if (compare(left_segment->getRowKeyRange().getEnd(), right_segment->getRowKeyRange().getStart()) != 0 || left_segment->nextSegmentId() != right_segment->segmentId()) + { + continue; + } + return {random_left_segment_id, random_right_segment_id}; + } +} + +RowKeyRange SegmentTestBasic::commanHandleKeyRange() +{ + String start_key, end_key; + { + WriteBufferFromOwnString ss; + ::DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); + ::DB::EncodeInt64(std::numeric_limits::min(), ss); + start_key = ss.releaseStr(); + } + { + WriteBufferFromOwnString ss; + ::DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); + ::DB::EncodeInt64(std::numeric_limits::max(), ss); + end_key = ss.releaseStr(); + } + return RowKeyRange(RowKeyValue(true, std::make_shared(start_key), 0), RowKeyValue(true, std::make_shared(end_key), 0), true, 1); +} + +SegmentPtr SegmentTestBasic::reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns, DB::Settings && db_settings) +{ + TiFlashStorageTestBasic::reload(std::move(db_settings)); + storage_path_pool = std::make_unique(db_context->getPathPool().withTable("test", "t1", false)); + storage_pool = std::make_unique(*db_context, /*ns_id*/ 100, *storage_path_pool, "test.t1"); + storage_pool->restore(); + ColumnDefinesPtr cols = (!pre_define_columns) ? DMTestEnv::getDefaultColumns(is_common_handle ? DMTestEnv::PkType::CommonHandle : DMTestEnv::PkType::HiddenTiDBRowID) : pre_define_columns; + setColumns(cols); + + return Segment::newSegment(*dm_context, table_columns, is_common_handle ? 
commanHandleKeyRange() : RowKeyRange::newAll(is_common_handle, 1), storage_pool->newMetaPageId(), 0); +} + +void SegmentTestBasic::setColumns(const ColumnDefinesPtr & columns) +{ + *table_columns = *columns; + + dm_context = std::make_unique(*db_context, + *storage_path_pool, + *storage_pool, + 0, + /*min_version_*/ 0, + settings.not_compress_columns, + options.is_common_handle, + 1, + db_context->getSettingsRef()); +} +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h new file mode 100644 index 00000000000..ab0c7d6d0be --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h @@ -0,0 +1,123 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace DM +{ +namespace tests +{ +class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic +{ +public: + struct SegmentTestOptions + { + bool is_common_handle = false; + }; + +public: + void reloadWithOptions(SegmentTestOptions config); + + std::optional splitSegment(PageId segment_id); + void mergeSegment(PageId left_segment_id, PageId right_segment_id); + void mergeSegmentDelta(PageId segment_id); + void flushSegmentCache(PageId segment_id); + void writeSegment(PageId segment_id, UInt64 write_rows = 100); + void writeSegmentWithDeletedPack(PageId segment_id); + void deleteRangeSegment(PageId segment_id); + + + void writeRandomSegment(); + void writeRandomSegmentWithDeletedPack(); + void deleteRangeRandomSegment(); + void splitRandomSegment(); + void mergeRandomSegment(); + void mergeDeltaRandomSegment(); + void flushCacheRandomSegment(); + + void randomSegmentTest(size_t operator_count); + + PageId createNewSegmentWithSomeData(); + size_t getSegmentRowNumWithoutMVCC(PageId segment_id); + size_t getSegmentRowNum(PageId segment_id); + void checkSegmentRow(PageId segment_id, size_t expected_row_num); + std::pair getSegmentKeyRange(SegmentPtr segment); + +protected: + // + std::map segments; + + enum SegmentOperaterType + { + Write = 0, + DeleteRange, + Split, + Merge, + MergeDelta, + FlushCache, + WriteDeletedPack, + SegmentOperaterMax + }; + + const std::vector> segment_operator_entries = { + [this] { writeRandomSegment(); }, + [this] { deleteRangeRandomSegment(); }, + [this] { splitRandomSegment(); }, + [this] { mergeRandomSegment(); }, + [this] { mergeDeltaRandomSegment(); }, + [this] { flushCacheRandomSegment(); }, + [this] { + writeRandomSegmentWithDeletedPack(); + }}; + + PageId getRandomSegmentId(); + + std::pair getRandomMergeablePair(); + + RowKeyRange commanHandleKeyRange(); + + SegmentPtr reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns = {}, DB::Settings && db_settings = DB::Settings()); + + // setColumns should update dm_context at the same time + void setColumns(const ColumnDefinesPtr & columns); + 
+ const ColumnDefinesPtr & tableColumns() const { return table_columns; } + + DMContext & dmContext() { return *dm_context; } + +protected: + /// all these var lives as ref in dm_context + std::unique_ptr storage_path_pool; + std::unique_ptr storage_pool; + /// dm_context + std::unique_ptr dm_context; + ColumnDefinesPtr table_columns; + DM::DeltaMergeStore::Settings settings; + + SegmentPtr root_segment; + UInt64 version = 0; + SegmentTestOptions options; +}; +} // namespace tests +} // namespace DM +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/MarkCache.h b/dbms/src/Storages/MarkCache.h index 5816b0c1bba..728f830e0d0 100644 --- a/dbms/src/Storages/MarkCache.h +++ b/dbms/src/Storages/MarkCache.h @@ -14,24 +14,23 @@ #pragma once -#include - #include #include #include -#include #include +#include + +#include namespace ProfileEvents { - extern const Event MarkCacheHits; - extern const Event MarkCacheMisses; -} +extern const Event MarkCacheHits; +extern const Event MarkCacheMisses; +} // namespace ProfileEvents namespace DB { - /// Estimate of number of bytes in cache for marks. struct MarksWeightFunction { @@ -53,7 +52,8 @@ class MarkCache : public LRUCache MappedPtr getOrSet(const Key & key, LoadFunc && load) @@ -70,4 +70,4 @@ class MarkCache : public LRUCache; -} +} // namespace DB diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 37a4fd429f4..3bd0bd9c4fa 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -851,8 +851,8 @@ struct BlobStoreGCInfo toTypeString("Read-Only Blob", 0), toTypeString("No GC Blob", 1), toTypeString("Full GC Blob", 2), - toTypeString("Truncated Blob", 3), - toTypeString("Big Blob", 4)); + toTypeString("Big Blob", 3), + toTypeTruncateString("Truncated Blob")); } void appendToReadOnlyBlob(const BlobFileId blob_id, double valid_rate) @@ -870,23 +870,24 @@ struct BlobStoreGCInfo blob_gc_info[2].emplace_back(std::make_pair(blob_id, valid_rate)); } - void appendToTruncatedBlob(const BlobFileId blob_id, double valid_rate) + void appendToBigBlob(const BlobFileId blob_id, double valid_rate) { blob_gc_info[3].emplace_back(std::make_pair(blob_id, valid_rate)); } - void appendToBigBlob(const BlobFileId blob_id, double valid_rate) + void appendToTruncatedBlob(const BlobFileId blob_id, UInt64 origin_size, UInt64 truncated_size, double valid_rate) { - blob_gc_info[4].emplace_back(std::make_pair(blob_id, valid_rate)); + blob_gc_truncate_info.emplace_back(std::make_tuple(blob_id, origin_size, truncated_size, valid_rate)); } private: // 1. read only blob // 2. no need gc blob // 3. full gc blob - // 4. need truncate blob - // 5. big blob - std::vector> blob_gc_info[5]; + // 4. 
big blob + std::vector> blob_gc_info[4]; + + std::vector> blob_gc_truncate_info; String toTypeString(const std::string_view prefix, const size_t index) const { @@ -911,6 +912,32 @@ struct BlobStoreGCInfo return fmt_buf.toString(); } + + String toTypeTruncateString(const std::string_view prefix) const + { + FmtBuffer fmt_buf; + if (blob_gc_truncate_info.empty()) + { + fmt_buf.fmtAppend("{}: [null]", prefix); + } + else + { + fmt_buf.fmtAppend("{}: [", prefix); + fmt_buf.joinStr( + blob_gc_truncate_info.begin(), + blob_gc_truncate_info.end(), + [](const auto arg, FmtBuffer & fb) { + fb.fmtAppend("{} origin: {} truncate: {} rate: {:.2f}", // + std::get<0>(arg), // blob id + std::get<1>(arg), // origin size + std::get<2>(arg), // truncated size + std::get<3>(arg)); // valid rate + }, + ", "); + fmt_buf.append("]"); + } + return fmt_buf.toString(); + } }; std::vector BlobStore::getGCStats() @@ -953,7 +980,7 @@ std::vector BlobStore::getGCStats() } auto lock = stat->lock(); - auto right_margin = stat->smap->getRightMargin(); + auto right_margin = stat->smap->getUsedBoundary(); // Avoid divide by zero if (right_margin == 0) @@ -966,14 +993,13 @@ std::vector BlobStore::getGCStats() stat->sm_valid_rate)); } - LOG_FMT_TRACE(log, "Current blob is empty [blob_id={}, total size(all invalid)={}] [valid_rate={}].", stat->id, stat->sm_total_size, stat->sm_valid_rate); - // If current blob empty, the size of in disk blob may not empty // So we need truncate current blob, and let it be reused. auto blobfile = getBlobFile(stat->id); - LOG_FMT_TRACE(log, "Truncate empty blob file [blob_id={}] to 0.", stat->id); + LOG_FMT_INFO(log, "Current blob file is empty, truncated to zero [blob_id={}] [total_size={}] [valid_rate={}]", stat->id, stat->sm_total_size, stat->sm_valid_rate); blobfile->truncate(right_margin); - blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate); + blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate); + stat->sm_total_size = right_margin; continue; } @@ -1014,9 +1040,10 @@ std::vector BlobStore::getGCStats() auto blobfile = getBlobFile(stat->id); LOG_FMT_TRACE(log, "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", stat->id, stat->sm_total_size, right_margin); blobfile->truncate(right_margin); + blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate); + stat->sm_total_size = right_margin; stat->sm_valid_rate = stat->sm_valid_size * 1.0 / stat->sm_total_size; - blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate); } } } diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index e9b754854b8..951da42de1c 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -478,7 +478,7 @@ PageSize VersionedPageEntries::getEntriesByBlobIds( bool VersionedPageEntries::cleanOutdatedEntries( UInt64 lowest_seq, std::map> * normal_entries_to_deref, - PageEntriesV3 & entries_removed, + PageEntriesV3 * entries_removed, const PageLock & /*page_lock*/) { if (type == EditRecordType::VAR_EXTERNAL) @@ -541,7 +541,10 @@ bool VersionedPageEntries::cleanOutdatedEntries( { if (iter->second.being_ref_count == 1) { - entries_removed.emplace_back(iter->second.entry); + if (entries_removed) + { + entries_removed->emplace_back(iter->second.entry); + } iter = entries.erase(iter); } // The `being_ref_count` for this version is valid. 
While for older versions, @@ -551,7 +554,10 @@ bool VersionedPageEntries::cleanOutdatedEntries( else { // else there are newer "entry" in the version list, the outdated entries should be removed - entries_removed.emplace_back(iter->second.entry); + if (entries_removed) + { + entries_removed->emplace_back(iter->second.entry); + } iter = entries.erase(iter); } } @@ -564,7 +570,7 @@ bool VersionedPageEntries::cleanOutdatedEntries( return entries.empty() || (entries.size() == 1 && entries.begin()->second.isDelete()); } -bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 & entries_removed) +bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 * entries_removed) { auto page_lock = acquireLock(); if (type == EditRecordType::VAR_EXTERNAL) @@ -1223,7 +1229,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W // `being_ref_count` by the function `createSnapshot()`. assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` return true auto log_num = files_snap.persisted_log_files.rbegin()->log_num; - auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num); + auto identifier = fmt::format("{}.dump_{}", wal->name(), log_num); auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter); PageDirectoryFactory factory; // we just use the `collapsed_dir` to dump edit of the snapshot, should never call functions like `apply` that @@ -1239,7 +1245,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W return done_any_io; } -PageEntriesV3 PageDirectory::gcInMemEntries() +PageEntriesV3 PageDirectory::gcInMemEntries(bool return_removed_entries) { UInt64 lowest_seq = sequence.load(); @@ -1303,7 +1309,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries() const bool all_deleted = iter->second->cleanOutdatedEntries( lowest_seq, &normal_entries_to_deref, - all_del_entries, + return_removed_entries ? &all_del_entries : nullptr, iter->second->acquireLock()); { @@ -1342,7 +1348,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries() page_id, /*deref_ver=*/deref_counter.first, /*deref_count=*/deref_counter.second, - all_del_entries); + return_removed_entries ? &all_del_entries : nullptr); if (all_deleted) { diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index bd7c433022f..2f0f09f4e42 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -223,14 +223,14 @@ class VersionedPageEntries bool cleanOutdatedEntries( UInt64 lowest_seq, std::map> * normal_entries_to_deref, - PageEntriesV3 & entries_removed, + PageEntriesV3 * entries_removed, const PageLock & page_lock); bool derefAndClean( UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, Int64 deref_count, - PageEntriesV3 & entries_removed); + PageEntriesV3 * entries_removed); void collapseTo(UInt64 seq, PageIdV3Internal page_id, PageEntriesEdit & edit); @@ -360,7 +360,9 @@ class PageDirectory bool tryDumpSnapshot(const ReadLimiterPtr & read_limiter = nullptr, const WriteLimiterPtr & write_limiter = nullptr); - PageEntriesV3 gcInMemEntries(); + // Perform a GC for in-memory entries and return the removed entries. + // If `return_removed_entries` is false, then just return an empty set. 
+ PageEntriesV3 gcInMemEntries(bool return_removed_entries = true); std::set getAliveExternalIds(NamespaceId ns_id) const; diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 483c5073ab5..968049a3273 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -44,7 +44,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WAL // After restoring from the disk, we need cleanup all invalid entries in memory, or it will // try to run GC again on some entries that are already marked as invalid in BlobStore. - dir->gcInMemEntries(); + // It's no need to remove the expired entries in BlobStore, so skip filling removed_entries to imporve performance. + dir->gcInMemEntries(/*return_removed_entries=*/false); LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); if (blob_stats) @@ -84,7 +85,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromEdit(String storage_name, FileP // After restoring from the disk, we need cleanup all invalid entries in memory, or it will // try to run GC again on some entries that are already marked as invalid in BlobStore. - dir->gcInMemEntries(); + // It's no need to remove the expired entries in BlobStore when restore, so no need to fill removed_entries. + dir->gcInMemEntries(/*return_removed_entries=*/false); if (blob_stats) { diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h index ae44b608de0..d230b2f3e35 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h @@ -95,9 +95,11 @@ class SpaceMap virtual std::tuple searchInsertOffset(size_t size) = 0; /** - * Get the offset of the last free block. `[margin_offset, +∞)` is not used at all. + * Get the used boundary of this SpaceMap. + * The return value (`used_boundary`) means that `[used_bounary + 1, +∞)` is safe to be truncated. + * If the `used_boundary` is equal to the `end` of this SpaceMap, it means that there is no space to be truncated. */ - virtual UInt64 getRightMargin() = 0; + virtual UInt64 getUsedBoundary() = 0; /** * Get the accurate max capacity of the space map. 
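The doc comment above describes the rule behind the `getRightMargin` → `getUsedBoundary` rename: the returned offset is the point from which the blob file can safely be truncated, and it collapses to `end` whenever the tail of the space is still in use. The following is a minimal, self-contained sketch (not the TiFlash `SpaceMap` API; `ToyFreeMap` and its members are hypothetical names) that mirrors the boundary rule implemented in the patched `STDMapSpaceMap::getUsedBoundary()` and exercised by the new `TestGetUsedBoundary` cases.

```cpp
// Toy illustration only: a free list kept as offset -> length,
// reproducing the getUsedBoundary() rule introduced in this patch.
#include <cassert>
#include <cstdint>
#include <map>

struct ToyFreeMap
{
    uint64_t end;                          // managed space is [0, end)
    std::map<uint64_t, uint64_t> free_map; // offset -> length of each free range

    explicit ToyFreeMap(uint64_t end_) : end(end_) { free_map[0] = end_; }

    // Mark [offset, offset + len) as used. Assumes the span lies entirely
    // inside one existing free range; good enough for this illustration.
    void markUsed(uint64_t offset, uint64_t len)
    {
        auto it = free_map.upper_bound(offset);
        --it; // the free range containing `offset`
        uint64_t start = it->first, length = it->second;
        free_map.erase(it);
        if (offset > start)
            free_map[start] = offset - start; // left remainder stays free
        if (offset + len < start + length)
            free_map[offset + len] = start + length - (offset + len); // right remainder
    }

    // Same rule as the patched getUsedBoundary(): if the last free range does
    // not reach `end`, the tail is used and nothing past `end` can be cut, so
    // return `end`; otherwise everything from the start of that trailing free
    // range onward is safe to truncate.
    uint64_t getUsedBoundary() const
    {
        if (free_map.empty())
            return end;
        auto last = free_map.rbegin();
        if (last->first + last->second != end)
            return end;
        return last->first;
    }
};

int main()
{
    ToyFreeMap smap(100);
    smap.markUsed(50, 10);
    assert(smap.getUsedBoundary() == 60);  // [60, 100) is free: truncate from 60
    smap.markUsed(90, 10);
    assert(smap.getUsedBoundary() == 100); // tail [90, 100) is used: keep everything
    return 0;
}
```

Under these assumptions the two asserts match the expectations added in `gtest_free_map.cpp` (`markUsed(50, 10)` gives a boundary of 60, and once the tail is occupied the boundary becomes the full size), which is why the empty-blob GC path above truncates to `right_margin` and records the origin and truncated sizes separately.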
diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h index 22128a09f30..81c2a5cb786 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h @@ -74,7 +74,7 @@ class BigSpaceMap return std::make_pair(size_in_used, size_in_used); } - UInt64 getRightMargin() override + UInt64 getUsedBoundary() override { return end; } diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp index 54275574060..4bd53b93e07 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp @@ -84,7 +84,7 @@ static void rb_get_new_entry(struct SmapRbEntry ** entry, UInt64 start, UInt64 c { struct SmapRbEntry * new_entry; - new_entry = static_cast(calloc(1, sizeof(struct SmapRbEntry))); + new_entry = static_cast(calloc(1, sizeof(struct SmapRbEntry))); // NOLINT if (new_entry == nullptr) { return; @@ -115,7 +115,7 @@ inline static void rb_free_entry(struct RbPrivate * private_data, struct SmapRbE private_data->read_index_next = nullptr; } - free(entry); + free(entry); // NOLINT } @@ -419,7 +419,7 @@ std::shared_ptr RBTreeSpaceMap::create(UInt64 start, UInt64 end) { auto ptr = std::shared_ptr(new RBTreeSpaceMap(start, end)); - ptr->rb_tree = static_cast(calloc(1, sizeof(struct RbPrivate))); + ptr->rb_tree = static_cast(calloc(1, sizeof(struct RbPrivate))); // NOLINT if (ptr->rb_tree == nullptr) { return nullptr; @@ -435,7 +435,7 @@ std::shared_ptr RBTreeSpaceMap::create(UInt64 start, UInt64 end) if (!rb_insert_entry(start, end, ptr->rb_tree, ptr->log)) { LOG_FMT_ERROR(ptr->log, "Erorr happend, when mark all space free. [start={}] , [end={}]", start, end); - free(ptr->rb_tree); + free(ptr->rb_tree); // NOLINT return nullptr; } return ptr; @@ -451,7 +451,7 @@ static void rb_free_tree(struct rb_root * root) next = rb_tree_next(node); entry = node_to_entry(node); rb_node_remove(node, root); - free(entry); + free(entry); // NOLINT } } @@ -460,7 +460,7 @@ void RBTreeSpaceMap::freeSmap() if (rb_tree) { rb_free_tree(&rb_tree->root); - free(rb_tree); + free(rb_tree); // NOLINT } } @@ -734,7 +734,7 @@ std::pair RBTreeSpaceMap::getSizes() const } } -UInt64 RBTreeSpaceMap::getRightMargin() +UInt64 RBTreeSpaceMap::getUsedBoundary() { struct rb_node * node = rb_tree_last(&rb_tree->root); if (node == nullptr) @@ -743,6 +743,20 @@ UInt64 RBTreeSpaceMap::getRightMargin() } auto * entry = node_to_entry(node); + + // If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used, + // then we should return `end` as the used boundary. + // + // eg. + // 1. The spacemap manage a space of `[0, 100]` + // 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)` + // 3. 
The return value should be 100 + if (entry->start + entry->count != end) + { + return end; + } + + // Else we should return the offset of last free node return entry->start; } diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h index 0393fda081b..04691007a47 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h @@ -46,7 +46,7 @@ class RBTreeSpaceMap std::pair getSizes() const override; - UInt64 getRightMargin() override; + UInt64 getUsedBoundary() override; protected: RBTreeSpaceMap(UInt64 start, UInt64 end) diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h index b6ff8797f0f..41ddd77d03a 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h @@ -111,13 +111,29 @@ class STDMapSpaceMap } } - UInt64 getRightMargin() override + UInt64 getUsedBoundary() override { if (free_map.empty()) { - return end - start; + return end; } - return free_map.rbegin()->first; + + const auto & last_node_it = free_map.rbegin(); + + // If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used, + // then we should return `end` as the used boundary. + // + // eg. + // 1. The spacemap manage a space of `[0, 100]` + // 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)` + // 3. The return value should be 100 + if (last_node_it->first + last_node_it->second != end) + { + return end; + } + + // Else we should return the offset of last free node + return last_node_it->first; } bool isMarkUnused(UInt64 offset, size_t length) override diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index fdd08c7cb8e..f9daacc4cce 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -306,6 +306,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 128, + .padded_size = 0, .tag = 0, .offset = 1024, .checksum = 0x4567, @@ -313,6 +314,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, @@ -320,6 +322,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = file_id2, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, @@ -402,6 +405,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = id, .size = 1024, + .padded_size = 0, .tag = 0, .offset = 0, .checksum = 0x4567, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp index f7120f000b2..faec139920b 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp @@ -427,6 +427,43 @@ TEST_P(SpaceMapTest, TestGetMaxCap) } } + +TEST_P(SpaceMapTest, TestGetUsedBoundary) +{ + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_TRUE(smap->markUsed(50, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 60); + ASSERT_TRUE(smap->markUsed(80, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 90); + + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } + + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + 
ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + + ASSERT_TRUE(smap->markUsed(20, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + + ASSERT_TRUE(smap->markFree(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 30); + + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } + + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_EQ(smap->getUsedBoundary(), 0); + ASSERT_TRUE(smap->markUsed(0, 100)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } +} + INSTANTIATE_TEST_CASE_P( Type, SpaceMapTest, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 83e07f75d37..6d6ef41630f 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -644,14 +644,14 @@ class VersionedEntriesTest : public ::testing::Test { DerefCounter deref_counter; PageEntriesV3 removed_entries; - bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, removed_entries, entries.acquireLock()); + bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, &removed_entries, entries.acquireLock()); return {all_removed, removed_entries, deref_counter}; } std::tuple runDeref(UInt64 seq, PageVersion ver, Int64 decrease_num) { PageEntriesV3 removed_entries; - bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, removed_entries); + bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, &removed_entries); return {all_removed, removed_entries}; } diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp index 078daa3e5b4..74e56c929d8 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp @@ -85,6 +85,16 @@ class PageStorageMixedTest : public DB::base::TiFlashStorageTestBasic return run_mode; } + PageReader newMixedPageReader(PageStorage::SnapshotPtr & snapshot) + { + return storage_pool_mix->newLogReader(nullptr, snapshot); + } + + PageReader newMixedPageReader() + { + return storage_pool_mix->newLogReader(nullptr, true, "PageStorageMixedTest"); + } + void reloadV2StoragePool() { db_context->setPageStorageRunMode(PageStorageRunMode::ONLY_V2); @@ -1035,7 +1045,7 @@ try // Thread A create snapshot for read auto snapshot_mix_before_merge_delta = page_reader_mix->getSnapshot("ReadWithSnapshotAfterMergeDelta"); { - auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_before_merge_delta); + auto page_reader_mix_with_snap = newMixedPageReader(snapshot_mix_before_merge_delta); const auto & page1 = page_reader_mix_with_snap.read(1); const auto & page2 = page_reader_mix_with_snap.read(2); const auto & page3 = page_reader_mix_with_snap.read(3); @@ -1044,7 +1054,7 @@ try ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); } { - auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, true, "ReadWithSnapshotAfterMergeDelta"); + auto page_reader_mix_with_snap = newMixedPageReader(); const auto & page1 = page_reader_mix_with_snap.read(1); const auto & page2 = page_reader_mix_with_snap.read(2); const auto & page3 = page_reader_mix_with_snap.read(3); @@ -1063,7 +1073,7 @@ try } // Thread A continue to read 1, 3 { - auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, 
snapshot_mix_before_merge_delta); + auto page_reader_mix_with_snap = newMixedPageReader(snapshot_mix_before_merge_delta); // read 1, 3 with snapshot, should be success const auto & page1 = page_reader_mix_with_snap.read(1); const auto & page3 = page_reader_mix_with_snap.read(3); @@ -1071,6 +1081,7 @@ try ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); ASSERT_THROW(page_reader_mix_with_snap.read(4), DB::Exception); } + { // Revert v3 WriteBatch batch; @@ -1081,6 +1092,290 @@ try } CATCH +TEST_F(PageStorageMixedTest, refWithSnapshot2) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + auto snapshot_mix = page_reader_mix->getSnapshot(""); + { + WriteBatch batch; + batch.delPage(1); + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page_maps = newMixedPageReader(snapshot_mix).read({1, 2}); + ASSERT_EQ(page_maps.size(), 2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[1], 1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[2], 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot3) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + // to keep mix mode + batch.putExternal(10, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(1); + batch.delPage(2); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_mix->write(std::move(batch), nullptr); + } + + auto snapshot_mix = page_reader_mix->getSnapshot(""); + { + WriteBatch batch; + batch.delPage(1); + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page_maps = newMixedPageReader(snapshot_mix).read({1, 2}); + ASSERT_EQ(page_maps.size(), 2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[1], 1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[2], 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot4) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + 
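The `refWithSnapshot*` and `ReadWithSnapshot2` cases below all follow the same shape: write through the V2 writer, switch to mix mode, take a snapshot, delete or overwrite, then verify that the snapshot still resolves the old data while a plain read sees the new state. A toy model of that expectation (purely illustrative, this is not the PageStorage implementation):

```cpp
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <string>
#include <utility>

// Toy multi-version store: page_id -> (version -> value, or a deletion mark).
struct ToyVersionedPages
{
    std::map<int, std::map<uint64_t, std::optional<std::string>>> versions;
    uint64_t current = 0;

    void put(int page_id, std::string value) { versions[page_id][++current] = std::move(value); }
    void del(int page_id) { versions[page_id][++current] = std::nullopt; }
    uint64_t snapshot() const { return current; }

    // Latest version of the page that is visible at `snap`.
    std::optional<std::string> read(int page_id, uint64_t snap) const
    {
        auto it = versions.find(page_id);
        if (it == versions.end())
            return std::nullopt;
        auto ub = it->second.upper_bound(snap);
        if (ub == it->second.begin())
            return std::nullopt;
        return std::prev(ub)->second;
    }
};

int main()
{
    ToyVersionedPages pages;
    pages.put(1, "old");
    const auto snap = pages.snapshot();
    pages.del(1); // like the WriteBatch::delPage issued after the snapshot is taken
    assert(pages.read(1, snap) == std::optional<std::string>("old")); // snapshot still sees it
    assert(!pages.read(1, pages.snapshot()).has_value()); // a fresh read does not
}
```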
WriteBatch batch; + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page1 = page_reader_mix->read(1); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot5) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + auto page1 = page_reader_mix->read(2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot6) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page1 = page_reader_mix->read(2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, ReadWithSnapshot2) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1; + char c_buff1[buf_sz]; + c_buff1[0] = 1; + + char c_buff2[buf_sz]; + c_buff2[0] = 2; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff1, buf_sz); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + auto snapshot_mix = page_reader_mix->getSnapshot(""); + { + WriteBatch batch; + batch.delPage(1); + ReadBufferPtr buff = std::make_shared(c_buff2, buf_sz); + batch.putPage(1, tag, buff, buf_sz); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page1 = newMixedPageReader(snapshot_mix).read(1); + ASSERT_PAGE_EQ(c_buff1, buf_sz, page1, 1); + } + + { + auto page1 = page_reader_mix->read(1); + ASSERT_PAGE_EQ(c_buff2, buf_sz, page1, 1); + } + + { + // Revert v3 + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } +} +CATCH + } // namespace PS::V3::tests } // namespace DB diff --git a/dbms/src/Storages/Page/workload/CMakeLists.txt b/dbms/src/Storages/Page/workload/CMakeLists.txt index 5c8ecb34d97..adf94c75f11 100644 --- a/dbms/src/Storages/Page/workload/CMakeLists.txt +++ b/dbms/src/Storages/Page/workload/CMakeLists.txt @@ -14,8 +14,7 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR}) -set (page-workload-src HeavyMemoryCostInGC.cpp HeavyRead.cpp HeavySkewWriteRead.cpp HeavyWrite.cpp HighValidBigFileGC.cpp HoldSnapshotsLongTime.cpp Normal.cpp - PageStorageInMemoryCapacity.cpp ThousandsOfOffset.cpp MainEntry.cpp Normal.cpp PageStorageInMemoryCapacity.cpp PSBackground.cpp PSRunnable.cpp 
PSStressEnv.cpp PSWorkload.cpp) +set (page-workload-src MainEntry.cpp PSBackground.cpp PSRunnable.cpp PSStressEnv.cpp PSWorkload.cpp) add_library (page-workload-lib ${page-workload-src}) target_link_libraries (page-workload-lib dbms clickhouse_functions clickhouse-server-lib) diff --git a/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.cpp b/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.h similarity index 98% rename from dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.cpp rename to dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.h index 7e745e29fc2..3daaf10ffb3 100644 --- a/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.cpp +++ b/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.h @@ -81,6 +81,4 @@ class HeavyMemoryCostInGC fmt::format("Memory Peak is {} , it should not bigger than {} ", metrics_dumper->getMemoryPeak(), 5 * 1024 * 1024)); } }; - -REGISTER_WORKLOAD(HeavyMemoryCostInGC) } // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/workload/HeavyRead.cpp b/dbms/src/Storages/Page/workload/HeavyRead.h similarity index 98% rename from dbms/src/Storages/Page/workload/HeavyRead.cpp rename to dbms/src/Storages/Page/workload/HeavyRead.h index a67c435e84c..80023f95988 100644 --- a/dbms/src/Storages/Page/workload/HeavyRead.cpp +++ b/dbms/src/Storages/Page/workload/HeavyRead.h @@ -70,6 +70,4 @@ class HeavyRead : public StressWorkload } } }; - -REGISTER_WORKLOAD(HeavyRead) } // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/HeavySkewWriteRead.cpp b/dbms/src/Storages/Page/workload/HeavySkewWriteRead.h similarity index 98% rename from dbms/src/Storages/Page/workload/HeavySkewWriteRead.cpp rename to dbms/src/Storages/Page/workload/HeavySkewWriteRead.h index 805bf105358..0e75bc0d3e5 100644 --- a/dbms/src/Storages/Page/workload/HeavySkewWriteRead.cpp +++ b/dbms/src/Storages/Page/workload/HeavySkewWriteRead.h @@ -85,6 +85,4 @@ class HeavySkewWriteRead : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(HeavySkewWriteRead) } // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/HeavyWrite.cpp b/dbms/src/Storages/Page/workload/HeavyWrite.h similarity index 98% rename from dbms/src/Storages/Page/workload/HeavyWrite.cpp rename to dbms/src/Storages/Page/workload/HeavyWrite.h index 8dfd7f810f7..54b7585ee20 100644 --- a/dbms/src/Storages/Page/workload/HeavyWrite.cpp +++ b/dbms/src/Storages/Page/workload/HeavyWrite.h @@ -72,6 +72,4 @@ class HeavyWrite : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(HeavyWrite) } // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/HighValidBigFileGC.cpp b/dbms/src/Storages/Page/workload/HighValidBigFileGC.h similarity index 98% rename from dbms/src/Storages/Page/workload/HighValidBigFileGC.cpp rename to dbms/src/Storages/Page/workload/HighValidBigFileGC.h index a9af6aebb76..cc3b5b45135 100644 --- a/dbms/src/Storages/Page/workload/HighValidBigFileGC.cpp +++ b/dbms/src/Storages/Page/workload/HighValidBigFileGC.h @@ -129,6 +129,4 @@ class HighValidBigFileGCWorkload private: UInt64 gc_time_ms = 0; }; - -REGISTER_WORKLOAD(HighValidBigFileGCWorkload) } // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.cpp b/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.h similarity index 98% rename from dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.cpp rename to 
dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.h index f02fbf65bcd..071a104010c 100644 --- a/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.cpp +++ b/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.h @@ -94,6 +94,4 @@ class HoldSnapshotsLongTime : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(HoldSnapshotsLongTime) } // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/MainEntry.cpp b/dbms/src/Storages/Page/workload/MainEntry.cpp index ac82e1ea4bc..18e42106c90 100644 --- a/dbms/src/Storages/Page/workload/MainEntry.cpp +++ b/dbms/src/Storages/Page/workload/MainEntry.cpp @@ -11,44 +11,32 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include +#include +#include +#include +#include +#include +#include #include #include +#include +#include using namespace DB::PS::tests; int StressWorkload::mainEntry(int argc, char ** argv) { { - // maybe due to sequence of linking, REGISTER_WORKLOAD is not visible to main function in dbms/src/Server/main.cpp - // cause that REGISTER_WORKLOAD will not be triggered before mainEntry - // we do this to trigger REGISTER_WORKLOAD explicitly. - void _work_load_register_named_HeavyMemoryCostInGC(); - void (*f)() = _work_load_register_named_HeavyMemoryCostInGC; - (void)f; - void _work_load_register_named_HeavyRead(); - f = _work_load_register_named_HeavyRead; - (void)f; - void _work_load_register_named_HeavySkewWriteRead(); - f = _work_load_register_named_HeavySkewWriteRead; - (void)f; - void _work_load_register_named_HeavyWrite(); - f = _work_load_register_named_HeavyWrite; - (void)f; - void _work_load_register_named_HighValidBigFileGCWorkload(); - f = _work_load_register_named_HighValidBigFileGCWorkload; - (void)f; - void _work_load_register_named_HoldSnapshotsLongTime(); - f = _work_load_register_named_HoldSnapshotsLongTime; - (void)f; - void _work_load_register_named_PageStorageInMemoryCapacity(); - f = _work_load_register_named_PageStorageInMemoryCapacity; - (void)f; - void _work_load_register_named_NormalWorkload(); - f = _work_load_register_named_NormalWorkload; - (void)f; - void _work_load_register_named_ThousandsOfOffset(); - f = _work_load_register_named_ThousandsOfOffset; - (void)f; + work_load_register(); + work_load_register(); + work_load_register(); + work_load_register(); + work_load_register(); + work_load_register(); + work_load_register(); + work_load_register(); + work_load_register(); } try { diff --git a/dbms/src/Storages/Page/workload/Normal.cpp b/dbms/src/Storages/Page/workload/Normal.h similarity index 98% rename from dbms/src/Storages/Page/workload/Normal.cpp rename to dbms/src/Storages/Page/workload/Normal.h index 57229395809..164f17b9d61 100644 --- a/dbms/src/Storages/Page/workload/Normal.cpp +++ b/dbms/src/Storages/Page/workload/Normal.h @@ -77,6 +77,4 @@ class NormalWorkload stop_watch.stop(); } }; - -REGISTER_WORKLOAD(NormalWorkload) } // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/workload/PSWorkload.h b/dbms/src/Storages/Page/workload/PSWorkload.h index eaaaf4eba5b..26a9c24d6da 100644 --- a/dbms/src/Storages/Page/workload/PSWorkload.h +++ b/dbms/src/Storages/Page/workload/PSWorkload.h @@ -193,15 +193,15 @@ class StressWorkloadManger StressEnv options; }; -#define REGISTER_WORKLOAD(WORKLOAD) \ - void __attribute__((constructor)) _work_load_register_named_##WORKLOAD(void) \ - { \ - 
StressWorkloadManger::getInstance().reg( \ - WORKLOAD::nameFunc(), \ - WORKLOAD::maskFunc(), \ - [](const StressEnv & opts) -> std::shared_ptr { \ - return std::make_shared(opts); \ - }); \ - } +template +void work_load_register() +{ + StressWorkloadManger::getInstance().reg( + Workload::nameFunc(), + Workload::maskFunc(), + [](const StressEnv & opts) -> std::shared_ptr { + return std::make_shared(opts); + }); +} } // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.cpp b/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.h similarity index 99% rename from dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.cpp rename to dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.h index 6ab321d1a10..337c732e6f7 100644 --- a/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.cpp +++ b/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.h @@ -174,6 +174,4 @@ class PageStorageInMemoryCapacity : public StressWorkload std::round(resident_used) ? (total_mem / ((double)resident_used / page_writen)) : 0)); } }; - -REGISTER_WORKLOAD(PageStorageInMemoryCapacity) } // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/ThousandsOfOffset.cpp b/dbms/src/Storages/Page/workload/ThousandsOfOffset.h similarity index 99% rename from dbms/src/Storages/Page/workload/ThousandsOfOffset.cpp rename to dbms/src/Storages/Page/workload/ThousandsOfOffset.h index 5a02ef48d68..0232ea235f1 100644 --- a/dbms/src/Storages/Page/workload/ThousandsOfOffset.cpp +++ b/dbms/src/Storages/Page/workload/ThousandsOfOffset.h @@ -169,6 +169,4 @@ class ThousandsOfOffset : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(ThousandsOfOffset) } // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 0dc05674696..1d7c0ace57f 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -34,24 +34,6 @@ #include - -namespace ProfileEvents -{ -extern const Event StorageBufferFlush; -extern const Event StorageBufferErrorOnFlush; -extern const Event StorageBufferPassedAllMinThresholds; -extern const Event StorageBufferPassedTimeMaxThreshold; -extern const Event StorageBufferPassedRowsMaxThreshold; -extern const Event StorageBufferPassedBytesMaxThreshold; -} // namespace ProfileEvents - -namespace CurrentMetrics -{ -extern const Metric StorageBufferRows; -extern const Metric StorageBufferBytes; -} // namespace CurrentMetrics - - namespace DB { namespace ErrorCodes @@ -170,10 +152,6 @@ static void appendBlock(const Block & from, Block & to) to.checkNumberOfRows(); size_t rows = from.rows(); - size_t bytes = from.bytes(); - - CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows); - CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, bytes); size_t old_rows = to.rows(); @@ -430,25 +408,21 @@ bool StorageBuffer::checkThresholdsImpl(size_t rows, size_t bytes, time_t time_p { if (time_passed > min_thresholds.time && rows > min_thresholds.rows && bytes > min_thresholds.bytes) { - ProfileEvents::increment(ProfileEvents::StorageBufferPassedAllMinThresholds); return true; } if (time_passed > max_thresholds.time) { - ProfileEvents::increment(ProfileEvents::StorageBufferPassedTimeMaxThreshold); return true; } if (rows > max_thresholds.rows) { - ProfileEvents::increment(ProfileEvents::StorageBufferPassedRowsMaxThreshold); return true; } if (bytes > max_thresholds.bytes) { - 
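Returning to the PSWorkload registration rework above: the `__attribute__((constructor))` macro is replaced by a plain function template that `mainEntry` calls explicitly, so registration no longer depends on static-initialization or link order. A stripped-down sketch of that pattern, with hypothetical registry and workload types rather than the real ones:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Workload
{
    virtual ~Workload() = default;
    virtual void run() = 0;
};

// Hypothetical minimal registry holding name -> factory.
struct Registry
{
    static Registry & instance()
    {
        static Registry r;
        return r;
    }
    std::map<std::string, std::function<std::unique_ptr<Workload>()>> factories;
};

template <typename W>
void registerWorkload()
{
    Registry::instance().factories.emplace(W::name(), [] { return std::make_unique<W>(); });
}

struct HeavyWriteDemo : Workload
{
    static std::string name() { return "HeavyWriteDemo"; }
    void run() override { std::cout << "running " << name() << "\n"; }
};

int main()
{
    // Called explicitly from the entry point, so nothing relies on global constructors.
    registerWorkload<HeavyWriteDemo>();
    Registry::instance().factories.at("HeavyWriteDemo")()->run();
}
```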
ProfileEvents::increment(ProfileEvents::StorageBufferPassedBytesMaxThreshold); return true; } @@ -495,11 +469,6 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) buffer.data.swap(block_to_write); buffer.first_write_time = 0; - CurrentMetrics::sub(CurrentMetrics::StorageBufferRows, block_to_write.rows()); - CurrentMetrics::sub(CurrentMetrics::StorageBufferBytes, block_to_write.bytes()); - - ProfileEvents::increment(ProfileEvents::StorageBufferFlush); - LOG_FMT_TRACE(log, "Flushing buffer with {} rows, {} bytes, age {} seconds.", rows, bytes, time_passed); if (no_destination) @@ -517,13 +486,7 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) } catch (...) { - ProfileEvents::increment(ProfileEvents::StorageBufferErrorOnFlush); - /// Return the block to its place in the buffer. - - CurrentMetrics::add(CurrentMetrics::StorageBufferRows, block_to_write.rows()); - CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, block_to_write.bytes()); - buffer.data.swap(block_to_write); if (!buffer.first_write_time) diff --git a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h index c636d9e60ab..b0cacefe6f4 100644 --- a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h +++ b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h @@ -77,10 +77,12 @@ struct DecodingStorageSchemaSnapshot , decoding_schema_version{decoding_schema_version_} { std::unordered_map column_lut; + std::unordered_map column_name_id_map; for (size_t i = 0; i < table_info_.columns.size(); i++) { const auto & ci = table_info_.columns[i]; column_lut.emplace(ci.id, i); + column_name_id_map.emplace(ci.name, ci.id); } for (size_t i = 0; i < column_defines->size(); i++) { @@ -88,7 +90,7 @@ struct DecodingStorageSchemaSnapshot sorted_column_id_with_pos.insert({cd.id, i}); if (cd.id != TiDBPkColumnID && cd.id != VersionColumnID && cd.id != DelMarkColumnID) { - auto & columns = table_info_.columns; + const auto & columns = table_info_.columns; column_infos.push_back(columns[column_lut.at(cd.id)]); } else @@ -100,10 +102,14 @@ struct DecodingStorageSchemaSnapshot // create pk related metadata if needed if (is_common_handle) { - const auto & primary_index_info = table_info_.getPrimaryIndexInfo(); - for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) + /// we will not update the IndexInfo except Rename DDL. + /// When the add column / drop column action happenes, the offset of each column may change + /// Thus, we should not use offset to get the column we want, + /// but use to compare the column name to get the column id. 
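The comment above describes the hazard: `IndexColumnInfo::offset` reflects the column layout at index-creation time, so after an ADD COLUMN or DROP COLUMN it can point at the wrong column. The lines that follow therefore resolve primary-key columns by name. A small sketch of the same lookup, using illustrative stand-in types rather than the real TiDB structs:

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct ColumnStub { int64_t id; std::string name; };
struct IndexColumnStub { std::string name; int offset; }; // offset may be stale after DDL

// Resolve the primary-key column ids by name; never trust the stored offset.
std::vector<int64_t> resolvePkColumnIds(const std::vector<ColumnStub> & columns,
                                        const std::vector<IndexColumnStub> & idx_cols)
{
    std::unordered_map<std::string, int64_t> name_to_id;
    for (const auto & col : columns)
        name_to_id.emplace(col.name, col.id);

    std::vector<int64_t> pk_ids;
    pk_ids.reserve(idx_cols.size());
    for (const auto & idx_col : idx_cols)
        pk_ids.push_back(name_to_id.at(idx_col.name));
    return pk_ids;
}
```

After dropping column B from [A, B, C, D], the name lookup still yields the ids of A and C, whereas indexing `columns` with the stored offsets could pick the wrong entry; the new `gtest_decoding_storage_schema_snapshot.cpp` further down covers exactly this case.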
+ const auto & primary_index_cols = table_info_.getPrimaryIndexInfo().idx_cols; + for (const auto & col : primary_index_cols) { - auto pk_column_id = table_info_.columns[primary_index_info.idx_cols[i].offset].id; + auto pk_column_id = column_name_id_map[col.name]; pk_column_ids.emplace_back(pk_column_id); pk_pos_map.emplace(pk_column_id, reinterpret_cast(std::numeric_limits::max())); } @@ -125,11 +131,11 @@ struct DecodingStorageSchemaSnapshot { auto pk_pos_iter = pk_pos_map.begin(); size_t column_pos_in_block = 0; - for (auto iter = sorted_column_id_with_pos.begin(); iter != sorted_column_id_with_pos.end(); iter++) + for (auto & column_id_with_pos : sorted_column_id_with_pos) { if (pk_pos_iter == pk_pos_map.end()) break; - if (pk_pos_iter->first == iter->first) + if (pk_pos_iter->first == column_id_with_pos.first) { pk_pos_iter->second = column_pos_in_block; pk_pos_iter++; diff --git a/dbms/src/Storages/Transaction/PDTiKVClient.cpp b/dbms/src/Storages/Transaction/PDTiKVClient.cpp index 5a4b751fd9c..a06f1a3ae64 100644 --- a/dbms/src/Storages/Transaction/PDTiKVClient.cpp +++ b/dbms/src/Storages/Transaction/PDTiKVClient.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Timestamp PDClientHelper::cached_gc_safe_point = 0; -std::chrono::time_point PDClientHelper::safe_point_last_update_time; +std::atomic PDClientHelper::cached_gc_safe_point = 0; +std::atomic> PDClientHelper::safe_point_last_update_time; } // namespace DB diff --git a/dbms/src/Storages/Transaction/PDTiKVClient.h b/dbms/src/Storages/Transaction/PDTiKVClient.h index 4986c28f4ac..e5801cc7fae 100644 --- a/dbms/src/Storages/Transaction/PDTiKVClient.h +++ b/dbms/src/Storages/Transaction/PDTiKVClient.h @@ -29,6 +29,8 @@ #include #include +#include + // We define a shared ptr here, because TMTContext / SchemaSyncer / IndexReader all need to // `share` the resource of cluster. using KVClusterPtr = std::shared_ptr; @@ -49,7 +51,7 @@ struct PDClientHelper { // In case we cost too much to update safe point from PD. std::chrono::time_point now = std::chrono::system_clock::now(); - const auto duration = std::chrono::duration_cast(now - safe_point_last_update_time); + const auto duration = std::chrono::duration_cast(now - safe_point_last_update_time.load()); const auto min_interval = std::max(Int64(1), safe_point_update_interval_seconds); // at least one second if (duration.count() < min_interval) return cached_gc_safe_point; @@ -73,8 +75,8 @@ struct PDClientHelper } private: - static Timestamp cached_gc_safe_point; - static std::chrono::time_point safe_point_last_update_time; + static std::atomic cached_gc_safe_point; + static std::atomic> safe_point_last_update_time; }; diff --git a/dbms/src/Storages/Transaction/PartitionStreams.cpp b/dbms/src/Storages/Transaction/PartitionStreams.cpp index 4b2ca6c07a8..cf151c4270d 100644 --- a/dbms/src/Storages/Transaction/PartitionStreams.cpp +++ b/dbms/src/Storages/Transaction/PartitionStreams.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
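On the PDTiKVClient change above: both the cached GC safe point and its last-update time become `std::atomic`, so concurrent readers can consult the cache without a lock and at most pay for a PD round-trip once per interval. A rough sketch of that caching pattern, with a hypothetical fetch callback rather than the real client API:

```cpp
#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>

struct SafePointCacheSketch
{
    std::atomic<uint64_t> cached_safe_point{0};
    std::atomic<std::chrono::system_clock::time_point> last_update{std::chrono::system_clock::time_point{}};

    uint64_t get(const std::function<uint64_t()> & fetch_from_pd, std::chrono::seconds min_interval)
    {
        const auto now = std::chrono::system_clock::now();
        if (std::chrono::duration_cast<std::chrono::seconds>(now - last_update.load()) < min_interval)
            return cached_safe_point.load(); // still fresh enough, skip the PD round-trip

        // Benign race: two threads may both refresh; both store a valid, recent value.
        const uint64_t fresh = fetch_from_pd();
        cached_safe_point.store(fresh);
        last_update.store(now);
        return fresh;
    }
};
```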
#include +#include #include #include #include @@ -39,6 +40,8 @@ namespace FailPoints extern const char pause_before_apply_raft_cmd[]; extern const char pause_before_apply_raft_snapshot[]; extern const char force_set_safepoint_when_decode_block[]; +extern const char unblock_query_init_after_write[]; +extern const char pause_query_init[]; } // namespace FailPoints namespace ErrorCodes @@ -150,6 +153,7 @@ static void writeRegionDataToStorage( default: throw Exception("Unknown StorageEngine: " + toString(static_cast(storage->engineType())), ErrorCodes::LOGICAL_ERROR); } + write_part_cost = watch.elapsedMilliseconds(); GET_METRIC(tiflash_raft_write_data_to_storage_duration_seconds, type_write).Observe(write_part_cost / 1000.0); if (need_decode) @@ -164,10 +168,20 @@ static void writeRegionDataToStorage( /// decoding data. Check the test case for more details. FAIL_POINT_PAUSE(FailPoints::pause_before_apply_raft_cmd); + /// disable pause_query_init when the write action finish, to make the query action continue. + /// the usage of unblock_query_init_after_write and pause_query_init can refer to InterpreterSelectQuery::init + SCOPE_EXIT({ + fiu_do_on(FailPoints::unblock_query_init_after_write, { + FailPointHelper::disableFailPoint(FailPoints::pause_query_init); + }); + }); + /// Try read then write once. { if (atomic_read_write(false)) + { return; + } } /// If first try failed, sync schema and force read then write. @@ -176,10 +190,12 @@ static void writeRegionDataToStorage( tmt.getSchemaSyncer()->syncSchemas(context); if (!atomic_read_write(true)) + { // Failure won't be tolerated this time. // TODO: Enrich exception message. throw Exception("Write region " + std::to_string(region->id()) + " to table " + std::to_string(table_id) + " failed", ErrorCodes::LOGICAL_ERROR); + } } } diff --git a/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp b/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp index dafacd8947d..792f149f588 100644 --- a/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp @@ -22,26 +22,6 @@ namespace DB { -HttpRequestRes HandleHttpRequestTestShow( - EngineStoreServerWrap *, - std::string_view path, - const std::string & api_name, - std::string_view query, - std::string_view body) -{ - auto * res = RawCppString::New(fmt::format( - "api_name: {}\npath: {}\nquery: {}\nbody: {}", - api_name, - path, - query, - body)); - return HttpRequestRes{ - .status = HttpRequestStatus::Ok, - .res = CppStrWithView{ - .inner = GenRawCppPtr(res, RawCppPtrTypeImpl::String), - .view = BaseBuffView{res->data(), res->size()}}}; -} - HttpRequestRes HandleHttpRequestSyncStatus( EngineStoreServerWrap * server, std::string_view path, @@ -112,8 +92,7 @@ using HANDLE_HTTP_URI_METHOD = HttpRequestRes (*)(EngineStoreServerWrap *, std:: static const std::map AVAILABLE_HTTP_URI = { {"/tiflash/sync-status/", HandleHttpRequestSyncStatus}, - {"/tiflash/store-status", HandleHttpRequestStoreStatus}, - {"/tiflash/test-show", HandleHttpRequestTestShow}}; + {"/tiflash/store-status", HandleHttpRequestStoreStatus}}; uint8_t CheckHttpUriAvailable(BaseBuffView path_) { diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index af351f4a6b0..2ec690c467b 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -186,6 +186,7 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } else { + // 
For common handle, sometimes we need to decode the value from encoded key instead of encoded value auto * raw_extra_column = const_cast((block.getByPosition(extra_handle_column_pos)).column.get()); raw_extra_column->insertData(pk->data(), pk->size()); /// decode key and insert pk columns if needed @@ -207,6 +208,8 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } index++; } + block.checkNumberOfRows(); + return true; } diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.h b/dbms/src/Storages/Transaction/RegionBlockReader.h index ec633e805c0..004d9f40447 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.h +++ b/dbms/src/Storages/Transaction/RegionBlockReader.h @@ -41,7 +41,7 @@ class Block; class RegionBlockReader : private boost::noncopyable { public: - RegionBlockReader(DecodingStorageSchemaSnapshotConstPtr schema_snapshot_); + explicit RegionBlockReader(DecodingStorageSchemaSnapshotConstPtr schema_snapshot_); /// Read `data_list` as a block. /// diff --git a/dbms/src/Storages/Transaction/RowCodec.cpp b/dbms/src/Storages/Transaction/RowCodec.cpp index 427544a0467..ea7f6b7c2da 100644 --- a/dbms/src/Storages/Transaction/RowCodec.cpp +++ b/dbms/src/Storages/Transaction/RowCodec.cpp @@ -314,7 +314,7 @@ bool appendRowV2ToBlock( ColumnID pk_handle_id, bool force_decode) { - UInt8 row_flag = readLittleEndian(&raw_value[1]); + auto row_flag = readLittleEndian(&raw_value[1]); bool is_big = row_flag & RowV2::BigRowMask; return is_big ? appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode) : appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode); @@ -360,9 +360,10 @@ bool appendRowV2ToBlockImpl( decodeUInts::ColumnIDType>(cursor, raw_value, num_null_columns, null_column_ids); decodeUInts::ValueOffsetType>(cursor, raw_value, num_not_null_columns, value_offsets); size_t values_start_pos = cursor; - size_t id_not_null = 0, id_null = 0; + size_t idx_not_null = 0; + size_t idx_null = 0; // Merge ordered not null/null columns to keep order. - while (id_not_null < not_null_column_ids.size() || id_null < null_column_ids.size()) + while (idx_not_null < not_null_column_ids.size() || idx_null < null_column_ids.size()) { if (column_ids_iter == column_ids_iter_end) { @@ -371,24 +372,31 @@ bool appendRowV2ToBlockImpl( } bool is_null; - if (id_not_null < not_null_column_ids.size() && id_null < null_column_ids.size()) - is_null = not_null_column_ids[id_not_null] > null_column_ids[id_null]; + if (idx_not_null < not_null_column_ids.size() && idx_null < null_column_ids.size()) + is_null = not_null_column_ids[idx_not_null] > null_column_ids[idx_null]; else - is_null = id_null < null_column_ids.size(); + is_null = idx_null < null_column_ids.size(); - auto next_datum_column_id = is_null ? null_column_ids[id_null] : not_null_column_ids[id_not_null]; + auto next_datum_column_id = is_null ? null_column_ids[idx_null] : not_null_column_ids[idx_not_null]; if (column_ids_iter->first > next_datum_column_id) { - // extra column + // The next column id to read is bigger than the column id of next datum in encoded row. + // It means this is the datum of extra column. May happen when reading after dropping + // a column. 
if (!force_decode) return false; + // Ignore the extra column and continue to parse other datum if (is_null) - id_null++; + idx_null++; else - id_not_null++; + idx_not_null++; } else if (column_ids_iter->first < next_datum_column_id) { + // The next column id to read is less than the column id of next datum in encoded row. + // It means this is the datum of missing column. May happen when reading after adding + // a column. + // Fill with default value and continue to read data for next column id. const auto & column_info = column_infos[column_ids_iter->second]; if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, force_decode)) return false; @@ -397,7 +405,7 @@ bool appendRowV2ToBlockImpl( } else { - // if pk_handle_id is a valid column id, then it means the table's pk_is_handle is true + // If pk_handle_id is a valid column id, then it means the table's pk_is_handle is true // we can just ignore the pk value encoded in value part if (unlikely(column_ids_iter->first == pk_handle_id)) { @@ -405,15 +413,16 @@ bool appendRowV2ToBlockImpl( block_column_pos++; if (is_null) { - id_null++; + idx_null++; } else { - id_not_null++; + idx_not_null++; } continue; } + // Parse the datum. auto * raw_column = const_cast((block.getByPosition(block_column_pos)).column.get()); const auto & column_info = column_infos[column_ids_iter->second]; if (is_null) @@ -432,15 +441,15 @@ bool appendRowV2ToBlockImpl( } // ColumnNullable::insertDefault just insert a null value raw_column->insertDefault(); - id_null++; + idx_null++; } else { - size_t start = id_not_null ? value_offsets[id_not_null - 1] : 0; - size_t length = value_offsets[id_not_null] - start; + size_t start = idx_not_null ? value_offsets[idx_not_null - 1] : 0; + size_t length = value_offsets[idx_not_null] - start; if (!raw_column->decodeTiDBRowV2Datum(values_start_pos + start, raw_value, length, force_decode)) return false; - id_not_null++; + idx_not_null++; } column_ids_iter++; block_column_pos++; diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index 15bf2a3fb58..dc7f1f3e348 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -631,8 +631,8 @@ catch (const Poco::Exception & e) /////////////////////// IndexColumnInfo::IndexColumnInfo(Poco::JSON::Object::Ptr json) - : offset(0) - , length(0) + : length(0) + , offset(0) { deserialize(json); } diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index f67bfb332c7..4c28a614857 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -179,7 +179,6 @@ struct ColumnInfo ColumnID id = -1; String name; - Int32 offset = -1; Poco::Dynamic::Var origin_default_value; Poco::Dynamic::Var default_value; Poco::Dynamic::Var default_bit_value; @@ -212,6 +211,12 @@ struct ColumnInfo static Int64 getTimeValue(const String &); static Int64 getYearValue(const String &); static UInt64 getBitValue(const String &); + +private: + /// please be very careful when you have to use offset, + /// because we never update offset when DDL action changes. + /// Thus, our offset will not exactly correspond the order of columns. + Int32 offset = -1; }; enum PartitionType @@ -298,8 +303,13 @@ struct IndexColumnInfo void deserialize(Poco::JSON::Object::Ptr json); String name; - Int32 offset; Int32 length; + +private: + /// please be very careful when you have to use offset, + /// because we never update offset when DDL action changes. 
+ /// Thus, our offset will not exactly correspond the order of columns. + Int32 offset; }; struct IndexInfo { @@ -385,7 +395,12 @@ struct TableInfo bool isLogicalPartitionTable() const { return is_partition_table && belonging_table_id == DB::InvalidTableID && partition.enable; } - /// should not be called if is_common_handle = false + /// should not be called if is_common_handle = false. + /// when use IndexInfo, please avoid to use the offset info + /// the offset value may be wrong in some cases, + /// due to we will not update IndexInfo except RENAME DDL action, + /// but DDL like add column / drop column may change the offset of columns + /// Thus, please be very careful when you must have to use offset information !!!!! const IndexInfo & getPrimaryIndexInfo() const { return index_infos[0]; } IndexInfo & getPrimaryIndexInfo() { return index_infos[0]; } diff --git a/dbms/src/Storages/Transaction/TiKVRecordFormat.h b/dbms/src/Storages/Transaction/TiKVRecordFormat.h index 4a25b6d9292..10a7f7220e9 100644 --- a/dbms/src/Storages/Transaction/TiKVRecordFormat.h +++ b/dbms/src/Storages/Transaction/TiKVRecordFormat.h @@ -30,7 +30,6 @@ namespace DB { - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -38,7 +37,6 @@ extern const int LOGICAL_ERROR; namespace RecordKVFormat { - enum CFModifyFlag : UInt8 { PutFlag = 'P', @@ -83,17 +81,35 @@ inline TiKVKey encodeAsTiKVKey(const String & ori_str) return TiKVKey(ss.releaseStr()); } -inline UInt64 encodeUInt64(const UInt64 x) { return toBigEndian(x); } +inline UInt64 encodeUInt64(const UInt64 x) +{ + return toBigEndian(x); +} -inline UInt64 encodeInt64(const Int64 x) { return encodeUInt64(static_cast(x) ^ SIGN_MASK); } +inline UInt64 encodeInt64(const Int64 x) +{ + return encodeUInt64(static_cast(x) ^ SIGN_MASK); +} -inline UInt64 encodeUInt64Desc(const UInt64 x) { return encodeUInt64(~x); } +inline UInt64 encodeUInt64Desc(const UInt64 x) +{ + return encodeUInt64(~x); +} -inline UInt64 decodeUInt64(const UInt64 x) { return toBigEndian(x); } +inline UInt64 decodeUInt64(const UInt64 x) +{ + return toBigEndian(x); +} -inline UInt64 decodeUInt64Desc(const UInt64 x) { return ~decodeUInt64(x); } +inline UInt64 decodeUInt64Desc(const UInt64 x) +{ + return ~decodeUInt64(x); +} -inline Int64 decodeInt64(const UInt64 x) { return static_cast(decodeUInt64(x) ^ SIGN_MASK); } +inline Int64 decodeInt64(const UInt64 x) +{ + return static_cast(decodeUInt64(x) ^ SIGN_MASK); +} inline void encodeInt64(const Int64 x, WriteBuffer & ss) { @@ -125,7 +141,10 @@ inline DecodedTiKVKey genRawKey(const TableID tableId, const HandleID handleId) return key; } -inline TiKVKey genKey(const TableID tableId, const HandleID handleId) { return encodeAsTiKVKey(genRawKey(tableId, handleId)); } +inline TiKVKey genKey(const TableID tableId, const HandleID handleId) +{ + return encodeAsTiKVKey(genRawKey(tableId, handleId)); +} inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector keys) { @@ -135,9 +154,16 @@ inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector key memcpy(key.data() + 1, reinterpret_cast(&big_endian_table_id), 8); memcpy(key.data() + 1 + 8, RecordKVFormat::RECORD_PREFIX_SEP, 2); WriteBufferFromOwnString ss; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < keys.size(); i++) { - DB::EncodeDatum(keys[i], 
table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset].getCodecFlag(), ss); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + DB::EncodeDatum(keys[i], table_info.columns[idx].getCodecFlag(), ss); } return encodeAsTiKVKey(key + ss.releaseStr()); } @@ -176,29 +202,50 @@ inline std::tuple decodeTiKVKeyFull(const TiKVKey & key) } } -inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key) { return std::get<0>(decodeTiKVKeyFull(key)); } +inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key) +{ + return std::get<0>(decodeTiKVKeyFull(key)); +} -inline Timestamp getTs(const TiKVKey & key) { return decodeUInt64Desc(read(key.data() + key.dataSize() - 8)); } +inline Timestamp getTs(const TiKVKey & key) +{ + return decodeUInt64Desc(read(key.data() + key.dataSize() - 8)); +} -inline TableID getTableId(const DecodedTiKVKey & key) { return decodeInt64(read(key.data() + 1)); } +inline TableID getTableId(const DecodedTiKVKey & key) +{ + return decodeInt64(read(key.data() + 1)); +} -inline HandleID getHandle(const DecodedTiKVKey & key) { return decodeInt64(read(key.data() + RAW_KEY_NO_HANDLE_SIZE)); } +inline HandleID getHandle(const DecodedTiKVKey & key) +{ + return decodeInt64(read(key.data() + RAW_KEY_NO_HANDLE_SIZE)); +} inline RawTiDBPK getRawTiDBPK(const DecodedTiKVKey & key) { return std::make_shared(key.begin() + RAW_KEY_NO_HANDLE_SIZE, key.end()); } -inline TableID getTableId(const TiKVKey & key) { return getTableId(decodeTiKVKey(key)); } +inline TableID getTableId(const TiKVKey & key) +{ + return getTableId(decodeTiKVKey(key)); +} -inline HandleID getHandle(const TiKVKey & key) { return getHandle(decodeTiKVKey(key)); } +inline HandleID getHandle(const TiKVKey & key) +{ + return getHandle(decodeTiKVKey(key)); +} inline bool isRecord(const DecodedTiKVKey & raw_key) { return raw_key.size() >= RAW_KEY_SIZE && raw_key[0] == TABLE_PREFIX && memcmp(raw_key.data() + 9, RECORD_PREFIX_SEP, 2) == 0; } -inline TiKVKey truncateTs(const TiKVKey & key) { return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp))); } +inline TiKVKey truncateTs(const TiKVKey & key) +{ + return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp))); +} inline TiKVKey appendTs(const TiKVKey & key, Timestamp ts) { @@ -215,7 +262,12 @@ inline TiKVKey genKey(TableID tableId, HandleID handleId, Timestamp ts) } inline TiKVValue encodeLockCfValue( - UInt8 lock_type, const String & primary, Timestamp ts, UInt64 ttl, const String * short_value = nullptr, Timestamp min_commit_ts = 0) + UInt8 lock_type, + const String & primary, + Timestamp ts, + UInt64 ttl, + const String * short_value = nullptr, + Timestamp min_commit_ts = 0) { WriteBufferFromOwnString res; res.write(lock_type); @@ -275,7 +327,10 @@ inline R readVarInt(const char *& data, size_t & len) return res; } -inline UInt64 readVarUInt(const char *& data, size_t & len) { return readVarInt(data, len); } +inline UInt64 readVarUInt(const char *& data, size_t & len) +{ + return readVarInt(data, len); +} inline UInt8 readUInt8(const char *& data, size_t & len) { @@ -347,30 +402,29 @@ inline DecodedWriteCFValue decodeWriteCfValue(const TiKVValue & value) auto flag = RecordKVFormat::readUInt8(data, len); switch (flag) { - case RecordKVFormat::SHORT_VALUE_PREFIX: - { - size_t slen = RecordKVFormat::readUInt8(data, len); - if (slen > len) - throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR); - short_value = RecordKVFormat::readRawString(data, len, slen); - break; - } - 
case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK: - // ignore - break; - case RecordKVFormat::GC_FENCE_PREFIX: - /** + case RecordKVFormat::SHORT_VALUE_PREFIX: + { + size_t slen = RecordKVFormat::readUInt8(data, len); + if (slen > len) + throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR); + short_value = RecordKVFormat::readRawString(data, len, slen); + break; + } + case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK: + // ignore + break; + case RecordKVFormat::GC_FENCE_PREFIX: + /** * according to https://github.com/tikv/tikv/pull/9207, when meet `GC fence` flag, it is definitely a * rewriting record and there must be a complete row written to tikv, just ignore it in tiflash. */ - return std::nullopt; - default: - throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR); + return std::nullopt; + default: + throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR); } } - return InnerDecodedWriteCFValue{write_type, prewrite_ts, - short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())}; + return InnerDecodedWriteCFValue{write_type, prewrite_ts, short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())}; } inline TiKVValue encodeWriteCfValue(UInt8 write_type, Timestamp ts, std::string_view short_value = {}, bool gc_fence = false) diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h index 20b395a9952..34e0d3d4104 100644 --- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h +++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h @@ -237,14 +237,14 @@ std::pair> getTableInfoAndFields(ColumnIDs handle_ { table_info.is_common_handle = true; TiDB::IndexInfo index_info; - for (size_t i = 0; i < handle_ids.size(); i++) + for (auto handle_id : handle_ids) { TiDB::IndexColumnInfo index_column_info; - for (size_t pos = 0; pos < table_info.columns.size(); pos++) + for (auto & column : table_info.columns) { - if (table_info.columns[pos].id == handle_ids[i]) + if (column.id == handle_id) { - index_column_info.offset = pos; + index_column_info.name = column.name; break; } } diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp new file mode 100644 index 00000000000..05ab637de7f --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -0,0 +1,171 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
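The new `bench_region_block_reader.cpp` below uses Google Benchmark's fixture API: `BENCHMARK_DEFINE_F` defines the benchmark body, `BENCHMARK_REGISTER_F` attaches iteration counts and arguments, and the resulting cases are picked up by the `bench_dbms` runner. A self-contained sketch of that registration pattern, with demo names only:

```cpp
#include <benchmark/benchmark.h>

#include <numeric>
#include <vector>

class DemoFixture : public benchmark::Fixture
{
public:
    // state.range(0) carries the argument passed via ->Arg(...) at registration time.
    void SetUp(const benchmark::State & state) override
    {
        data.assign(static_cast<size_t>(state.range(0)), 1);
    }
    std::vector<int> data;
};

BENCHMARK_DEFINE_F(DemoFixture, SumRows)
(benchmark::State & state)
{
    for (auto _ : state)
    {
        auto sum = std::accumulate(data.begin(), data.end(), 0LL);
        benchmark::DoNotOptimize(sum);
    }
}

// Mirrors the style used below: a fixed iteration count and several input sizes.
BENCHMARK_REGISTER_F(DemoFixture, SumRows)->Iterations(1000)->Arg(1)->Arg(10)->Arg(100);

BENCHMARK_MAIN();
```

Individual cases can then be selected at run time with `--benchmark_filter`, e.g. `./bench_dbms --benchmark_filter=RegionBlockReaderBenchTest`.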
+ +#include +#include +#include +#include + +#include "RowCodecTestUtils.h" + +using TableInfo = TiDB::TableInfo; +namespace DB::tests +{ +using ColumnIDs = std::vector; +class RegionBlockReaderBenchTest : public benchmark::Fixture +{ +protected: + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; + + enum RowEncodeVersion + { + RowV1, + RowV2 + }; + +protected: + void SetUp(const benchmark::State & /*state*/) override + { + data_list_read.clear(); + fields_map.clear(); + } + + void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version, size_t num_rows) + { + // for later check + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + + std::vector value_fields; + std::vector pk_fields; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + if (!table_info.columns[i].hasPriKeyFlag()) + value_fields.emplace_back(fields[i]); + else + pk_fields.emplace_back(fields[i]); + } + + // create PK + WriteBufferFromOwnString pk_buf; + if (table_info.is_common_handle) + { + auto & primary_index_info = table_info.getPrimaryIndexInfo(); + for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) + { + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); + } + } + else + { + DB::EncodeInt64(handle_value, pk_buf); + } + RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; + // create value + WriteBufferFromOwnString value_buf; + if (row_version == RowEncodeVersion::RowV1) + { + encodeRowV1(table_info, value_fields, value_buf); + } + else if (row_version == RowEncodeVersion::RowV2) + { + encodeRowV2(table_info, value_fields, value_buf); + } + else + { + throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); + } + auto row_value = std::make_shared(std::move(value_buf.str())); + for (size_t i = 0; i < num_rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); + } + + bool decodeColumns(DecodingStorageSchemaSnapshotConstPtr decoding_schema, bool force_decode) const + { + RegionBlockReader reader{decoding_schema}; + Block block = createBlockSortByColumnID(decoding_schema); + return reader.read(block, data_list_read, force_decode); + } + + std::pair> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const + { + return getTableInfoAndFields( + handle_ids, + is_common_handle, + ColumnIDValue(2, handle_value), + ColumnIDValue(3, std::numeric_limits::max()), + ColumnIDValue(4, std::numeric_limits::min()), + ColumnIDValue(9, String("aaa")), + ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4)), + ColumnIDValueNull(11)); + } +}; + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandle) +(benchmark::State & state) +{ + size_t num_rows = 
state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({2}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +constexpr size_t num_iterations_test = 1000; + +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); + +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp new file mode 100644 index 00000000000..1de9809ecad --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -0,0 +1,65 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +#include "RowCodecTestUtils.h" + +namespace DB::tests +{ +static TableInfo getTableInfoByJson(const String & json_table_info) +{ + return TableInfo(json_table_info); +} +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) +{ + // table with column [A,B,C,D], primary keys [A,C] + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":2,"name":{"O":"B","L":"b"},"offset":1,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":0,"Flen":20,"Decimal":0,"Charset":"utf8mb4","Collate":"utf8mb4_bin","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + //check decoding_schema->pk_column_ids infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + //check decoding_schema->pk_pos_map infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden column in the decoded block, 
so the position of A,C is 3,5 + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 5); +} + +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndexAfterDropColumn) +{ + // drop column B for [A,B,C,D]; table with column [A,C,D], primary keys [A,C] + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + //check decoding_schema->pk_column_ids infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + //check decoding_schema->pk_pos_map infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden column in the decoded block, so the position of A,C is 3,4 + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 4); +} + +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 
f0cafce3914..36a91522bb6 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -18,10 +18,9 @@ #include #include #include +#include #include -#include "region_helper.h" - namespace DB { namespace RegionBench @@ -37,13 +36,6 @@ extern void ChangeRegionStateRange(RegionState & region_state, bool source_at_le namespace tests { -RegionPtr makeRegion(UInt64 id, const std::string start_key, const std::string end_key, const TiFlashRaftProxyHelper * proxy_helper = nullptr) -{ - return std::make_shared( - RegionMeta(createPeer(2, true), createRegionInfo(id, std::move(start_key), std::move(end_key)), initialApplyState()), - proxy_helper); -} - class RegionKVStoreTest : public ::testing::Test { public: diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index 6a883230854..d08b4dd3738 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -26,13 +26,13 @@ using ColumnIDs = std::vector; class RegionBlockReaderTestFixture : public ::testing::Test { protected: - Int64 handle_value_ = 100; - UInt8 del_mark_value_ = 0; - UInt64 version_value_ = 100; - size_t rows_ = 3; + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + size_t rows = 3; - RegionDataReadInfoList data_list_read_; - std::unordered_map fields_map_; + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; enum RowEncodeVersion { @@ -43,8 +43,8 @@ class RegionBlockReaderTestFixture : public ::testing::Test protected: void SetUp() override { - data_list_read_.clear(); - fields_map_.clear(); + data_list_read.clear(); + fields_map.clear(); } void TearDown() override {} @@ -52,8 +52,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) { // for later check + std::unordered_map column_name_columns_index_map; for (size_t i = 0; i < table_info.columns.size(); i++) - fields_map_.emplace(table_info.columns[i].id, fields[i]); + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } std::vector value_fields; std::vector pk_fields; @@ -72,13 +76,13 @@ class RegionBlockReaderTestFixture : public ::testing::Test auto & primary_index_info = table_info.getPrimaryIndexInfo(); for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { - size_t pk_offset = primary_index_info.idx_cols[i].offset; - EncodeDatum(pk_fields[i], table_info.columns[pk_offset].getCodecFlag(), pk_buf); + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); } } else { - DB::EncodeInt64(handle_value_, pk_buf); + DB::EncodeInt64(handle_value, pk_buf); } RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; // create value @@ -96,44 +100,44 @@ class RegionBlockReaderTestFixture : public ::testing::Test throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } auto row_value = std::make_shared(std::move(value_buf.str())); - for (size_t i = 0; i < rows_; i++) - data_list_read_.emplace_back(pk, del_mark_value_, version_value_, row_value); + for (size_t i = 0; i < rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, 
row_value); } void checkBlock(DecodingStorageSchemaSnapshotConstPtr decoding_schema, const Block & block) const { ASSERT_EQ(block.columns(), decoding_schema->column_defines->size()); - for (size_t row = 0; row < rows_; row++) + for (size_t row = 0; row < rows; row++) { for (size_t pos = 0; pos < block.columns(); pos++) { - auto & column_element = block.getByPosition(pos); + const auto & column_element = block.getByPosition(pos); if (row == 0) { - ASSERT_EQ(column_element.column->size(), rows_); + ASSERT_EQ(column_element.column->size(), rows); } if (column_element.name == EXTRA_HANDLE_COLUMN_NAME) { if (decoding_schema->is_common_handle) { - ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read_[row]))); + ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))); } else { - ASSERT_EQ((*column_element.column)[row], Field(handle_value_)); + ASSERT_EQ((*column_element.column)[row], Field(handle_value)); } } else if (column_element.name == VERSION_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(version_value_)); + ASSERT_EQ((*column_element.column)[row], Field(version_value)); } else if (column_element.name == TAG_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value_))); + ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))); } else { - ASSERT_EQ((*column_element.column)[row], fields_map_.at(column_element.column_id)); + ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); } } } @@ -143,7 +147,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test { RegionBlockReader reader{decoding_schema}; Block block = createBlockSortByColumnID(decoding_schema); - if (!reader.read(block, data_list_read_, force_decode)) + if (!reader.read(block, data_list_read, force_decode)) return false; checkBlock(decoding_schema, block); @@ -155,7 +159,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test return getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -170,7 +174,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test handle_ids, is_common_handle, ColumnIDValue(1, String("")), - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(8, String("")), @@ -182,12 +186,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test // add default value for missing column std::vector missing_column_ids{1, 8, 13}; String missing_column_default_value = String("default"); - for (size_t i = 0; i < table_info.columns.size(); i++) + for (auto & column : table_info.columns) { - if (std::find(missing_column_ids.begin(), missing_column_ids.end(), table_info.columns[i].id) != missing_column_ids.end()) + if (std::find(missing_column_ids.begin(), missing_column_ids.end(), column.id) != missing_column_ids.end()) { - table_info.columns[i].origin_default_value = missing_column_default_value; - fields_map_.emplace(table_info.columns[i].id, Field(missing_column_default_value)); + column.origin_default_value = missing_column_default_value; + fields_map.emplace(column.id, Field(missing_column_default_value)); } } return table_info; @@ -199,7 +203,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test 
std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4))); @@ -212,7 +216,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -227,7 +231,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), diff --git a/dbms/src/Storages/Transaction/tests/region_helper.h b/dbms/src/Storages/Transaction/tests/region_helper.h index 2808ace0ecb..39bae2669ab 100644 --- a/dbms/src/Storages/Transaction/tests/region_helper.h +++ b/dbms/src/Storages/Transaction/tests/region_helper.h @@ -18,8 +18,10 @@ #include -using namespace DB; - +namespace DB +{ +namespace tests +{ #define ASSERT_CHECK(cond, res) \ do \ { \ @@ -37,7 +39,7 @@ using namespace DB; #define ASSERT_CHECK_EQUAL(a, b, res) \ do \ { \ - if (!(a == b)) \ + if (!((a) == (b))) \ { \ std::cerr << __FILE__ << ":" << __LINE__ << ":" \ << " Assertion " << #a << " == " << #b << " failed.\n"; \ @@ -76,3 +78,16 @@ inline RegionMeta createRegionMeta(UInt64 id, DB::TableID table_id, std::optiona /*region=*/createRegionInfo(id, RecordKVFormat::genKey(table_id, 0), RecordKVFormat::genKey(table_id, 300)), /*apply_state_=*/apply_state.value_or(initialApplyState())); } + +inline RegionPtr makeRegion(UInt64 id, const std::string start_key, const std::string end_key, const TiFlashRaftProxyHelper * proxy_helper = nullptr) +{ + return std::make_shared( + RegionMeta( + createPeer(2, true), + createRegionInfo(id, std::move(start_key), std::move(end_key)), + initialApplyState()), + proxy_helper); +} + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Storages/tests/gtest_filter_parser.cpp b/dbms/src/Storages/tests/gtest_filter_parser.cpp index 8820c05d2da..3a554fcf4b6 100644 --- a/dbms/src/Storages/tests/gtest_filter_parser.cpp +++ b/dbms/src/Storages/tests/gtest_filter_parser.cpp @@ -98,7 +98,7 @@ DM::RSOperatorPtr FilterParserTest::generateRsOperator(const String table_info_j DM::ColumnDefines columns_to_read; { NamesAndTypes source_columns; - std::tie(source_columns, std::ignore) = parseColumnsFromTableInfo(table_info, log->getLog()); + std::tie(source_columns, std::ignore) = parseColumnsFromTableInfo(table_info); dag_query = std::make_unique( conditions, DAGPreparedSets(), diff --git a/dbms/src/TableFunctions/ITableFunction.cpp b/dbms/src/TableFunctions/ITableFunction.cpp index ca05075cac0..d262a5637f7 100644 --- a/dbms/src/TableFunctions/ITableFunction.cpp +++ b/dbms/src/TableFunctions/ITableFunction.cpp @@ -15,17 +15,10 @@ #include #include - -namespace ProfileEvents -{ -extern const Event TableFunctionExecute; -} - namespace DB { StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context & context) const { - ProfileEvents::increment(ProfileEvents::TableFunctionExecute); return executeImpl(ast_function, context); } 
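
The `RegionBlockReaderTestFixture` change above stops trusting `idx_cols[i].offset` when encoding the primary key and instead builds a name-to-position map, which is the same property the new `DecodingStorageSchemaSnapshotTest` cases verify. A minimal, self-contained sketch of why the stored offset goes stale once a column is dropped; the `ColumnInfo` struct below is a made-up stand-in, not the TiFlash type:

```cpp
#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical, simplified column metadata for illustration only.
struct ColumnInfo
{
    int64_t id;
    std::string name;
};

int main()
{
    // The table originally had columns [A, B, C, D]; the clustered index is (A, C),
    // and the index metadata still records C at its original offset 2.
    std::vector<ColumnInfo> columns_after_drop_b = {{1, "A"}, {3, "C"}, {4, "D"}};
    const size_t recorded_offset_of_c = 2; // stale: points at D after dropping B

    // Offset-based lookup silently picks the wrong column.
    assert(columns_after_drop_b[recorded_offset_of_c].name == "D");

    // Name-based lookup, as the fixture now does, stays correct.
    std::unordered_map<std::string, size_t> name_to_index;
    for (size_t i = 0; i < columns_after_drop_b.size(); ++i)
        name_to_index.emplace(columns_after_drop_b[i].name, i);
    assert(columns_after_drop_b[name_to_index.at("C")].name == "C");
    return 0;
}
```

The same reasoning explains why the expected positions in the unit test above shift from (3, 5) to (3, 4) once column B is dropped.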
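Separately, the `ASSERT_CHECK_EQUAL` macro in `region_helper.h` now parenthesizes its arguments (`!((a) == (b))`). A short illustration of the precedence bug the old expansion invites; the macro names here are invented for the example:

```cpp
#include <iostream>

// Minimal sketch (not the TiFlash macros themselves) of why the extra parentheses matter.
#define CHECK_MISMATCH_OLD(a, b) (!(a == b))      // expands arguments textually, no grouping
#define CHECK_MISMATCH_NEW(a, b) (!((a) == (b)))  // arguments grouped before comparison

int main()
{
    unsigned flags = 0x2;
    // Intended meaning: "(flags & 0x2) == 0x2".
    // Old expansion: !(flags & 0x2 == 0x2) -> !(flags & (0x2 == 0x2)) -> !(flags & 1),
    // which wrongly reports a mismatch for flags == 0x2.
    std::cout << std::boolalpha
              << "old macro reports mismatch: " << static_cast<bool>(CHECK_MISMATCH_OLD(flags & 0x2, 0x2)) << '\n'  // true (wrong)
              << "new macro reports mismatch: " << static_cast<bool>(CHECK_MISMATCH_NEW(flags & 0x2, 0x2)) << '\n'; // false (correct)
    return 0;
}
```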
diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index 637fbf51c00..7fb526aeb01 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -108,14 +108,15 @@ void blockEqual( const Block & actual) { size_t columns = actual.columns(); + size_t expected_columns = expected.columns(); - ASSERT_TRUE(expected.columns() == columns); + ASSERT_EQ(expected_columns, columns); for (size_t i = 0; i < columns; ++i) { const auto & expected_col = expected.getByPosition(i); const auto & actual_col = actual.getByPosition(i); - ASSERT_TRUE(actual_col.type->getName() == expected_col.type->getName()); + ASSERT_EQ(actual_col.type->getName(), expected_col.type->getName()); ASSERT_COLUMN_EQ(expected_col.column, actual_col.column); } } diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index e1ccbdbb010..9a6e92dd9c1 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -219,6 +219,11 @@ DAGRequestBuilder & DAGRequestBuilder::project(MockAsts exprs) } DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNames col_names) +{ + return project(MockColumnNamesVec(col_names)); +} + +DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNamesVec col_names) { assert(root); auto exp_list = std::make_shared(); @@ -440,4 +445,5 @@ DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name) } return builder; } -} // namespace DB::tests \ No newline at end of file + +} // namespace DB::tests diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index d52b5ec674a..bad92c4226d 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -31,6 +31,7 @@ using MockOrderByItems = std::initializer_list; using MockPartitionByItem = std::pair; using MockPartitionByItems = std::initializer_list; using MockColumnNames = std::initializer_list; +using MockColumnNamesVec = std::vector; using MockAsts = std::initializer_list; using MockWindowFrame = mock::MockWindowFrame; @@ -84,6 +85,7 @@ class DAGRequestBuilder DAGRequestBuilder & project(const String & col_name); DAGRequestBuilder & project(MockAsts expr); DAGRequestBuilder & project(MockColumnNames col_names); + DAGRequestBuilder & project(MockColumnNamesVec col_names); DAGRequestBuilder & exchangeSender(tipb::ExchangeType exchange_type); @@ -181,11 +183,11 @@ MockWindowFrame buildDefaultRowsFrame(); #define gt(expr1, expr2) makeASTFunction("greater", (expr1), (expr2)) #define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2)) #define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2)) -#define NOT(expr) makeASTFunction("not", (expr1), (expr2)) -#define Max(expr) makeASTFunction("max", expr) +#define NOT(expr) makeASTFunction("not", (expr)) +#define Max(expr) makeASTFunction("max", (expr)) /// Window functions #define RowNumber() makeASTFunction("RowNumber") #define Rank() makeASTFunction("Rank") #define DenseRank() makeASTFunction("DenseRank") -} // namespace DB::tests \ No newline at end of file +} // namespace DB::tests diff --git a/dbms/src/TiDB/Schema/SchemaBuilder-internal.h b/dbms/src/TiDB/Schema/SchemaBuilder-internal.h index a331205ce8c..94edcbea204 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder-internal.h +++ b/dbms/src/TiDB/Schema/SchemaBuilder-internal.h @@ -35,7 +35,7 @@ struct TableInfo; } namespace DB { -std::tuple parseColumnsFromTableInfo(const TiDB::TableInfo & table_info, Poco::Logger * log); +std::tuple 
parseColumnsFromTableInfo(const TiDB::TableInfo & table_info); constexpr char tmpNamePrefix[] = "_tiflash_tmp_"; diff --git a/dbms/src/TiDB/Schema/SchemaBuilder.cpp b/dbms/src/TiDB/Schema/SchemaBuilder.cpp index 99e540e6c95..ae78923fc61 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder.cpp +++ b/dbms/src/TiDB/Schema/SchemaBuilder.cpp @@ -370,7 +370,15 @@ void SchemaBuilder::applyAlterPhysicalTable(DBInfoPtr db_inf const auto & schema_change = schema_changes[i]; /// Update column infos by applying schema change in this step. schema_change.second(orig_table_info); - /// Update schema version aggressively for the sake of correctness. + /// Update schema version aggressively for the sake of correctness(for read part). + /// In read action, we will use table_info.schema_version(storage_version) and TiDBSchemaSyncer.cur_version(global_version) to compare with query_version, to decide whether we can read under this query_version, or we need to make the schema newer. + /// In our comparison logic, we only serve the query when the query schema version meet the criterion: storage_version <= query_version <= global_version(The more detail info you can refer the comments in DAGStorageInterpreter::getAndLockStorages.) + /// And when apply multi diffs here, we only update global_version when all diffs have been applied. + /// So the global_version may be less than the actual "global_version" of the local schema in the process of applying schema changes. + /// And if we don't update the storage_version ahead of time, we may meet the following case when apply multiple diffs: storage_version <= global_version < actual "global_version". + /// If we receive a query with the same version as global_version, we can have the following scenario: storage_version <= global_version == query_version < actual "global_version". + /// And because storage_version <= global_version == query_version meet the criterion of serving the query, the query will be served. But query_version < actual "global_version" indicates that we use a newer schema to server an older query which may cause some inconsistency issue. + /// So we update storage_version aggressively to prevent the above scenario happens. 
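
A compact sketch of the serving criterion spelled out in the comment above, `storage_version <= query_version <= global_version`, and of why bumping the storage version ahead of the global version closes the window described there. The function and variable names are illustrative only, not the TiFlash interfaces:

```cpp
#include <cstdint>
#include <iostream>

// A query is served only when storage_version <= query_version <= global_version.
bool canServeQuery(int64_t storage_version, int64_t query_version, int64_t global_version)
{
    return storage_version <= query_version && query_version <= global_version;
}

int main()
{
    // While two diffs are being applied, the storage already reflects the newer schema,
    // but the global version is still the value published before the first diff.
    const int64_t global_version = 100; // not yet advanced to 102
    const int64_t query_version = 100;

    const int64_t storage_version_lazy = 100;  // not bumped yet
    const int64_t storage_version_eager = 102; // bumped aggressively to the target version

    std::cout << std::boolalpha
              << canServeQuery(storage_version_lazy, query_version, global_version) << '\n'   // true: served with a too-new schema
              << canServeQuery(storage_version_eager, query_version, global_version) << '\n'; // false: the query must wait for a schema sync
    return 0;
}
```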
orig_table_info.schema_version = target_version; auto alter_lock = storage->lockForAlter(getThreadName()); storage->alterFromTiDB( @@ -963,13 +971,12 @@ void SchemaBuilder::applyDropSchema(const String & db_name) } std::tuple -parseColumnsFromTableInfo(const TiDB::TableInfo & table_info, Poco::Logger * log) +parseColumnsFromTableInfo(const TiDB::TableInfo & table_info) { NamesAndTypes columns; std::vector primary_keys; for (const auto & column : table_info.columns) { - LOG_FMT_DEBUG(log, "Analyzing column: {}, type: {}", column.name, static_cast(column.tp)); DataTypePtr type = getDataTypeByColumnInfo(column); columns.emplace_back(column.name, type); if (column.hasPriKeyFlag()) @@ -999,7 +1006,7 @@ String createTableStmt( Poco::Logger * log) { LOG_FMT_DEBUG(log, "Analyzing table info : {}", table_info.serialize()); - auto [columns, pks] = parseColumnsFromTableInfo(table_info, log); + auto [columns, pks] = parseColumnsFromTableInfo(table_info); String stmt; WriteBufferFromString stmt_buf(stmt); diff --git a/dbms/src/TiDB/Schema/SchemaGetter.h b/dbms/src/TiDB/Schema/SchemaGetter.h index cfa5e1c6335..02d2f7a7c88 100644 --- a/dbms/src/TiDB/Schema/SchemaGetter.h +++ b/dbms/src/TiDB/Schema/SchemaGetter.h @@ -28,6 +28,7 @@ namespace DB { +// The enum results are completely the same as the DDL Action listed in the "parser/model/ddl.go" of TiDB codebase, which must be keeping in sync. enum class SchemaActionType : Int8 { None = 0, diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 5fd25c5d238..2bedb312d07 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -198,3 +198,7 @@ if (ARCH_AMD64) src/crc64_sse2_asimd.cpp APPEND COMPILE_FLAGS "-mpclmul") endif() + +if (ARCH_AARCH64 AND ARCH_LINUX) + target_link_libraries (common PUBLIC tiflash-aarch64-string tiflash-aarch64-math) +endif() diff --git a/libs/libcommon/include/common/types.h b/libs/libcommon/include/common/types.h index 139fc10e980..87c7215d91f 100644 --- a/libs/libcommon/include/common/types.h +++ b/libs/libcommon/include/common/types.h @@ -25,6 +25,7 @@ #if defined(__clang__) #pragma GCC diagnostic ignored "-Wunknown-warning-option" #pragma GCC diagnostic ignored "-Wdeprecated-copy" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #pragma GCC diagnostic ignored "-Wtautological-constant-out-of-range-compare" #endif #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" diff --git a/tests/fullstack-test-dt/clustered_index/ddl.test b/tests/fullstack-test-dt/clustered_index/ddl.test index 8abe450c11a..6c4925c9619 100644 --- a/tests/fullstack-test-dt/clustered_index/ddl.test +++ b/tests/fullstack-test-dt/clustered_index/ddl.test @@ -66,3 +66,89 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t_2 mysql> drop table test.t_1; mysql> drop table test.t_2; + +### about issue 5154 to check whether add column/drop column will effect the cluster index decode +### drop the column between two columns that are cluster index columns + +mysql> drop table if exists test.t_3; +mysql> create table test.t_3 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); +mysql> insert into test.t_3 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_3 set tiflash replica 1; + +func> wait_table test t_3 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_3 drop column B; + +mysql> 
set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | ++---+---+---+ + +# insert some rows +mysql> insert into test.t_3 values (3,3,3),(4,4,4); + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | +| 3 | 3 | 3 | +| 4 | 4 | 4 | ++---+---+---+ + +mysql> drop table test.t_3; + +### add the column between two columns that are cluster index columns +mysql> drop table if exists test.t_4 +mysql> create table test.t_4 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); + +mysql> insert into test.t_4 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_4 set tiflash replica 1; + +func> wait_table test t_4 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_4 Add column E int after B; + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+------+---+---+ +| A | B | E | C | D | ++---+---+------+---+---+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | ++---+---+------+---+---+ + +mysql> insert into test.t_4 values (3,'3',3,3,3),(4,'4',4,4,4); + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+------+------+------+ +| A | B | E | C | D | ++---+---+------+------+------+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | +| 3 | 3 | 3 | 3 | 3 | +| 4 | 4 | 4 | 4 | 4 | ++---+---+------+------+------+ + +mysql> drop table test.t_4; \ No newline at end of file diff --git a/tests/fullstack-test/mpp/issue_2471.test b/tests/fullstack-test/mpp/issue_2471.test index 4a1528595e8..497ce605893 100644 --- a/tests/fullstack-test/mpp/issue_2471.test +++ b/tests/fullstack-test/mpp/issue_2471.test @@ -35,7 +35,15 @@ mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_opt_bro => DBGInvoke __enable_fail_point(exception_in_creating_set_input_stream) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_opt_broadcast_cartesian_join=2; select * from a as t1 left join a as t2 on t1.id = t2.id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_in_creating_set_input_stream is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_in_creating_set_input_stream is triggered., e.what() = DB::Exception, Stack trace: +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} => DBGInvoke __disable_fail_point(exception_in_creating_set_input_stream) diff --git a/tests/fullstack-test/mpp/mpp_fail.test b/tests/fullstack-test/mpp/mpp_fail.test index 02259a90681..e03c6150be6 100644 --- a/tests/fullstack-test/mpp/mpp_fail.test +++ b/tests/fullstack-test/mpp/mpp_fail.test @@ -21,6 +21,7 @@ mysql> insert into test.t values(1,'a'),(2,'b'),(3,'c') mysql> alter table test.t set tiflash replica 1 func> wait_table test t +mysql> analyze table test.t # Data. 
@@ -70,20 +71,44 @@ ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_be ## exception during mpp run non root task => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered., e.what() = DB::Exception, Stack trace: +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) ## exception during mpp run root task => DBGInvoke __enable_fail_point(exception_during_mpp_root_task_run) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered., e.what() = DB::Exception, Stack trace: +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} => DBGInvoke __disable_fail_point(exception_during_mpp_root_task_run) ## exception during mpp write err to tunnel => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __enable_fail_point(exception_during_mpp_write_err_to_tunnel) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : Failed to write error msg to tunnel +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Failed to write error msg to tunnel, e.what() = DB::Exception, Stack trace: +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __disable_fail_point(exception_during_mpp_write_err_to_tunnel) @@ -91,7 +116,14 @@ ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchang => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __enable_fail_point(exception_during_mpp_close_tunnel) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. 
+ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered., e.what() = DB::Exception, Stack trace: +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __disable_fail_point(exception_during_mpp_close_tunnel) @@ -124,7 +156,16 @@ ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchang ## ensure build1, build2-probe1, probe2 in the CreatingSets, test the bug where build1 throw exception but not change the build state, thus block the build2-probe1, at last this query hangs. => DBGInvoke __enable_fail_point(exception_mpp_hash_build) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; set @@tidb_broadcast_join_threshold_count=0; set @@tidb_broadcast_join_threshold_size=0; select t1.id from test.t t1 join test.t t2 on t1.id = t2.id and t1.id <2 join (select id from test.t group by id) t3 on t2.id=t3.id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_mpp_hash_build is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_mpp_hash_build is triggered., e.what() = DB::Exception, Stack trace: +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} +{#LINE} => DBGInvoke __disable_fail_point(exception_mpp_hash_build) # Clean up. diff --git a/tests/fullstack-test2/ddl/multi_alter_with_write.test b/tests/fullstack-test2/ddl/multi_alter_with_write.test new file mode 100644 index 00000000000..3284511d775 --- /dev/null +++ b/tests/fullstack-test2/ddl/multi_alter_with_write.test @@ -0,0 +1,880 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# this test focus on the case when multi DDL actions happen closely +#( and these DDL actions will be fetched in the same regular sync schema duration.) +# and there are some corresponding insert(write) actions between these DDL actions. +# Considering that these write actions and these schema change will arrive at +# tiflash in a different order, we simulate these different order situation to check +# that our schema module was working correctly. 
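
The orderings exercised below all rest on the same mechanism, visible in the "write cmd N --> sync schema(alter cmd M)" annotations: when a write carries data that the locally known schema cannot decode, the first decode attempt fails, a schema sync is triggered, and decoding is retried. A rough sketch of that retry loop under invented names, not the actual TiFlash interfaces:

```cpp
#include <iostream>

// Decoding succeeds only when the local schema is at least as new as the data (illustrative rule).
bool decodeWithSchema(int local_schema_version, int data_schema_version)
{
    return local_schema_version >= data_schema_version;
}

int syncSchemaFromTiDB(int /*current_version*/, int latest_version)
{
    return latest_version; // pretend the sync fetches the latest schema
}

int main()
{
    int local_schema_version = 1;           // alter cmd 1 not applied locally yet
    const int latest_schema_version = 2;    // TiDB already ran alter cmd 1
    const int incoming_write_version = 2;   // write cmd 2 was produced under the newer schema

    if (!decodeWithSchema(local_schema_version, incoming_write_version))
    {
        // First pass fails, so the schema is synced before the decode is retried.
        local_schema_version = syncSchemaFromTiDB(local_schema_version, latest_schema_version);
    }
    std::cout << std::boolalpha
              << decodeWithSchema(local_schema_version, incoming_write_version) << '\n'; // true
    return 0;
}
```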
+ +# TiDB Timeline : write cmd 1 | alter cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 + +# stop regular schema sync +=> DBGInvoke __enable_schema_sync_service('false') + +# Enable the failpoint and make it pause before applying the raft cmd to write a row +>> DBGInvoke __init_fail_point() +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# Enable the failpoint to make our query only start when the write action finished +>> DBGInvoke __enable_fail_point(unblock_query_init_after_write) + +# ----------------------------------------------------------------------------- +# Order 1 : write cmd 1 | alter cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 +# ----------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# --------------------------------------------------------------------------------------------- +# Order 2 : write cmd 1 | alter cmd 1 | write cmd 2 | write cmd 3 --> sync schema(alter cmd 2) +# --------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int 
primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 3 --> call sync schema and get alter cmd 2 happen +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 3 : write cmd 1 | alter cmd 1 | alter cmd 2 | write cmd 2 -->sync schema() | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + 
+# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 4 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1) | alter cmd 2 | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# check no schema change before write cmd 2 take effect +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 2 --> should call sync schema, get the alter cmd 1 happened. 
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# --------------------------------------------------------------------------------------------------------------------- +# Order 5 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1) | write cmd 3 --> sync schema(alter cmd 2) +# ---------------------------------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 2 --> should call sync schema, get the alter cmd 1 happened. +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 3 --> should call sync schema, get the alter cmd 2 happened. 
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 6 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1 alter cmd 2) | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 2 --> should call sync schema, get the alter cmd 1 && alter cmd 2 happened. +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ------------------------------------------------------------------------------- +# Order 7 : alter cmd 1 | write cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 +# ------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
+mysql> insert into test.t (a, b, c) values (0, 0, ' '); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 0 │ 0.00 │ │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# -------------------------------------------------------------------------------------------------- +# Order 8 : alter cmd 1 | write cmd 1 | write cmd 2 | write cmd 3 --> sync schema(alter cmd 2) +# -------------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
+mysql> insert into test.t (a, b, c) values (0, 0, ' '); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 0 │ 0.00 │ │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 3 --> should call sync schema, get the alter cmd 2 happened. +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# -------------------------------------------------------------------------------------------------- +# Order 9 : alter cmd 1 | write cmd 1 | alter cmd 2 | write cmd 2 -->sync schema() | write cmd 3 +# -------------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
+mysql> insert into test.t (a, b, c) values (0, 0, ' '); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 0 │ 0.00 │ │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ------------------------------------------------------------------------------------------------------------------ +# Order 10 : alter cmd 1 | alter cmd 2 | write cmd 1 -->sync schema() | write cmd 2 -->sync schema() | write cmd 3 +# ------------------------------------------------------------------------------------------------------------------ + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
+mysql> insert into test.t (a, b, c) values (0, 0, ' '); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 0 │ 0.00 │ │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 and write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + + +## + +=> DBGInvoke __enable_schema_sync_service('true') +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) +>> DBGInvoke __disable_fail_point(unblock_query_init_after_write) +>> DBGInvoke __disable_fail_point(pause_query_init) \ No newline at end of file diff --git a/tests/fullstack-test2/ddl/rename_table_across_databases.test b/tests/fullstack-test2/ddl/rename_table_across_databases.test index c78c27138a0..bc27668bd0c 100644 --- a/tests/fullstack-test2/ddl/rename_table_across_databases.test +++ b/tests/fullstack-test2/ddl/rename_table_across_databases.test @@ -52,7 +52,7 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test_new +------+------+ # check if table info updated. 
->> select tidb_database,tidb_name from system.tables where is_tombstone = 0 and (tidb_database = 'test' and tidb_name='t') or (tidb_database='test_new' and tidb_name='t2') +>> select tidb_database,tidb_name from system.tables where is_tombstone = 0 and ((tidb_database = 'test' and tidb_name='t') or (tidb_database='test_new' and tidb_name='t2')) ┌─tidb_database─┬─tidb_name─┐ │ test_new │ t2 │ └───────────────┴───────────┘ diff --git a/tests/run-test.py b/tests/run-test.py index 843fe7c79b4..a2bcee0ce99 100644 --- a/tests/run-test.py +++ b/tests/run-test.py @@ -29,6 +29,7 @@ UNFINISHED_1_PREFIX = '\t' UNFINISHED_2_PREFIX = ' ' WORD_PH = '{#WORD}' +LINE_PH = '{#LINE}' CURL_TIDB_STATUS_PREFIX = 'curl_tidb> ' verbose = False @@ -138,18 +139,22 @@ def match_ph_word(line): # TODO: Support more place holders, eg: {#NUMBER} def compare_line(line, template): - while True: - i = template.find(WORD_PH) - if i < 0: - return line == template - else: - if line[:i] != template[:i]: - return False - j = match_ph_word(line[i:]) - if j == 0: - return False - template = template[i + len(WORD_PH):] - line = line[i + j:] + l = template.find(LINE_PH) + if l >= 0: + return True + else: + while True: + i = template.find(WORD_PH) + if i < 0: + return line == template + else: + if line[:i] != template[:i]: + return False + j = match_ph_word(line[i:]) + if j == 0: + return False + template = template[i + len(WORD_PH):] + line = line[i + j:] class MySQLCompare: @@ -194,11 +199,14 @@ def matched(outputs, matches): b = MySQLCompare.parse_excepted_outputs(matches) return a == b else: - if len(outputs) != len(matches): + if len(outputs) > len(matches): return False for i in range(0, len(outputs)): if not compare_line(outputs[i], matches[i]): return False + for i in range(len(outputs), len(matches)): + if not compare_line("", matches[i]): + return False return True @@ -212,11 +220,14 @@ def matched(outputs, matches, fuzz): b = parse_table_parts(matches, fuzz) return a == b else: - if len(outputs) != len(matches): + if len(outputs) > len(matches): return False for i in range(0, len(outputs)): if not compare_line(outputs[i], matches[i]): return False + for i in range(len(outputs), len(matches)): + if not compare_line("", matches[i]): + return False return True
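
The `run-test.py` change adds a `{#LINE}` placeholder and lets an expected block be longer than the actual output, as long as each surplus template line is a placeholder (or empty). A C++ transliteration of that relaxed matching, omitting the existing `{#WORD}` handling, purely for illustration:

```cpp
#include <iostream>
#include <string>
#include <vector>

// A template line containing {#LINE} matches any single output line.
bool lineMatches(const std::string & output, const std::string & tmpl)
{
    if (tmpl.find("{#LINE}") != std::string::npos)
        return true; // placeholder swallows the whole line, e.g. one stack-trace frame
    return output == tmpl;
}

bool blockMatches(const std::vector<std::string> & outputs, const std::vector<std::string> & templates)
{
    if (outputs.size() > templates.size())
        return false;
    for (size_t i = 0; i < outputs.size(); ++i)
        if (!lineMatches(outputs[i], templates[i]))
            return false;
    for (size_t i = outputs.size(); i < templates.size(); ++i)
        if (!lineMatches("", templates[i]))
            return false; // surplus template lines must be placeholders or empty
    return true;
}

int main()
{
    const std::vector<std::string> outputs = {
        "ERROR 1105 (HY000) at line 1: ... Stack trace:",
        "0x00000001 some frame"};
    const std::vector<std::string> templates = {
        "ERROR 1105 (HY000) at line 1: ... Stack trace:",
        "{#LINE}",
        "{#LINE}"};
    std::cout << std::boolalpha << blockMatches(outputs, templates) << '\n'; // true
    return 0;
}
```

This is what lets the updated expectations in `issue_2471.test` and `mpp_fail.test` pin the first line of the error message while treating each stack-trace frame as an opaque `{#LINE}`.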