From 8e6f884958e09b68ccd33334340241f2a873a7ca Mon Sep 17 00:00:00 2001 From: Wish Date: Tue, 13 Sep 2022 14:11:43 +0800 Subject: [PATCH 01/14] cmake: Build rust deps using debug profile when build-type is not release --- contrib/tiflash-proxy-cmake/CMakeLists.txt | 13 +++++++++++-- libs/libsymbolization/CMakeLists.txt | 13 +++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index cdbeaa6f11d..ead57014c96 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -3,6 +3,14 @@ set(_TIFLASH_PROXY_LIBRARY "${_TIFLASH_PROXY_SOURCE_DIR}/target/release/${CMAKE_ file(GLOB_RECURSE _TIFLASH_PROXY_SRCS "${_TIFLASH_PROXY_SOURCE_DIR}/*.rs") list(FILTER _TIFLASH_PROXY_SRCS EXCLUDE REGEX ${_TIFLASH_PROXY_SOURCE_DIR}/target/.*) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") + set(_TIFLASH_PROXY_BUILD_PROFILE "release") + set(_TIFLASH_PROXY_MAKE_COMMAND make release) +else() + set(_TIFLASH_PROXY_BUILD_PROFILE "debug") + set(_TIFLASH_PROXY_MAKE_COMMAND make debug) +endif() + # use `CFLAGS=-w CXXFLAGS=-w` to inhibit warning messages. 
if (TIFLASH_LLVM_TOOLCHAIN) set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} "CFLAGS=-w -fuse-ld=lld" "CXXFLAGS=-w -fuse-ld=lld -stdlib=libc++") @@ -34,10 +42,11 @@ endif() message(STATUS "Using rust env for tiflash-proxy: ${TIFLASH_RUST_ENV}") add_custom_command(OUTPUT ${_TIFLASH_PROXY_LIBRARY} - COMMENT "Building tiflash proxy" + COMMENT "Building TiFlash Proxy using ${_TIFLASH_PROXY_BUILD_PROFILE} profile" # `ENGINE_LABEL_VALUE` is used in proxy for copying `libraftstore_proxy.xx` to `lib${ENGINE_LABEL_VALUE}_proxy.xx` - COMMAND ${CMAKE_COMMAND} -E env ${TIFLASH_RUST_ENV} ENGINE_LABEL_VALUE=tiflash make release + COMMAND ${CMAKE_COMMAND} -E env ${TIFLASH_RUST_ENV} ENGINE_LABEL_VALUE=tiflash ${_TIFLASH_PROXY_MAKE_COMMAND} VERBATIM + USES_TERMINAL WORKING_DIRECTORY ${_TIFLASH_PROXY_SOURCE_DIR} DEPENDS "${_TIFLASH_PROXY_SRCS}" "${_TIFLASH_PROXY_SOURCE_DIR}/Cargo.lock" "${_TIFLASH_PROXY_SOURCE_DIR}/rust-toolchain") diff --git a/libs/libsymbolization/CMakeLists.txt b/libs/libsymbolization/CMakeLists.txt index 43c9a088547..48e9da9864e 100644 --- a/libs/libsymbolization/CMakeLists.txt +++ b/libs/libsymbolization/CMakeLists.txt @@ -16,10 +16,19 @@ set(_SYMBOLIZATION_SOURCE_DIR "${TiFlash_SOURCE_DIR}/libs/libsymbolization") set(_SYMBOLIZATION_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/release/${CMAKE_STATIC_LIBRARY_PREFIX}symbolization${CMAKE_STATIC_LIBRARY_SUFFIX}") file(GLOB_RECURSE _SYMBOLIZATION_SRCS "${_SYMBOLIZATION_SOURCE_DIR}/src/*.rs") file(GLOB_RECURSE _SYMBOLIZATION_HEADERS "${_SYMBOLIZATION_SOURCE_DIR}/include/*.h") + +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") + set(_SYMBOLIZATION_BUILD_PROFILE "release") + set(_SYMBOLIZATION_BUILD_ARGS --release) +else() + set(_SYMBOLIZATION_BUILD_PROFILE "debug") +endif() + add_custom_command(OUTPUT ${_SYMBOLIZATION_LIBRARY} - COMMENT "Building symbolization" - COMMAND cargo build --release --target-dir ${CMAKE_CURRENT_BINARY_DIR} + COMMENT 
"Building symbolization using ${_SYMBOLIZATION_BUILD_PROFILE} profile" + COMMAND cargo build ${_SYMBOLIZATION_BUILD_ARGS} --target-dir ${CMAKE_CURRENT_BINARY_DIR} VERBATIM + USES_TERMINAL WORKING_DIRECTORY ${_SYMBOLIZATION_SOURCE_DIR} DEPENDS "${_SYMBOLIZATION_SRCS}" "${_SYMBOLIZATION_HEADERS}" From 9d5b94ef690ae9c701dbbbeeb018663f9479ad6b Mon Sep 17 00:00:00 2001 From: Wish Date: Tue, 13 Sep 2022 14:21:03 +0800 Subject: [PATCH 02/14] Respect SAN_DEBUG Signed-off-by: Wish --- contrib/tiflash-proxy-cmake/CMakeLists.txt | 8 ++++---- libs/libsymbolization/CMakeLists.txt | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index ead57014c96..c7372235cb3 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -3,12 +3,12 @@ set(_TIFLASH_PROXY_LIBRARY "${_TIFLASH_PROXY_SOURCE_DIR}/target/release/${CMAKE_ file(GLOB_RECURSE _TIFLASH_PROXY_SRCS "${_TIFLASH_PROXY_SOURCE_DIR}/*.rs") list(FILTER _TIFLASH_PROXY_SRCS EXCLUDE REGEX ${_TIFLASH_PROXY_SOURCE_DIR}/target/.*) -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") - set(_TIFLASH_PROXY_BUILD_PROFILE "release") - set(_TIFLASH_PROXY_MAKE_COMMAND make release) -else() +if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR SAN_DEBUG) set(_TIFLASH_PROXY_BUILD_PROFILE "debug") set(_TIFLASH_PROXY_MAKE_COMMAND make debug) +else() + set(_TIFLASH_PROXY_BUILD_PROFILE "release") + set(_TIFLASH_PROXY_MAKE_COMMAND make release) endif() # use `CFLAGS=-w CXXFLAGS=-w` to inhibit warning messages. 
diff --git a/libs/libsymbolization/CMakeLists.txt b/libs/libsymbolization/CMakeLists.txt index 48e9da9864e..e036c62449b 100644 --- a/libs/libsymbolization/CMakeLists.txt +++ b/libs/libsymbolization/CMakeLists.txt @@ -17,11 +17,11 @@ set(_SYMBOLIZATION_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/release/${CMAKE_STATIC_L file(GLOB_RECURSE _SYMBOLIZATION_SRCS "${_SYMBOLIZATION_SOURCE_DIR}/src/*.rs") file(GLOB_RECURSE _SYMBOLIZATION_HEADERS "${_SYMBOLIZATION_SOURCE_DIR}/include/*.h") -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") +if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR SAN_DEBUG) + set(_SYMBOLIZATION_BUILD_PROFILE "debug") +else() set(_SYMBOLIZATION_BUILD_PROFILE "release") set(_SYMBOLIZATION_BUILD_ARGS --release) -else() - set(_SYMBOLIZATION_BUILD_PROFILE "debug") endif() add_custom_command(OUTPUT ${_SYMBOLIZATION_LIBRARY} From b65b1983ce20434fb3b8fe911b1102dbf1debbb8 Mon Sep 17 00:00:00 2001 From: Wish Date: Tue, 13 Sep 2022 14:44:10 +0800 Subject: [PATCH 03/14] Fix linking debug libraries --- contrib/tiflash-proxy-cmake/CMakeLists.txt | 10 +++++----- libs/libsymbolization/CMakeLists.txt | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index c7372235cb3..556cf02ccf3 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -1,8 +1,3 @@ -set(_TIFLASH_PROXY_SOURCE_DIR "${TiFlash_SOURCE_DIR}/contrib/tiflash-proxy") -set(_TIFLASH_PROXY_LIBRARY "${_TIFLASH_PROXY_SOURCE_DIR}/target/release/${CMAKE_SHARED_LIBRARY_PREFIX}tiflash_proxy${CMAKE_SHARED_LIBRARY_SUFFIX}") -file(GLOB_RECURSE _TIFLASH_PROXY_SRCS "${_TIFLASH_PROXY_SOURCE_DIR}/*.rs") -list(FILTER _TIFLASH_PROXY_SRCS EXCLUDE REGEX ${_TIFLASH_PROXY_SOURCE_DIR}/target/.*) - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR SAN_DEBUG) set(_TIFLASH_PROXY_BUILD_PROFILE 
"debug") set(_TIFLASH_PROXY_MAKE_COMMAND make debug) @@ -11,6 +6,11 @@ else() set(_TIFLASH_PROXY_MAKE_COMMAND make release) endif() +set(_TIFLASH_PROXY_SOURCE_DIR "${TiFlash_SOURCE_DIR}/contrib/tiflash-proxy") +set(_TIFLASH_PROXY_LIBRARY "${_TIFLASH_PROXY_SOURCE_DIR}/target/${_TIFLASH_PROXY_BUILD_PROFILE}/${CMAKE_SHARED_LIBRARY_PREFIX}tiflash_proxy${CMAKE_SHARED_LIBRARY_SUFFIX}") +file(GLOB_RECURSE _TIFLASH_PROXY_SRCS "${_TIFLASH_PROXY_SOURCE_DIR}/*.rs") +list(FILTER _TIFLASH_PROXY_SRCS EXCLUDE REGEX ${_TIFLASH_PROXY_SOURCE_DIR}/target/.*) + # use `CFLAGS=-w CXXFLAGS=-w` to inhibit warning messages. if (TIFLASH_LLVM_TOOLCHAIN) set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} "CFLAGS=-w -fuse-ld=lld" "CXXFLAGS=-w -fuse-ld=lld -stdlib=libc++") diff --git a/libs/libsymbolization/CMakeLists.txt b/libs/libsymbolization/CMakeLists.txt index e036c62449b..78dae44199a 100644 --- a/libs/libsymbolization/CMakeLists.txt +++ b/libs/libsymbolization/CMakeLists.txt @@ -12,11 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-set(_SYMBOLIZATION_SOURCE_DIR "${TiFlash_SOURCE_DIR}/libs/libsymbolization") -set(_SYMBOLIZATION_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/release/${CMAKE_STATIC_LIBRARY_PREFIX}symbolization${CMAKE_STATIC_LIBRARY_SUFFIX}") -file(GLOB_RECURSE _SYMBOLIZATION_SRCS "${_SYMBOLIZATION_SOURCE_DIR}/src/*.rs") -file(GLOB_RECURSE _SYMBOLIZATION_HEADERS "${_SYMBOLIZATION_SOURCE_DIR}/include/*.h") - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR SAN_DEBUG) set(_SYMBOLIZATION_BUILD_PROFILE "debug") else() @@ -24,6 +19,11 @@ else() set(_SYMBOLIZATION_BUILD_ARGS --release) endif() +set(_SYMBOLIZATION_SOURCE_DIR "${TiFlash_SOURCE_DIR}/libs/libsymbolization") +set(_SYMBOLIZATION_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/${_SYMBOLIZATION_BUILD_PROFILE}/${CMAKE_STATIC_LIBRARY_PREFIX}symbolization${CMAKE_STATIC_LIBRARY_SUFFIX}") +file(GLOB_RECURSE _SYMBOLIZATION_SRCS "${_SYMBOLIZATION_SOURCE_DIR}/src/*.rs") +file(GLOB_RECURSE _SYMBOLIZATION_HEADERS "${_SYMBOLIZATION_SOURCE_DIR}/include/*.h") + add_custom_command(OUTPUT ${_SYMBOLIZATION_LIBRARY} COMMENT "Building symbolization using ${_SYMBOLIZATION_BUILD_PROFILE} profile" COMMAND cargo build ${_SYMBOLIZATION_BUILD_ARGS} --target-dir ${CMAKE_CURRENT_BINARY_DIR} From 18296cee4e8bb0374ba1abaeee58db1120487db3 Mon Sep 17 00:00:00 2001 From: Wish Date: Sun, 18 Sep 2022 18:03:21 +0800 Subject: [PATCH 04/14] Disable overflow checks for libsymbolization Signed-off-by: Wish --- libs/libsymbolization/Cargo.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libs/libsymbolization/Cargo.toml b/libs/libsymbolization/Cargo.toml index ce8912dc347..31ee06e55c4 100644 --- a/libs/libsymbolization/Cargo.toml +++ b/libs/libsymbolization/Cargo.toml @@ -13,3 +13,10 @@ crate-type = ["staticlib"] backtrace = "0.3" findshlibs = "0.10" lazy_static = "1.4" + +[profile.dev] +overflow-checks = false + +[profile.release] +debug = true +overflow-checks = false From 5093e3a7e0ac8e6e3d85515938e3256fdb29b2fa Mon Sep 17 00:00:00 2001 From: Wish Date: Mon, 19 Sep 2022 
11:04:18 +0800 Subject: [PATCH 05/14] cmake: Add find-rust Signed-off-by: Wish --- CMakeLists.txt | 2 ++ cmake/find_rust.cmake | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 cmake/find_rust.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index d7c00eb2e9b..3f603bf0f5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,8 @@ if(CMAKE_PREFIX_PATH) "${CMAKE_PREFIX_PATH}/lib:${CMAKE_PREFIX_PATH}/lib/x86_64-unknown-linux-gnu/:${CMAKE_PREFIX_PATH}/lib/aarch64-unknown-linux-gnu/") endif() +include (cmake/find_rust.cmake) + message (STATUS "Using CXX=${CMAKE_CXX_COMPILER}, ver=${CMAKE_CXX_COMPILER_VERSION};CC=${CMAKE_C_COMPILER}, ver=${CMAKE_C_COMPILER_VERSION}") if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # Require at least gcc 7 diff --git a/cmake/find_rust.cmake b/cmake/find_rust.cmake new file mode 100644 index 00000000000..f903b7cd079 --- /dev/null +++ b/cmake/find_rust.cmake @@ -0,0 +1,19 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +find_program (CARGO_FOUND cargo) + +if (NOT CARGO_FOUND) + message (FATAL_ERROR "Cargo not found in PATH. Maybe you have not installed the Rust toolchain? 
See https://rustup.rs") +endif () From 2c8c5fe280eb3ae2791f4bce00b8ff776e86a6a6 Mon Sep 17 00:00:00 2001 From: Wish Date: Mon, 19 Sep 2022 17:13:11 +0800 Subject: [PATCH 06/14] Refactor README Signed-off-by: Wish --- README.md | 227 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 134 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index ab996b6f3d6..88ce53e8a5e 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ See [Quick Start with HTAP](https://docs.pingcap.com/tidb/stable/quick-start-wit ## Build TiFlash -TiFlash supports building on the following hardware architectures: +TiFlash can be built on the following hardware architectures: -- x86-64/amd64 +- x86-64 / amd64 - aarch64 And the following operating systems: @@ -32,116 +32,129 @@ And the following operating systems: - Linux - MacOS -### 1. Checkout Source Code +### 1. Prepare Prerequisites -Assume `$WORKSPACE` to be the directory under which the TiFlash repo is placed. - -```shell -cd $WORKSPACE -git clone https://github.com/pingcap/tiflash.git --recursive -j 20 -``` - -### 2. Prepare Prerequisites - -The following packages are needed for all platforms: +The following packages are required: - CMake 3.21.0+ - -- Rust: Recommended to use [rustup](https://rustup.rs) to install: - - ```shell - curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal --default-toolchain nightly - source $HOME/.cargo/env - ``` - +- Clang 13.0.0+ +- Rust - Python 3.0+ +- Ninja-Build or GNU Make -- Ninja or GNU Make - -The following are platform-specific prerequisites. Click to expand details: +Detailed steps for each platform are listed below.
-Linux specific prerequisites +Ubuntu / Debian -TiFlash can be built using either LLVM or GCC toolchain on Linux. LLVM toolchain is our official one for releasing. +```shell +sudo apt update -> But for GCC, only GCC 7.x is supported as far, and is not planned to be a long term support. So it may get broken some day, silently. +# Install Rust toolchain, see https://rustup.rs for details +curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain none +source $HOME/.cargo/env -- LLVM 13.0.0+ +# Install LLVM, see https://apt.llvm.org for details +# Clang will be available as /usr/bin/clang++-14 +wget https://apt.llvm.org/llvm.sh +chmod +x llvm.sh +sudo ./llvm.sh 14 all - TiFlash compiles using full LLVM toolchain (`clang/compiler-rt/libc++/libc++abi`) by default. You can use a system-wise toolchain if `clang/compiler-rt/libc++/libc++abi` can be installed in your environment. +# Install other dependencies +sudo apt install -y cmake ninja-build zlib1g-dev libcurl4-openssl-dev +``` - Click sections below to see detailed instructions: +**Note for Ubuntu 18.04 and Ubuntu 20.04:** -
- Set up LLVM via package managers in Debian/Ubuntu +The default installed cmake may be not recent enough. You can install a newer cmake from the [Kitware APT Repository](https://apt.kitware.com): - ```shell - # add LLVM repo key - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - +```shell +sudo apt install -y software-properties-common lsb-release +wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null +sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" +sudo apt update +sudo apt install -y cmake +``` - # install LLVM packages, and can find more detailed instructions in https://apt.llvm.org/ when failed - apt-get install clang-13 lldb-13 lld-13 clang-tools-13 clang-13-doc libclang-common-13-dev libclang-13-dev libclang1-13 clang-format-13 clangd-13 clang-tidy-13 libc++-13-dev libc++abi-13-dev libomp-13-dev llvm-13-dev libfuzzer-13-dev +**If you are facing "ld.lld: error: duplicate symbol: ssl3_cbc_digest_record":** - # install other dependencies - apt-get install lcov cmake ninja-build libssl-dev zlib1g-dev libcurl4-openssl-dev - ``` +It is likely caused by you have an pre-installed libssl3 where TiFlash prefers libssl1. TiFlash has vendored libssl1, so that you can simply remove the one in the system to make compiling work: -
+```shell +sudo apt remove libssl-dev +``` -
- Set up LLVM via package managers in Archlinux +
- ```shell - # install compilers and dependencies - sudo pacman -S clang libc++ libc++abi compiler-rt openmp lcov cmake ninja curl openssl zlib - ``` +
+Archlinux -
+```shell +# Install Rust toolchain, see https://rustup.rs for details +curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain none +source $HOME/.cargo/env -- GCC 7.x +# Install compilers and dependencies +sudo pacman -S clang libc++ libc++abi compiler-rt openmp lcov cmake ninja curl openssl zlib +``` - > **WARNING**: This support may not be maintained in the future. +
- TiFlash compiles on GCC 7.x (no older, nor newer) only because it hasn't been broken. If you have GCC 7.x, you are probably fine, for now. +
+CentOS 7 + +Please refer to [release-centos7-llvm/env/prepare-sysroot.sh](./release-centos7-llvm/env/prepare-sysroot.sh)
- MacOS specific prerequisites +MacOS -- Apple Clang 12.0.0+ -- OpenSSL 1.1 +```shell +# Install Rust toolchain, see https://rustup.rs for details +curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain none +source $HOME/.cargo/env - ```shell - brew install openssl@1.1 - ``` +# Install compilers +xcode-select --install + +# Install other dependencies +brew install ninja cmake openssl@1.1 +```
-### 3. Build +### 2. Checkout Source Code + +```shell +git clone https://github.com/pingcap/tiflash.git --recursive -j 20 +cd tiflash +``` -Assume `$BUILD` to be the directory under which you want to build TiFlash. +### 3. Build -For Ninja: +To build TiFlash for development: ```shell -cd $BUILD -cmake $WORKSPACE/tiflash -GNinja +# In the TiFlash repository root: +mkdir cmake-build # The directory name can be customized +cd cmake-build + +cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON + ninja tiflash ``` -For GNU Make: +Note: In Linux, usually you need to specify using LLVM instead of the default GCC: ```shell -cd $BUILD -cmake $WORKSPACE/tiflash -make tiflash -j +# In cmake-build directory: +cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON \ + -DCMAKE_C_COMPILER=/usr/bin/clang-14 \ + -DCMAKE_CXX_COMPILER=/usr/bin/clang++-14 ``` -> **NOTE**: Option `-j` (defaults to your system CPU core count, otherwise you can optionally specify a number) is used to control the build parallelism. Higher parallelism consumes more memory. If you encounter compiler OOM or hang, try to lower the parallelism by specifying a reasonable number, e.g., half of your system CPU core count or even smaller, after `-j`, depending on the available memory in your system. - -After building, you can get TiFlash binary under `$BUILD/dbms/src/Server/tiflash`. +After building, you can get TiFlash binary under `tiflash/cmake-build/dbms/src/Server/tiflash`. ### Build Options @@ -150,7 +163,9 @@ TiFlash has several CMake build options to tweak for development purposes. These To tweat options, pass one or multiple `-D...=...` args when invoking CMake, for example: ```shell -cmake $WORKSPACE/tiflash -DCMAKE_BUILD_TYPE=DEBUG +# In cmake-build directory: +cmake .. 
-GNinja -DCMAKE_BUILD_TYPE=RELEASE + ^^^^^^^^^^^^^^^^^^^^^^^^^^ ``` - **Build Type**: @@ -163,9 +178,27 @@ cmake $WORKSPACE/tiflash -DCMAKE_BUILD_TYPE=DEBUG - **Build with Unit Tests**: + - `-DENABLE_TESTS=ON` + - `-DENABLE_TESTS=OFF`: Default - - `-DENABLE_TESTS=ON` +- **Build using GNU Make instead of ninja-build**: + +
+ Click to expand instructions + + To use GNU Make, simply don't pass `-GNinja` to cmake: + + ```shell + # In cmake-build directory: + cmake .. -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON + make tiflash -j + ``` + + > **NOTE**: Option `-j` (defaults to your system CPU core count, otherwise you can optionally specify a number) is used to control the build parallelism. Higher parallelism consumes more memory. If you encounter compiler OOM or hang, try to lower the parallelism by specifying a reasonable number, e.g., half of your system CPU core count or even smaller, after `-j`, depending on the available memory in your system. + +
+ - **Build with System Libraries**: @@ -182,7 +215,7 @@ cmake $WORKSPACE/tiflash -DCMAKE_BUILD_TYPE=DEBUG You can view these options along with their descriptions by running: ```shell - cd $BUILD + # In cmake-build directory: cmake -LH | grep "USE_INTERNAL" -A3 ``` @@ -208,17 +241,18 @@ cmake $WORKSPACE/tiflash -DCMAKE_BUILD_TYPE=DEBUG ## Run Unit Tests -To run unit tests, you need to build with `-DCMAKE_BUILD_TYPE=DEBUG`: +To run unit tests, you need to build with `-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON`: ```shell -cd $BUILD -cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=DEBUG +# In cmake-build directory: +cmake .. -GNinja \ + -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON ninja gtests_dbms # Most TiFlash unit tests ninja gtests_libdaemon # Settings related tests ninja gtests_libcommon ``` -And the unit-test executables are at `$BUILD/dbms/gtests_dbms`, `$BUILD/libs/libdaemon/src/tests/gtests_libdaemon` and `$BUILD/libs/libcommon/src/tests/gtests_libcommon`. +And the unit-test executables are at `cmake-build/dbms/gtests_dbms`, `cmake-build/libs/libdaemon/src/tests/gtests_libdaemon` and `cmake-build/libs/libcommon/src/tests/gtests_libcommon`. ## Run Sanitizer Tests @@ -227,8 +261,9 @@ TiFlash supports testing with thread sanitizer and address sanitizer. To generate unit test executables with sanitizer enabled: ```shell -cd $BUILD -cmake $WORKSPACE/tiflash -GNinja -DENABLE_TESTS=ON -DCMAKE_BUILD_TYPE=ASan # or TSan +# In cmake-build directory: +cmake .. -GNinja \ + -DENABLE_TESTS=ON -DCMAKE_BUILD_TYPE=ASan # or TSan ninja gtests_dbms ninja gtests_libdaemon ninja gtests_libcommon @@ -237,22 +272,28 @@ ninja gtests_libcommon There are known false positives reported from leak sanitizer (which is included in address sanitizer). 
To suppress these errors, set the following environment variables before running the executables: ```shell -LSAN_OPTIONS=suppressions=$WORKSPACE/tiflash/test/sanitize/asan.suppression +LSAN_OPTIONS=suppressions=test/sanitize/asan.suppression ``` ## Run Integration Tests -1. Build your own tiflash binary in $BUILD with `-DCMAKE_BUILD_TYPE=DEBUG`. -``` -cd $BUILD -cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=DEBUG -ninja tiflash -``` -2. Run tidb cluster locally using tiup playgroud or other tools. -``` -tiup playground nightly --tiflash.binpath $BUILD/dbms/src/Server/tiflash -``` -3. Check $WORKSPACE/tests/_env.sh to make the port and build dir right. +1. Build your own TiFlash binary with `-DCMAKE_BUILD_TYPE=DEBUG`: + + ```shell + # In cmake-build directory: + cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG + ninja tiflash + ``` + +2. Run TiDB cluster locally with your own TiFlash binary using TiUP Playground: + + ```shell + # In cmake-build directory: + tiup playground nightly --tiflash.binpath dbms/src/Server/tiflash + ``` + +3. Check `tests/_env.sh` to make the port and build dir right. + 4. Run your integration tests using commands like "./run-test.sh fullstack-test2/ddl" under $WORKSPACE/tests dir ## Run MicroBenchmark Tests @@ -262,7 +303,7 @@ To run micro benchmark tests, you need to build with -DCMAKE_BUILD_TYPE=RELEASE ```shell cd $BUILD cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON -ninja bench_dbms +ninja bench_dbms ``` And the microbenchmark-test executables are at `$BUILD/dbms/bench_dbms`, you can run it with `./bench_dbms` or `./bench_dbms --benchmark_filter=xxx` . More usage please check with `./bench_dbms --help`. @@ -282,8 +323,8 @@ Before submitting a pull request, please use [format-diff.py](format-diff.py) to > **NOTE**: It is required to use clang-format 12.0.0+. 
```shell -cd $WORKSPACE/tiflash -python3 format-diff.py --diff_from `git merge-base ${TARGET_REMOTE_BRANCH} HEAD` +# In the TiFlash repository root: +python3 format-diff.py --diff_from `git merge-base origin/master HEAD` ``` ## License From d3148d815d979f12186fe5f27f45e76b955deb47 Mon Sep 17 00:00:00 2001 From: Wish Date: Mon, 19 Sep 2022 18:44:49 +0800 Subject: [PATCH 07/14] Give some default directory names by default Signed-off-by: Wish --- README.md | 122 +++++++++++++++++++++++++++++++--------------- tests/_env.sh | 25 +++++++--- tests/run-test.sh | 22 ++++++--- 3 files changed, 117 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 88ce53e8a5e..faa21448f44 100644 --- a/README.md +++ b/README.md @@ -78,12 +78,14 @@ sudo apt install -y cmake **If you are facing "ld.lld: error: duplicate symbol: ssl3_cbc_digest_record":** -It is likely caused by you have an pre-installed libssl3 where TiFlash prefers libssl1. TiFlash has vendored libssl1, so that you can simply remove the one in the system to make compiling work: +It is likely because you have a pre-installed libssl3 where TiFlash prefers libssl1. TiFlash has vendored libssl1, so that you can simply remove the one in the system to make compiling work: ```shell sudo apt remove libssl-dev ``` +If this doesn't work, please [file an issue](https://github.com/pingcap/tiflash/issues/new?assignees=&labels=type%2Fquestion&template=general-question.md). +
@@ -137,24 +139,24 @@ To build TiFlash for development: ```shell # In the TiFlash repository root: -mkdir cmake-build # The directory name can be customized -cd cmake-build +mkdir cmake-build-debug # The directory name can be customized +cd cmake-build-debug -cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON +cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG ninja tiflash ``` -Note: In Linux, usually you need to specify using LLVM instead of the default GCC: +Note: In Linux, usually you need to explicitly specify to use LLVM. Otherwise, the default compiler will be GCC: ```shell -# In cmake-build directory: -cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON \ +# In cmake-build-debug directory: +cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG \ -DCMAKE_C_COMPILER=/usr/bin/clang-14 \ -DCMAKE_CXX_COMPILER=/usr/bin/clang++-14 ``` -After building, you can get TiFlash binary under `tiflash/cmake-build/dbms/src/Server/tiflash`. +After building, you can get TiFlash binary in `dbms/src/Server/tiflash` in the `cmake-build-debug` directory. ### Build Options @@ -163,9 +165,9 @@ TiFlash has several CMake build options to tweak for development purposes. These To tweat options, pass one or multiple `-D...=...` args when invoking CMake, for example: ```shell -# In cmake-build directory: -cmake .. -GNinja -DCMAKE_BUILD_TYPE=RELEASE - ^^^^^^^^^^^^^^^^^^^^^^^^^^ +cd cmake-build-debug +cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG -DFOO=BAR + ^^^^^^^^^ ``` - **Build Type**: @@ -176,11 +178,13 @@ cmake .. -GNinja -DCMAKE_BUILD_TYPE=RELEASE - `-DCMAKE_BUILD_TYPE=RELEASE`: Release build + Usually you may want to use different build directories for different build types, e.g. a new build directory named `cmake-build-release` for the release build, so that compile unit cache will not be invalidated when you switch between different build types. 
+ - **Build with Unit Tests**: - - `-DENABLE_TESTS=ON` + - `-DENABLE_TESTS=ON`: Enable unit tests (enabled by default in debug profile) - - `-DENABLE_TESTS=OFF`: Default + - `-DENABLE_TESTS=OFF`: Disable unit tests (default in release profile) - **Build using GNU Make instead of ninja-build**: @@ -190,15 +194,14 @@ cmake .. -GNinja -DCMAKE_BUILD_TYPE=RELEASE To use GNU Make, simply don't pass `-GNinja` to cmake: ```shell - # In cmake-build directory: - cmake .. -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON + cd cmake-build-debug + cmake .. -DCMAKE_BUILD_TYPE=DEBUG make tiflash -j ``` > **NOTE**: Option `-j` (defaults to your system CPU core count, otherwise you can optionally specify a number) is used to control the build parallelism. Higher parallelism consumes more memory. If you encounter compiler OOM or hang, try to lower the parallelism by specifying a reasonable number, e.g., half of your system CPU core count or even smaller, after `-j`, depending on the available memory in your system. -
- + - **Build with System Libraries**: @@ -215,7 +218,7 @@ cmake .. -GNinja -DCMAKE_BUILD_TYPE=RELEASE You can view these options along with their descriptions by running: ```shell - # In cmake-build directory: + cd cmake-build-debug cmake -LH | grep "USE_INTERNAL" -A3 ``` @@ -241,29 +244,38 @@ cmake .. -GNinja -DCMAKE_BUILD_TYPE=RELEASE ## Run Unit Tests -To run unit tests, you need to build with `-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON`: +Unit tests are automatically enabled in debug profile. To build these unit tests: ```shell -# In cmake-build directory: -cmake .. -GNinja \ - -DCMAKE_BUILD_TYPE=DEBUG -DENABLE_TESTS=ON +cd cmake-build-debug +cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG ninja gtests_dbms # Most TiFlash unit tests ninja gtests_libdaemon # Settings related tests ninja gtests_libcommon ``` -And the unit-test executables are at `cmake-build/dbms/gtests_dbms`, `cmake-build/libs/libdaemon/src/tests/gtests_libdaemon` and `cmake-build/libs/libcommon/src/tests/gtests_libcommon`. +Then, to run these unit tests: + +```shell +cd cmake-build-debug +./dbms/gtests_dbms +./libs/libdaemon/src/tests/gtests_libdaemon +./libs/libcommon/src/tests/gtests_libcommon +``` + +More usages are available via `./dbms/gtests_dbms --help`. ## Run Sanitizer Tests TiFlash supports testing with thread sanitizer and address sanitizer. -To generate unit test executables with sanitizer enabled: +To build unit test executables with sanitizer enabled: ```shell -# In cmake-build directory: -cmake .. -GNinja \ - -DENABLE_TESTS=ON -DCMAKE_BUILD_TYPE=ASan # or TSan +# In the TiFlash repository root: +mkdir cmake-build-sanitizer +cd cmake-build-sanitizer +cmake .. -GNinja -DENABLE_TESTS=ON -DCMAKE_BUILD_TYPE=ASan # or TSan ninja gtests_dbms ninja gtests_libdaemon ninja gtests_libcommon @@ -277,36 +289,70 @@ LSAN_OPTIONS=suppressions=test/sanitize/asan.suppression ## Run Integration Tests -1. Build your own TiFlash binary with `-DCMAKE_BUILD_TYPE=DEBUG`: +1. 
Build your own TiFlash binary using debug profile: ```shell - # In cmake-build directory: + cd cmake-build-debug cmake .. -GNinja -DCMAKE_BUILD_TYPE=DEBUG ninja tiflash ``` -2. Run TiDB cluster locally with your own TiFlash binary using TiUP Playground: +2. Start a local TiDB cluster with your own TiFlash binary using TiUP: ```shell - # In cmake-build directory: - tiup playground nightly --tiflash.binpath dbms/src/Server/tiflash + cd cmake-build-debug + tiup playground nightly --tiflash.binpath ./dbms/src/Server/tiflash + + # Or using a more stable cluster version: + # tiup playground v6.1.0 --tiflash.binpath dbms/src/Server/tiflash ``` -3. Check `tests/_env.sh` to make the port and build dir right. + [TiUP](https://tiup.io) is the TiDB component manager. If you don't have one, you can install it via: + + ```shell + curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh + ``` -4. Run your integration tests using commands like "./run-test.sh fullstack-test2/ddl" under $WORKSPACE/tests dir + If you are not running the cluster using the default port (for example, you run multiple clusters), make sure that the port and build directory in `tests/_env.sh` are correct. + +3. Run integration tests: + + ```shell + # In the TiFlash repository root: + cd tests + ./run-test.sh + + # Or run specific integration test: + # ./run-test.sh fullstack-test2/ddl + ``` + +Note: some integration tests (namely, tests under `delta-merge-test`) requires a standalone TiFlash service without a TiDB cluster, otherwise they will fail. 
+ +To run these integration tests: TBD ## Run MicroBenchmark Tests -To run micro benchmark tests, you need to build with -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON: +To build micro benchmark tests, you need release profile and tests enabled: ```shell -cd $BUILD -cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON +# In the TiFlash repository root: +mkdir cmake-build-release +cd cmake-build-release +cmake .. -GNinja -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON ninja bench_dbms ``` -And the microbenchmark-test executables are at `$BUILD/dbms/bench_dbms`, you can run it with `./bench_dbms` or `./bench_dbms --benchmark_filter=xxx` . More usage please check with `./bench_dbms --help`. +Then, to run these micro benchmarks: + +```shell +cd cmake-build-release +./dbms/bench_dbms + +# Or run with filter: +# ./dbms/bench_dbms --benchmark_filter=xxx +``` + +More usages are available via `./dbms/bench_dbms --help`. ## Generate LLVM Coverage Report diff --git a/tests/_env.sh b/tests/_env.sh index 5482118196f..8a3e9659850 100644 --- a/tests/_env.sh +++ b/tests/_env.sh @@ -16,14 +16,25 @@ # Executable path -if [ `uname` == "Darwin" ]; then - export build_dir="../../build_clang" -else - export build_dir="../../build" -fi -# export build_dir="../cmake-build-debug" +# Try with some common build +TIFLASH_PATH="dbms/src/Server/tiflash" + +if [ -z ${storage_bin+x} ]; then + if [ -f "../cmake-build-debug/${TIFLASH_PATH}" ]; then + build_dir="../cmake-build-debug" + elif [ -f "../../build_clang/${TIFLASH_PATH}" ]; then + # Used in CI + build_dir="../../build_clang" + elif [ -f "../../build/${TIFLASH_PATH}" ]; then + # Used in CI + build_dir="../../build" + else + echo 'Error: Cannot find TiFlash binary. 
Specify via: export storage_bin=xxx' >&2 + exit 1 + fi -export storage_bin="$build_dir/dbms/src/Server/tiflash" + export storage_bin="$build_dir/dbms/src/Server/tiflash" +fi # Server address for connecting export storage_server="127.0.0.1" diff --git a/tests/run-test.sh b/tests/run-test.sh index 514c0418d38..360ae34bb40 100755 --- a/tests/run-test.sh +++ b/tests/run-test.sh @@ -25,13 +25,13 @@ function get_elapse_s() # time format:$(date +"%s.%N"), such as 1662367015.453429263 start_time=$1 end_time=$2 - + start_s=${start_time%.*} start_nanos=${start_time#*.} end_s=${end_time%.*} end_nanos=${end_time#*.} - - # end_nanos > start_nanos? + + # end_nanos > start_nanos? # Another way, the time part may start with 0, which means # it will be regarded as oct format, use "10#" to ensure # calculateing with decimal @@ -39,9 +39,9 @@ function get_elapse_s() end_s=$(( 10#$end_s - 1 )) end_nanos=$(( 10#$end_nanos + 10**9 )) fi - + elapse_s=$(( 10#$end_s - 10#$start_s )).`printf "%03d\n" $(( (10#$end_nanos - 10#$start_nanos)/10**6 ))` - + echo $elapse_s } @@ -142,8 +142,16 @@ set -e # Export the `PY` env so that it can be # used when the function `wait_table` is # called from subprocess. -export PY="python2" -# export PY="python3" +if [ -x "$(command -v python3)" ]; then + export PY="python3" +elif [ -x "$(command -v python2)" ]; then + export PY="python2" +elif [ -x "$(command -v python)" ]; then + export PY="python" +else + echo 'Error: python not found in PATH.' >&2 + exit 1 +fi target="$1" fullstack="$2" From 38d59f1dc485a42a9f7f529a0af1ac04c5f4f19d Mon Sep 17 00:00:00 2001 From: Wish Date: Mon, 19 Sep 2022 21:20:32 +0800 Subject: [PATCH 08/14] Seems that CI is using another env file. Let's try to make it simple. 
Signed-off-by: Wish --- README.md | 4 +--- tests/_env.sh | 23 ++++++++--------------- tests/run-test.sh | 18 +++++++++--------- 3 files changed, 18 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index faa21448f44..dc15533f66b 100644 --- a/README.md +++ b/README.md @@ -326,9 +326,7 @@ LSAN_OPTIONS=suppressions=test/sanitize/asan.suppression # ./run-test.sh fullstack-test2/ddl ``` -Note: some integration tests (namely, tests under `delta-merge-test`) requires a standalone TiFlash service without a TiDB cluster, otherwise they will fail. - -To run these integration tests: TBD +Note: some integration tests (namely, tests under `delta-merge-test`) requires a standalone TiFlash service without a TiDB cluster, otherwise they will fail. To run these integration tests: TBD ## Run MicroBenchmark Tests diff --git a/tests/_env.sh b/tests/_env.sh index 8a3e9659850..6559892b8b1 100644 --- a/tests/_env.sh +++ b/tests/_env.sh @@ -16,24 +16,17 @@ # Executable path -# Try with some common build +# Try with some common build path TIFLASH_PATH="dbms/src/Server/tiflash" if [ -z ${storage_bin+x} ]; then - if [ -f "../cmake-build-debug/${TIFLASH_PATH}" ]; then - build_dir="../cmake-build-debug" - elif [ -f "../../build_clang/${TIFLASH_PATH}" ]; then - # Used in CI - build_dir="../../build_clang" - elif [ -f "../../build/${TIFLASH_PATH}" ]; then - # Used in CI - build_dir="../../build" - else - echo 'Error: Cannot find TiFlash binary. Specify via: export storage_bin=xxx' >&2 - exit 1 - fi - - export storage_bin="$build_dir/dbms/src/Server/tiflash" + if [ -f "../cmake-build-debug/${TIFLASH_PATH}" ]; then + build_dir="../cmake-build-debug" + else + echo 'Error: Cannot find TiFlash binary. 
Specify via: export storage_bin=xxx' >&2 + exit 1 + fi + export storage_bin="$build_dir/dbms/src/Server/tiflash" fi # Server address for connecting diff --git a/tests/run-test.sh b/tests/run-test.sh index 360ae34bb40..4e04d374ca0 100755 --- a/tests/run-test.sh +++ b/tests/run-test.sh @@ -149,8 +149,8 @@ elif [ -x "$(command -v python2)" ]; then elif [ -x "$(command -v python)" ]; then export PY="python" else - echo 'Error: python not found in PATH.' >&2 - exit 1 + echo 'Error: python not found in PATH.' >&2 + exit 1 fi target="$1" @@ -208,13 +208,13 @@ fi mysql_client="mysql -u root -P $tidb_port -h $tidb_server -e" if [ "$fullstack" = true ]; then - mysql -u root -P $tidb_port -h $tidb_server -e "create database if not exists $tidb_db" - sleep 10 - if [ $? != 0 ]; then - echo "create database '"$tidb_db"' failed" >&2 - exit 1 - fi - ${PY} generate-fullstack-test.py "$tidb_db" "$tidb_table" + mysql -u root -P $tidb_port -h $tidb_server -e "create database if not exists $tidb_db" + sleep 10 + if [ $? 
!= 0 ]; then + echo "create database '"$tidb_db"' failed" >&2 + exit 1 + fi + ${PY} generate-fullstack-test.py "$tidb_db" "$tidb_table" fi run_path "$dbc" "$target" "$continue_on_error" "$fuzz" "$skip_raw_test" "$mysql_client" "$verbose" From 5b8c686c11a848946fcf954b4b5eff8d530c66f6 Mon Sep 17 00:00:00 2001 From: Wish Date: Wed, 21 Sep 2022 23:10:39 +0800 Subject: [PATCH 09/14] Introduce a new framework for DMStore similar to Segment test Signed-off-by: Wish --- .../tests/gtest_dm_delta_merge_store.cpp | 159 +-------- .../tests/gtest_dm_simple_pk_test_basic.cpp | 310 ++++++++++++++++++ .../tests/gtest_dm_simple_pk_test_basic.h | 104 ++++++ .../tests/gtest_dm_store_background.cpp | 171 ++++++++++ libs/libsymbolization/CMakeLists.txt | 9 +- 5 files changed, 589 insertions(+), 164 deletions(-) create mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp create mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h create mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 16e8c25dbcb..a510d74391f 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -35,8 +36,6 @@ #include #include -#include "Storages/DeltaMerge/RowKeyRange.h" - namespace DB { namespace FailPoints @@ -3383,162 +3382,6 @@ try CATCH -class DeltaMergeStoreBackgroundTest - : public DB::base::TiFlashStorageTestBasic -{ -public: - void SetUp() override - { - FailPointHelper::enableFailPoint(FailPoints::gc_skip_update_safe_point); - - try - { - TiFlashStorageTestBasic::SetUp(); - setupDMStore(); - // Split into 4 segments. 
- helper = std::make_unique(*db_context); - helper->prepareSegments(store, 50, DMTestEnv::PkType::CommonHandle); - } - CATCH - } - - void TearDown() override - { - TiFlashStorageTestBasic::TearDown(); - FailPointHelper::disableFailPoint(FailPoints::gc_skip_update_safe_point); - } - - void setupDMStore() - { - auto cols = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::CommonHandle); - store = std::make_shared(*db_context, - false, - "test", - DB::base::TiFlashStorageTestBasic::getCurrentFullTestName(), - 101, - *cols, - (*cols)[0], - true, - 1, - DeltaMergeStore::Settings()); - dm_context = store->newDMContext(*db_context, db_context->getSettingsRef(), DB::base::TiFlashStorageTestBasic::getCurrentFullTestName()); - } - -protected: - std::unique_ptr helper{}; - DeltaMergeStorePtr store; - DMContextPtr dm_context; -}; - -TEST_F(DeltaMergeStoreBackgroundTest, GCWillMergeMultipleSegments) -try -{ - ASSERT_EQ(store->segments.size(), 4); - auto gc_n = store->onSyncGc(1); - ASSERT_EQ(store->segments.size(), 1); - ASSERT_EQ(gc_n, 1); -} -CATCH - -TEST_F(DeltaMergeStoreBackgroundTest, GCOnlyMergeSmallSegments) -try -{ - UInt64 gc_n = 0; - - // Note: initially we have 4 segments, each segment contains 50 rows. - - ASSERT_EQ(store->segments.size(), 4); - db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 10; - gc_n = store->onSyncGc(100); - ASSERT_EQ(store->segments.size(), 4); - ASSERT_EQ(gc_n, 0); - - // In this case, merge two segments will exceed small_segment_rows, so no merge will happen - db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 55 * 3; - gc_n = store->onSyncGc(100); - ASSERT_EQ(store->segments.size(), 4); - ASSERT_EQ(gc_n, 0); - - // In this case, we will only merge two segments and then stop. 
- // [50, 50, 50, 50] => [100, 100] - db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 105 * 3; - gc_n = store->onSyncGc(100); - ASSERT_EQ(store->segments.size(), 2); - ASSERT_EQ(gc_n, 2); - helper->resetExpectedRows(); - ASSERT_EQ(helper->rows_by_segments[0], 100); - ASSERT_EQ(helper->rows_by_segments[1], 100); - - gc_n = store->onSyncGc(100); - ASSERT_EQ(store->segments.size(), 2); - ASSERT_EQ(gc_n, 0); - helper->verifyExpectedRowsForAllSegments(); -} -CATCH - -TEST_F(DeltaMergeStoreBackgroundTest, GCMergeAndStop) -try -{ - UInt64 gc_n = 0; - - // Note: initially we have 4 segments, each segment contains 50 rows. - - ASSERT_EQ(store->segments.size(), 4); - - // In this case, we will only merge two segments and then stop. - // [50, 50, 50, 50] => [100, 50, 50] - db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 105 * 3; - gc_n = store->onSyncGc(1); - ASSERT_EQ(store->segments.size(), 3); - ASSERT_EQ(gc_n, 1); - helper->resetExpectedRows(); - ASSERT_EQ(helper->rows_by_segments[0], 100); - ASSERT_EQ(helper->rows_by_segments[1], 50); - ASSERT_EQ(helper->rows_by_segments[2], 50); -} -CATCH - -TEST_F(DeltaMergeStoreBackgroundTest, GCMergeWhileFlushing) -try -{ - ASSERT_EQ(store->segments.size(), 4); - - Block block = DMTestEnv::prepareSimpleWriteBlock(0, 500, false, DMTestEnv::PkType::CommonHandle, 10 /* new tso */); - store->write(*db_context, db_context->getSettingsRef(), block); - - // Currently, when there is a flush in progress, the segment merge in GC thread will be blocked. 
- - auto sp_flush_commit = SyncPointCtl::enableInScope("before_ColumnFileFlushTask::commit"); - auto sp_merge_flush_retry = SyncPointCtl::enableInScope("before_DeltaMergeStore::segmentMerge|retry_flush"); - - auto th_flush = std::async([&]() { - auto result = store->segments.begin()->second->flushCache(*dm_context); - ASSERT_TRUE(result); - }); - - sp_flush_commit.waitAndPause(); - - auto th_gc = std::async([&]() { - auto gc_n = store->onSyncGc(1); - ASSERT_EQ(gc_n, 1); - ASSERT_EQ(store->segments.size(), 1); - }); - - // Expect merge triggered by GC is retrying... because there is a flush in progress. - sp_merge_flush_retry.waitAndPause(); - - // Finish the flush. - sp_flush_commit.next(); - sp_flush_commit.disable(); - th_flush.wait(); - - // The merge in GC should continue without any further retries. - sp_merge_flush_retry.next(); - th_gc.wait(); -} -CATCH - - } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp new file mode 100644 index 00000000000..15857ab9676 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp @@ -0,0 +1,310 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +namespace DB +{ + +namespace FailPoints +{ +extern const char skip_check_segment_update[]; +} // namespace FailPoints + +namespace DM +{ + +namespace tests +{ + +void SimplePKTestBasic::reload() +{ + TiFlashStorageTestBasic::SetUp(); + + version = 0; + + auto cols = DMTestEnv::getDefaultColumns(is_common_handle ? DMTestEnv::PkType::CommonHandle : DMTestEnv::PkType::HiddenTiDBRowID); + store = std::make_shared(*db_context, + false, + "test", + DB::base::TiFlashStorageTestBasic::getCurrentFullTestName(), + 101, + *cols, + (*cols)[0], + is_common_handle, + 1, + DeltaMergeStore::Settings()); + dm_context = store->newDMContext(*db_context, db_context->getSettingsRef(), DB::base::TiFlashStorageTestBasic::getCurrentFullTestName()); +} + +SegmentPtr SimplePKTestBasic::getSegmentAt(Int64 key) const +{ + auto row_key = buildRowKey(key); + std::shared_lock lock(store->read_write_mutex); + auto segment_it = store->segments.upper_bound(row_key.toRowKeyValueRef()); + RUNTIME_CHECK(segment_it != store->segments.end()); + auto segment = segment_it->second; + RUNTIME_CHECK(store->isSegmentValid(lock, segment)); + return segment; +} + +void SimplePKTestBasic::ensureSegmentBreakpoints(const std::vector & breakpoints, bool use_logical_split) +{ + for (const auto & bp : breakpoints) + { + auto bp_key = buildRowKey(bp); + + while (true) + { + SegmentPtr segment; + { + std::shared_lock lock(store->read_write_mutex); + + auto segment_it = store->segments.upper_bound(bp_key.toRowKeyValueRef()); + RUNTIME_CHECK(segment_it != store->segments.end()); + segment = segment_it->second; + } + // The segment is already break at the boundary + if (compare(segment->getRowKeyRange().getStart(), bp_key.toRowKeyValueRef()) == 0) + break; + auto split_mode = use_logical_split ? 
DeltaMergeStore::SegmentSplitMode::Logical : DeltaMergeStore::SegmentSplitMode::Physical; + auto [left, right] = store->segmentSplit(*dm_context, segment, DeltaMergeStore::SegmentSplitReason::ForegroundWrite, bp_key, split_mode); + if (left) + break; + } + } +} + +std::vector SimplePKTestBasic::getSegmentBreakpoints() const +{ + std::vector breakpoints; + std::unique_lock lock(store->read_write_mutex); + for (auto it = std::next(store->segments.cbegin()); it != store->segments.cend(); it++) + { + auto [start, end] = parseRange(it->second->getRowKeyRange()); + breakpoints.push_back(start); + } + return breakpoints; +} + +RowKeyValue SimplePKTestBasic::buildRowKey(Int64 pk) const +{ + if (!is_common_handle) + return RowKeyValue::fromHandle(pk); + + WriteBufferFromOwnString ss; + ::DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); + ::DB::EncodeInt64(pk, ss); + return RowKeyValue{true, std::make_shared(ss.releaseStr()), pk}; +} + +RowKeyRange SimplePKTestBasic::buildRowRange(Int64 start, Int64 end) const +{ + return RowKeyRange(buildRowKey(start), buildRowKey(end), is_common_handle, 1); +} + +std::pair SimplePKTestBasic::parseRange(const RowKeyRange & range) const +{ + Int64 start_key, end_key; + + if (!is_common_handle) + { + start_key = range.getStart().int_value; + end_key = range.getEnd().int_value; + return {start_key, end_key}; + } + + if (range.isStartInfinite()) + { + start_key = std::numeric_limits::min(); + } + else + { + EXPECT_EQ(range.getStart().data[0], TiDB::CodecFlagInt); + size_t cursor = 1; + start_key = DecodeInt64(cursor, String(range.getStart().data, range.getStart().size)); + } + if (range.isEndInfinite()) + { + end_key = std::numeric_limits::max(); + } + else + { + EXPECT_EQ(range.getEnd().data[0], TiDB::CodecFlagInt); + size_t cursor = 1; + end_key = DecodeInt64(cursor, String(range.getEnd().data, range.getEnd().size)); + } + + return {start_key, end_key}; +} + +Block SimplePKTestBasic::prepareWriteBlock(Int64 start_key, Int64 end_key, bool 
is_deleted) +{ + RUNTIME_CHECK(start_key <= end_key); + if (end_key == start_key) + return Block{}; + version++; + return DMTestEnv::prepareSimpleWriteBlock( + start_key, // + end_key, + false, + version, + DMTestEnv::pk_name, + EXTRA_HANDLE_COLUMN_ID, + is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, + is_common_handle, + 1, + true, + is_deleted); +} + +void SimplePKTestBasic::fill(Int64 start_key, Int64 end_key) +{ + auto block = prepareWriteBlock(start_key, end_key); + store->write(*db_context, db_context->getSettingsRef(), block); +} + +void SimplePKTestBasic::fillDelete(Int64 start_key, Int64 end_key) +{ + auto block = prepareWriteBlock(start_key, end_key, /* delete */ true); + store->write(*db_context, db_context->getSettingsRef(), block); +} + +void SimplePKTestBasic::flush(Int64 start_key, Int64 end_key) +{ + auto range = buildRowRange(start_key, end_key); + store->flushCache(*db_context, range, true); +} + +void SimplePKTestBasic::flush() +{ + auto range = RowKeyRange::newAll(is_common_handle, 1); + store->flushCache(*db_context, range, true); +} + +void SimplePKTestBasic::mergeDelta() +{ + store->mergeDeltaAll(*db_context); +} + +void SimplePKTestBasic::deleteRange(Int64 start_key, Int64 end_key) +{ + auto range = buildRowRange(start_key, end_key); + store->deleteRange(*db_context, db_context->getSettingsRef(), range); +} + +size_t SimplePKTestBasic::getRowsN() +{ + const auto & columns = store->getTableColumns(); + auto in = store->read( + *db_context, + db_context->getSettingsRef(), + columns, + {RowKeyRange::newAll(is_common_handle, 1)}, + /* num_streams= */ 1, + /* max_version= */ std::numeric_limits::max(), + EMPTY_FILTER, + "", + /* keep_order= */ false, + /* is_fast_mode= */ false, + /* expected_block_size= */ 1024)[0]; + return getInputStreamNRows(in); +} + +size_t SimplePKTestBasic::getRowsN(Int64 start_key, Int64 end_key) +{ + const auto & columns = store->getTableColumns(); + auto in = store->read( + 
*db_context, + db_context->getSettingsRef(), + columns, + {buildRowRange(start_key, end_key)}, + /* num_streams= */ 1, + /* max_version= */ std::numeric_limits::max(), + EMPTY_FILTER, + "", + /* keep_order= */ false, + /* is_fast_mode= */ false, + /* expected_block_size= */ 1024)[0]; + return getInputStreamNRows(in); +} + + +TEST_F(SimplePKTestBasic, FillAndRead) +try +{ + fill(0, 100); + EXPECT_EQ(100, getRowsN(-50, 200)); + EXPECT_EQ(80, getRowsN(20, 200)); + + fillDelete(40, 150); + EXPECT_EQ(40, getRowsN(-50, 200)); +} +CATCH + + +TEST_F(SimplePKTestBasic, SegmentBreakpoints) +try +{ + FailPointHelper::enableFailPoint(FailPoints::skip_check_segment_update); + SCOPE_EXIT({ + FailPointHelper::disableFailPoint(FailPoints::skip_check_segment_update); + }); + + for (auto ch : {true, false}) + { + is_common_handle = ch; + reload(); + + { + ASSERT_EQ(store->segments.size(), 1); + auto bps = getSegmentBreakpoints(); + ASSERT_EQ(bps.size(), 0); + } + { + ensureSegmentBreakpoints({100, 10, -40, 500}); + } + { + ASSERT_EQ(store->segments.size(), 5); + auto bps = getSegmentBreakpoints(); + ASSERT_EQ(bps.size(), 4); + ASSERT_EQ(bps[0], -40); + ASSERT_EQ(bps[1], 10); + ASSERT_EQ(bps[2], 100); + ASSERT_EQ(bps[3], 500); + } + { + // One breakpoint is equal to a segment boundary, check whether it does not cause problems. 
+ ensureSegmentBreakpoints({30, 10}); + } + { + ASSERT_EQ(store->segments.size(), 6); + auto bps = getSegmentBreakpoints(); + ASSERT_EQ(bps.size(), 5); + ASSERT_EQ(bps[0], -40); + ASSERT_EQ(bps[1], 10); + ASSERT_EQ(bps[2], 30); + ASSERT_EQ(bps[3], 100); + ASSERT_EQ(bps[4], 500); + } + } +} +CATCH + + +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h new file mode 100644 index 00000000000..622f55e5474 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h @@ -0,0 +1,104 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +namespace DM +{ +namespace tests +{ + +/** + * This is similar to SegmentTestBasic, but is for the DeltaMergeStore. + * It allows you to write tests easier based on the assumption that the PK is either Int or Int encoded in String. 
+ */ +class SimplePKTestBasic : public DB::base::TiFlashStorageTestBasic +{ +public: + void SetUp() override + { + reload(); + } + + void TearDown() override + { + TiFlashStorageTestBasic::TearDown(); + } + +public: + // Lightweight wrappers + + void fill(Int64 start_key, Int64 end_key); + void fillDelete(Int64 start_key, Int64 end_key); + void flush(Int64 start_key, Int64 end_key); + void flush(); + void mergeDelta(); + void deleteRange(Int64 start_key, Int64 end_key); + size_t getRowsN(); + size_t getRowsN(Int64 start_key, Int64 end_key); + +public: + + SegmentPtr getSegmentAt(Int64 key) const; + + /** + * Ensure segments in the store are split at the specified breakpoints. + * This could be used to initialize segments as desired. + */ + void ensureSegmentBreakpoints(const std::vector & breakpoints, bool use_logical_split = false); + + /** + * Returns the breakpoints of all segments in the store. + * + * Example: + * + * Segments | Expected Breakpoints + * -------------------------------------------------- + * [-inf, +inf] | None + * [-inf, 10), [10, +inf) | 10 + * [-inf, 10), [10, 30), [30, +inf) | 10, 30 + */ + std::vector getSegmentBreakpoints() const; + +protected: + void reload(); + + RowKeyValue buildRowKey(Int64 pk) const; + + RowKeyRange buildRowRange(Int64 start, Int64 end) const; + + std::pair parseRange(const RowKeyRange & range) const; + + Block prepareWriteBlock(Int64 start_key, Int64 end_key, bool is_deleted = false); + +protected: + DeltaMergeStorePtr store; + DMContextPtr dm_context; + + UInt64 version = 0; + +protected: + // Below are options + bool is_common_handle = false; +}; +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp new file mode 100644 index 00000000000..644973f727d --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp @@ -0,0 +1,171 @@ +// Copyright 
2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +namespace DB +{ +namespace FailPoints +{ +extern const char gc_skip_update_safe_point[]; +extern const char skip_check_segment_update[]; +} // namespace FailPoints + +namespace DM +{ +namespace tests +{ + + +class DeltaMergeStoreBackgroundTest + : public SimplePKTestBasic +{ +public: + void SetUp() override + { + FailPointHelper::enableFailPoint(FailPoints::gc_skip_update_safe_point); + FailPointHelper::enableFailPoint(FailPoints::skip_check_segment_update); + SimplePKTestBasic::SetUp(); + global_settings_backup = db_context->getGlobalContext().getSettings(); + } + + void TearDown() override + { + SimplePKTestBasic::TearDown(); + FailPointHelper::disableFailPoint(FailPoints::skip_check_segment_update); + FailPointHelper::disableFailPoint(FailPoints::gc_skip_update_safe_point); + db_context->getGlobalContext().setSettings(global_settings_backup); + } + +protected: + Settings global_settings_backup; +}; + + +TEST_F(DeltaMergeStoreBackgroundTest, GCWillMergeMultipleSegments) +try +{ + ensureSegmentBreakpoints({0, 10, 40, 100}); + ASSERT_EQ(std::vector({0, 10, 40, 100}), getSegmentBreakpoints()); + + auto gc_n = store->onSyncGc(1); + ASSERT_EQ(std::vector{}, getSegmentBreakpoints()); + ASSERT_EQ(gc_n, 1); + ASSERT_EQ(0, getRowsN()); +} +CATCH + + +TEST_F(DeltaMergeStoreBackgroundTest, GCOnlyMergeSmallSegments) +try +{ + UInt64 gc_n = 0; + + 
ensureSegmentBreakpoints({0, 50, 100, 150, 200}); + fill(-1000, 1000); + ASSERT_EQ(2000, getRowsN()); + + db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 10; + gc_n = store->onSyncGc(100); + ASSERT_EQ(std::vector({0, 50, 100, 150, 200}), getSegmentBreakpoints()); + ASSERT_EQ(gc_n, 0); + + // In this case, merge two segments will exceed small_segment_rows, so no merge will happen + db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 55 * 3; + gc_n = store->onSyncGc(100); + ASSERT_EQ(std::vector({0, 50, 100, 150, 200}), getSegmentBreakpoints()); + ASSERT_EQ(gc_n, 0); + + // In this case, we will only merge two segments and then stop. + db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 105 * 3; + gc_n = store->onSyncGc(100); + ASSERT_EQ(std::vector({0, 100, 200}), getSegmentBreakpoints()); + ASSERT_EQ(gc_n, 2); + + gc_n = store->onSyncGc(100); + ASSERT_EQ(std::vector({0, 100, 200}), getSegmentBreakpoints()); + ASSERT_EQ(gc_n, 0); + + ASSERT_EQ(200, getRowsN(0, 200)); + ASSERT_EQ(2000, getRowsN()); +} +CATCH + + +TEST_F(DeltaMergeStoreBackgroundTest, GCMergeAndStop) +try +{ + fill(-1000, 1000); + flush(); + mergeDelta(); + + ensureSegmentBreakpoints({0, 50, 100, 150, 200}); + + // In this case, we will only merge two segments and then stop. + db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 105 * 3; + auto gc_n = store->onSyncGc(1); + ASSERT_EQ(std::vector({0, 100, 150, 200}), getSegmentBreakpoints()); + ASSERT_EQ(gc_n, 1); + + ASSERT_EQ(200, getRowsN(0, 200)); + ASSERT_EQ(2000, getRowsN()); +} +CATCH + + +TEST_F(DeltaMergeStoreBackgroundTest, GCMergeWhileFlushing) +try +{ + fill(-1000, 1000); + + ensureSegmentBreakpoints({0, 50, 100}); + + // Currently, when there is a flush in progress, the segment merge in GC thread will be blocked. 
+ + auto sp_flush_commit = SyncPointCtl::enableInScope("before_ColumnFileFlushTask::commit"); + auto sp_merge_flush_retry = SyncPointCtl::enableInScope("before_DeltaMergeStore::segmentMerge|retry_flush"); + + auto th_flush = std::async([&]() { + // Flush the first segment that GC will touch with. + flush(-10, 0); + }); + + sp_flush_commit.waitAndPause(); + + auto th_gc = std::async([&]() { + auto gc_n = store->onSyncGc(1); + ASSERT_EQ(gc_n, 1); + ASSERT_EQ(store->segments.size(), 1); + }); + + // Expect merge triggered by GC is retrying... because there is a flush in progress. + sp_merge_flush_retry.waitAndPause(); + + // Finish the flush. + sp_flush_commit.next(); + sp_flush_commit.disable(); + th_flush.wait(); + + // The merge in GC should continue without any further retries. + sp_merge_flush_retry.next(); + th_gc.wait(); +} +CATCH + +} // namespace tests +} // namespace DM +} // namespace DB \ No newline at end of file diff --git a/libs/libsymbolization/CMakeLists.txt b/libs/libsymbolization/CMakeLists.txt index 78dae44199a..ffd360fc9d4 100644 --- a/libs/libsymbolization/CMakeLists.txt +++ b/libs/libsymbolization/CMakeLists.txt @@ -12,12 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR SAN_DEBUG) - set(_SYMBOLIZATION_BUILD_PROFILE "debug") -else() - set(_SYMBOLIZATION_BUILD_PROFILE "release") - set(_SYMBOLIZATION_BUILD_ARGS --release) -endif() +# Debug-mode symbolization is slow. Let's always build a release. The building is fast anyway. 
+set(_SYMBOLIZATION_BUILD_PROFILE "release") +set(_SYMBOLIZATION_BUILD_ARGS --release) set(_SYMBOLIZATION_SOURCE_DIR "${TiFlash_SOURCE_DIR}/libs/libsymbolization") set(_SYMBOLIZATION_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/${_SYMBOLIZATION_BUILD_PROFILE}/${CMAKE_STATIC_LIBRARY_PREFIX}symbolization${CMAKE_STATIC_LIBRARY_SUFFIX}") From cd863a584751246bc1dc652c59642415f87c1d7f Mon Sep 17 00:00:00 2001 From: Wish Date: Wed, 21 Sep 2022 23:21:14 +0800 Subject: [PATCH 10/14] Reformat Signed-off-by: Wish --- .../Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp | 1 - .../DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp | 4 ++-- .../Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index a510d74391f..edb34cea2b4 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp index 15857ab9676..1d1b99c4e42 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp @@ -219,7 +219,7 @@ size_t SimplePKTestBasic::getRowsN() EMPTY_FILTER, "", /* keep_order= */ false, - /* is_fast_mode= */ false, + /* is_fast_scan= */ false, /* expected_block_size= */ 1024)[0]; return getInputStreamNRows(in); } @@ -237,7 +237,7 @@ size_t SimplePKTestBasic::getRowsN(Int64 start_key, Int64 end_key) EMPTY_FILTER, "", /* keep_order= */ false, - /* is_fast_mode= */ false, + /* is_fast_scan= */ false, /* expected_block_size= */ 1024)[0]; return getInputStreamNRows(in); } diff 
--git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h index 622f55e5474..73c955c7988 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h @@ -56,7 +56,6 @@ class SimplePKTestBasic : public DB::base::TiFlashStorageTestBasic size_t getRowsN(Int64 start_key, Int64 end_key); public: - SegmentPtr getSegmentAt(Int64 key) const; /** From 0dd1712d6ec080cdd7239bded0a83b24892e9214 Mon Sep 17 00:00:00 2001 From: Wish Date: Thu, 22 Sep 2022 20:59:20 +0800 Subject: [PATCH 11/14] storage: merge segments that is contained by one pack Signed-off-by: Wish --- dbms/src/Common/FailPoint.cpp | 2 + .../Storages/DeltaMerge/DeltaMergeStore.cpp | 10 +- .../DeltaMerge/DeltaMergeStore_InternalBg.cpp | 82 +++-- dbms/src/Storages/DeltaMerge/Segment.cpp | 15 +- .../Storages/DeltaMerge/StableValueSpace.cpp | 62 +++- .../Storages/DeltaMerge/StableValueSpace.h | 89 ++++- .../DeltaMerge/tests/gtest_dm_segment.cpp | 2 +- .../tests/gtest_dm_simple_pk_test_basic.cpp | 66 ++++ .../tests/gtest_dm_simple_pk_test_basic.h | 7 + .../tests/gtest_dm_store_background.cpp | 334 +++++++++++++++++- .../DeltaMerge/tests/gtest_segment.cpp | 70 ---- 11 files changed, 592 insertions(+), 147 deletions(-) diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index 4b2ffcbe167..9ac51fa0806 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -76,6 +76,8 @@ std::unordered_map> FailPointHelper::f #define APPLY_FOR_FAILPOINTS(M) \ M(skip_check_segment_update) \ M(gc_skip_update_safe_point) \ + M(gc_skip_merge_delta) \ + M(gc_skip_merge) \ M(force_set_page_file_write_errno) \ M(force_split_io_size_4k) \ M(minimum_block_size_for_cross_join) \ diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index fc6cbb49795..085d44d89d8 
100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -1546,17 +1546,17 @@ DeltaMergeStoreStat DeltaMergeStore::getStat() total_delta_valid_cache_rows += delta->getValidCacheRows(); } - if (stable->getPacks()) + if (stable->getDMFilesPacks()) { stat.total_rows += stable->getRows(); stat.total_size += stable->getBytes(); stat.stable_count += 1; - stat.total_pack_count_in_stable += stable->getPacks(); + stat.total_pack_count_in_stable += stable->getDMFilesPacks(); stat.total_stable_rows += stable->getRows(); stat.total_stable_size += stable->getBytes(); - stat.total_stable_size_on_disk += stable->getBytesOnDisk(); + stat.total_stable_size_on_disk += stable->getDMFilesBytesOnDisk(); } } @@ -1679,10 +1679,10 @@ SegmentStats DeltaMergeStore::getSegmentStats() stat.size = delta->getBytes() + stable->getBytes(); stat.delete_ranges = delta->getDeletes(); - stat.stable_size_on_disk = stable->getBytesOnDisk(); + stat.stable_size_on_disk = stable->getDMFilesBytesOnDisk(); stat.delta_pack_count = delta->getColumnFileCount(); - stat.stable_pack_count = stable->getPacks(); + stat.stable_pack_count = stable->getDMFilesPacks(); stat.avg_delta_pack_rows = static_cast(delta->getRows()) / stat.delta_pack_count; stat.avg_stable_pack_rows = static_cast(stable->getRows()) / stat.stable_pack_count; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp index ea387529198..e4e81876958 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp @@ -30,6 +30,8 @@ namespace DB namespace FailPoints { extern const char gc_skip_update_safe_point[]; +extern const char gc_skip_merge_delta[]; +extern const char gc_skip_merge[]; extern const char pause_before_dt_background_delta_merge[]; extern const char pause_until_dt_background_delta_merge[]; } // namespace FailPoints @@ 
-396,12 +398,22 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte double invalid_data_ratio_threshold, const LoggerPtr & log) { - auto [first_pack_included, last_pack_included] = snap->stable->isFirstAndLastPackIncludedInRange(context, seg->getRowKeyRange()); - // Do a quick check about whether the DTFile is completely included in the segment range - if (first_pack_included && last_pack_included) + if (snap->stable->getDMFilesPacks() == 0) { - LOG_FMT_TRACE(log, "GC - shouldCompactStableWithTooMuchDataOutOfSegmentRange marking " - "segment as valid data ratio checked because all packs are included, segment={}", + LOG_FMT_TRACE( + log, + "GC - shouldCompactStableWithTooMuchDataOutOfSegmentRange skipped segment " + "because the DTFile of stable is empty, segment={}", + seg->info()); + return false; + } + + auto at_least_result = snap->stable->getAtLeastRowsAndBytes(context, seg->getRowKeyRange()); + if (at_least_result.first_pack_intersection == RSResult::All // + && at_least_result.last_pack_intersection == RSResult::All) + { + LOG_FMT_TRACE(log, "GC - shouldCompactStableWithTooMuchDataOutOfSegmentRange permanently skipped segment " + "because all packs in DTFiles are fully contained by the segment range, segment={}", seg->info()); seg->setValidDataRatioChecked(); return false; @@ -410,34 +422,35 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte std::unordered_set prev_segment_file_ids = getDMFileIDs(prev_seg); std::unordered_set next_segment_file_ids = getDMFileIDs(next_seg); + // Only try to compact the segment when there is data out of this segment range and is also not shared by neighbor segments. 
bool contains_invalid_data = false; const auto & dt_files = snap->stable->getDMFiles(); - if (!first_pack_included) + if (at_least_result.first_pack_intersection != RSResult::All) { - auto first_file_id = dt_files[0]->fileId(); - if (prev_segment_file_ids.count(first_file_id) == 0) + auto first_file_id = dt_files.front()->fileId(); + if (prev_seg != nullptr && prev_segment_file_ids.count(first_file_id) == 0) { contains_invalid_data = true; } } - if (!last_pack_included) + if (at_least_result.last_pack_intersection != RSResult::All) { - auto last_file_id = dt_files[dt_files.size() - 1]->fileId(); - if (next_segment_file_ids.count(last_file_id) == 0) + auto last_file_id = dt_files.back()->fileId(); + if (next_seg != nullptr && next_segment_file_ids.count(last_file_id) == 0) { contains_invalid_data = true; } } - // Only try to compact the segment when there is data out of this segment range and is also not shared by neighbor segments. if (!contains_invalid_data) { LOG_FMT_TRACE( log, - "GC - shouldCompactStableWithTooMuchDataOutOfSegmentRange checked false because no invalid data, " - "segment={} first_pack_included={} last_pack_included={} prev_seg_files=[{}] next_seg_files=[{}] my_files=[{}]", - seg->simpleInfo(), - first_pack_included, - last_pack_included, + "GC - shouldCompactStableWithTooMuchDataOutOfSegmentRange checked false " + "because segment DTFile is shared with a neighbor segment, " + "segment={} first_pack_inc={} last_pack_inc={} prev_seg_files=[{}] next_seg_files=[{}] my_files=[{}]", + seg->info(), + magic_enum::enum_name(at_least_result.first_pack_intersection), + magic_enum::enum_name(at_least_result.last_pack_intersection), fmt::join(prev_segment_file_ids, ","), fmt::join(next_segment_file_ids, ","), [&] { @@ -451,29 +464,32 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte ","); return fmt_buf.toString(); }()); + // We do not mark `setValidDataRatioChecked` because neighbor segments' state could change. 
return false; } - size_t total_rows = 0; - size_t total_bytes = 0; + size_t dt_file_rows = 0; + size_t dt_file_bytes = 0; for (const auto & file : dt_files) { - total_rows += file->getRows(); - total_bytes += file->getBytes(); + dt_file_rows += file->getRows(); + dt_file_bytes += file->getBytes(); } - auto valid_rows = snap->stable->getRows(); - auto valid_bytes = snap->stable->getBytes(); - auto check_result = (valid_rows < total_rows * (1 - invalid_data_ratio_threshold)) || (valid_bytes < total_bytes * (1 - invalid_data_ratio_threshold)); + auto check_result = (at_least_result.rows < dt_file_rows * (1 - invalid_data_ratio_threshold)) // + || (at_least_result.bytes < dt_file_bytes * (1 - invalid_data_ratio_threshold)); LOG_FMT_TRACE( log, "GC - Checking shouldCompactStableWithTooMuchDataOutOfSegmentRange, " - "check_result={} valid_rows={} valid_bytes={} file_rows={} file_bytes={}", + "segment={} check_result={} first_pack_inc={} last_pack_inc={} rows_at_least={} bytes_at_least={} file_rows={} file_bytes={}", + seg->info(), check_result, - valid_rows, - valid_bytes, - total_rows, - total_bytes); + magic_enum::enum_name(at_least_result.first_pack_intersection), + magic_enum::enum_name(at_least_result.last_pack_intersection), + at_least_result.rows, + at_least_result.bytes, + dt_file_rows, + dt_file_bytes); seg->setValidDataRatioChecked(); return check_result; } @@ -482,6 +498,10 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte SegmentPtr DeltaMergeStore::gcTrySegmentMerge(const DMContextPtr & dm_context, const SegmentPtr & segment) { + fiu_do_on(FailPoints::gc_skip_merge, { + return {}; + }); + auto segment_rows = segment->getEstimatedRows(); auto segment_bytes = segment->getEstimatedBytes(); if (segment_rows >= dm_context->small_segment_rows || segment_bytes >= dm_context->small_segment_bytes) @@ -521,6 +541,10 @@ SegmentPtr DeltaMergeStore::gcTrySegmentMerge(const DMContextPtr & dm_context, c SegmentPtr 
DeltaMergeStore::gcTrySegmentMergeDelta(const DMContextPtr & dm_context, const SegmentPtr & segment, const SegmentPtr & prev_segment, const SegmentPtr & next_segment, DB::Timestamp gc_safe_point) { + fiu_do_on(FailPoints::gc_skip_merge_delta, { + return {}; + }); + SegmentSnapshotPtr segment_snap; { std::shared_lock lock(read_write_mutex); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 86dbec61db0..684d596b226 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -1010,7 +1010,7 @@ std::optional Segment::prepareSplit(DMContext & dm_context, { // When split point is not specified, there are some preconditions in order to use logical split. if (!dm_context.enable_logical_split // - || segment_snap->stable->getPacks() <= 3 // + || segment_snap->stable->getDMFilesPacks() <= 3 // || segment_snap->delta->getRows() > segment_snap->stable->getRows()) { try_split_mode = SplitMode::Physical; @@ -1565,18 +1565,27 @@ String Segment::simpleInfo() const String Segment::info() const { - return fmt::format("", + return fmt::format("", segment_id, epoch, rowkey_range.toDebugString(), hasAbandoned() ? 
" abandoned=true" : "", next_segment_id, + delta->getRows(), delta->getBytes(), delta->getDeletes(), + stable->getDMFilesString(), stable->getRows(), - stable->getBytes()); + stable->getBytes(), + + stable->getDMFilesRows(), + stable->getDMFilesBytes(), + stable->getDMFilesPacks()); } String Segment::simpleInfo(const std::vector & segments) diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 1c3e8de30ab..4a45a3a1bdf 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -128,17 +128,15 @@ size_t StableValueSpace::getBytes() const return valid_bytes; } -size_t StableValueSpace::getBytesOnDisk() const +size_t StableValueSpace::getDMFilesBytesOnDisk() const { - // If this stable value space is logical splitted, some file may not used, - // and this will return more bytes than actual used. size_t bytes = 0; for (const auto & file : files) bytes += file->getBytesOnDisk(); return bytes; } -size_t StableValueSpace::getPacks() const +size_t StableValueSpace::getDMFilesPacks() const { size_t packs = 0; for (const auto & file : files) @@ -146,6 +144,22 @@ size_t StableValueSpace::getPacks() const return packs; } +size_t StableValueSpace::getDMFilesRows() const +{ + size_t rows = 0; + for (const auto & file : files) + rows += file->getRows(); + return rows; +} + +size_t StableValueSpace::getDMFilesBytes() const +{ + size_t bytes = 0; + for (const auto & file : files) + bytes += file->getBytes(); + return bytes; +} + String StableValueSpace::getDMFilesString() { String s; @@ -397,16 +411,17 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & return {approx_rows, approx_bytes}; } -std::pair StableValueSpace::Snapshot::isFirstAndLastPackIncludedInRange(const DMContext & context, const RowKeyRange & range) const +StableValueSpace::Snapshot::AtLeastRowsAndBytesResult // 
+StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, const RowKeyRange & range) const { + AtLeastRowsAndBytesResult ret{}; + // Usually, this method will be called for some "cold" key ranges. // Loading the index into cache may pollute the cache and make the hot index cache invalid. // So don't refill the cache if the index does not exist. - bool first_pack_included = false; - bool last_pack_included = false; - for (size_t i = 0; i < stable->files.size(); i++) + for (size_t file_idx = 0; file_idx < stable->files.size(); ++file_idx) { - const auto & file = stable->files[i]; + const auto & file = stable->files[file_idx]; auto filter = DMFilePackFilter::loadFrom( file, context.db_context.getGlobalContext().getMinMaxIndexCache(), @@ -417,20 +432,37 @@ std::pair StableValueSpace::Snapshot::isFirstAndLastPackIncludedInRa context.db_context.getFileProvider(), context.getReadLimiter(), context.tracing_id); - const auto & use_packs = filter.getUsePacks(); - if (i == 0) + const auto & handle_filter_result = filter.getHandleRes(); + if (file_idx == 0) { // TODO: this check may not be correct when support multiple files in a stable, let's just keep it now for simplicity - first_pack_included = use_packs.empty() || use_packs[0]; + if (handle_filter_result.empty()) + ret.first_pack_intersection = RSResult::None; + else + ret.first_pack_intersection = handle_filter_result.front(); } - if (i == stable->files.size() - 1) + if (file_idx == stable->files.size() - 1) { // TODO: this check may not be correct when support multiple files in a stable, let's just keep it now for simplicity - last_pack_included = use_packs.empty() || use_packs.back(); + if (handle_filter_result.empty()) + ret.last_pack_intersection = RSResult::None; + else + ret.last_pack_intersection = handle_filter_result.back(); + } + + const auto & pack_stats = file->getPackStats(); + for (size_t pack_idx = 0; pack_idx < pack_stats.size(); ++pack_idx) + { + // Only count packs that are fully 
contained by the range. + if (handle_filter_result[pack_idx] == RSResult::All) + { + ret.rows += pack_stats[pack_idx].rows; + ret.bytes += pack_stats[pack_idx].bytes; + } } } - return std::make_pair(first_pack_included, last_pack_included); + return ret; } } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.h b/dbms/src/Storages/DeltaMerge/StableValueSpace.h index f9006058e7a..93761e9ee68 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -49,13 +50,46 @@ class StableValueSpace : public std::enable_shared_from_this PageId getId() { return id; } void saveMeta(WriteBatch & meta_wb); - const DMFiles & getDMFiles() { return files; } - String getDMFilesString(); size_t getRows() const; size_t getBytes() const; - size_t getBytesOnDisk() const; - size_t getPacks() const; + + /** + * Return the underlying DTFiles. + * DTFiles are not fully included in the segment range will be also included in the result. + * Note: Out-of-range DTFiles may be produced by logical split. + */ + const DMFiles & getDMFiles() const { return files; } + + String getDMFilesString(); + + /** + * Return the total on-disk size of the underlying DTFiles. + * DTFiles are not fully included in the segment range will be also counted in. + * Note: Out-of-range DTFiles may be produced by logical split. + */ + size_t getDMFilesBytesOnDisk() const; + + /** + * Return the total number of packs of the underlying DTFiles. + * Packs that are not included in the segment range will be also counted in. + * Note: Out-of-range packs may be produced by logical split. + */ + size_t getDMFilesPacks() const; + + /** + * Return the total number of rows of the underlying DTFiles. + * Rows from packs that are not included in the segment range will be also counted in. + * Note: Out-of-range rows may be produced by logical split. 
+ */ + size_t getDMFilesRows() const; + + /** + * Return the total size of the data of the underlying DTFiles. + * Rows from packs that are not included in the segment range will be also counted in. + * Note: Out-of-range rows may be produced by logical split. + */ + size_t getDMFilesBytes() const; void enableDMFilesGC(); @@ -111,7 +145,7 @@ class StableValueSpace : public std::enable_shared_from_this : log(&Poco::Logger::get("StableValueSpace::Snapshot")) {} - SnapshotPtr clone() + SnapshotPtr clone() const { auto c = std::make_shared(); c->stable = stable; @@ -127,20 +161,24 @@ class StableValueSpace : public std::enable_shared_from_this return c; } - PageId getId() { return id; } + PageId getId() const { return id; } - size_t getRows() { return valid_rows; } - size_t getBytes() { return valid_bytes; } + size_t getRows() const { return valid_rows; } + size_t getBytes() const { return valid_bytes; } - const DMFiles & getDMFiles() { return stable->getDMFiles(); } + /** + * Return the underlying DTFiles. + * DTFiles are not fully included in the segment range will be also included in the result. + * Note: Out-of-range DTFiles may be produced by logical split. + */ + const DMFiles & getDMFiles() const { return stable->getDMFiles(); } - size_t getPacks() - { - size_t packs = 0; - for (auto & file : getDMFiles()) - packs += file->getPacks(); - return packs; - } + /** + * Return the total number of packs of the underlying DTFiles. + * Packs that are not included in the segment range will be also counted in. + * Note: Out-of-range packs may be produced by logical split. 
+ */ + size_t getDMFilesPacks() const { return stable->getDMFilesPacks(); } ColumnCachePtrs & getColumnCaches() { return column_caches; } @@ -156,7 +194,19 @@ class StableValueSpace : public std::enable_shared_from_this RowsAndBytes getApproxRowsAndBytes(const DMContext & context, const RowKeyRange & range) const; - std::pair isFirstAndLastPackIncludedInRange(const DMContext & context, const RowKeyRange & range) const; + struct AtLeastRowsAndBytesResult + { + size_t rows = 0; + size_t bytes = 0; + RSResult first_pack_intersection = RSResult::None; + RSResult last_pack_intersection = RSResult::None; + }; + + /** + * Get the rows and bytes calculated from packs that is **fully contained** by the given range. + * If the pack is partially intersected, then it is not counted. + */ + AtLeastRowsAndBytesResult getAtLeastRowsAndBytes(const DMContext & context, const RowKeyRange & range) const; private: Poco::Logger * log; @@ -171,8 +221,9 @@ class StableValueSpace : public std::enable_shared_from_this // Valid rows is not always the sum of rows in file, // because after logical split, two segments could reference to a same file. - UInt64 valid_rows; - UInt64 valid_bytes; + UInt64 valid_rows; /* At most. The actual valid rows may be lower than this value. */ + UInt64 valid_bytes; /* At most. The actual valid bytes may be lower than this value. 
*/ + DMFiles files; StableProperty property; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index b5de675d2ee..24f79c892c8 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -197,7 +197,7 @@ try // flush segment and make sure there is two packs in stable segment = segment->mergeDelta(dmContext(), tableColumns()); - ASSERT_EQ(segment->getStable()->getPacks(), 2); + ASSERT_EQ(segment->getStable()->getDMFilesPacks(), 2); } { diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp index 1d1b99c4e42..b2169a20440 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.cpp @@ -63,6 +63,12 @@ SegmentPtr SimplePKTestBasic::getSegmentAt(Int64 key) const void SimplePKTestBasic::ensureSegmentBreakpoints(const std::vector & breakpoints, bool use_logical_split) { + LOG_FMT_INFO( + logger_op, + "ensureSegmentBreakpoints [{}] logical_split={}", + fmt::join(breakpoints, ","), + use_logical_split); + for (const auto & bp : breakpoints) { auto bp_key = buildRowKey(bp); @@ -173,35 +179,85 @@ Block SimplePKTestBasic::prepareWriteBlock(Int64 start_key, Int64 end_key, bool void SimplePKTestBasic::fill(Int64 start_key, Int64 end_key) { + LOG_FMT_INFO( + logger_op, + "fill [{}, {})", + start_key, + end_key); + auto block = prepareWriteBlock(start_key, end_key); store->write(*db_context, db_context->getSettingsRef(), block); } void SimplePKTestBasic::fillDelete(Int64 start_key, Int64 end_key) { + LOG_FMT_INFO( + logger_op, + "fillDelete [{}, {})", + start_key, + end_key); + auto block = prepareWriteBlock(start_key, end_key, /* delete */ true); store->write(*db_context, db_context->getSettingsRef(), block); } void 
SimplePKTestBasic::flush(Int64 start_key, Int64 end_key) { + LOG_FMT_INFO( + logger_op, + "flush [{}, {})", + start_key, + end_key); + auto range = buildRowRange(start_key, end_key); store->flushCache(*db_context, range, true); } void SimplePKTestBasic::flush() { + LOG_FMT_INFO( + logger_op, + "flushAll"); + auto range = RowKeyRange::newAll(is_common_handle, 1); store->flushCache(*db_context, range, true); } +void SimplePKTestBasic::mergeDelta(Int64 start_key, Int64 end_key) +{ + LOG_FMT_INFO( + logger_op, + "mergeDelta [{}, {})", + start_key, + end_key); + + auto range = buildRowRange(start_key, end_key); + while (!range.none()) + { + auto processed_range = store->mergeDeltaBySegment(*db_context, range.start); + RUNTIME_CHECK(processed_range.has_value()); + range.setStart(processed_range->end); + } +} + void SimplePKTestBasic::mergeDelta() { + LOG_FMT_INFO( + logger_op, + "mergeDeltaAll"); + + flush(); // as mergeDeltaBySegment always flush, so we also flush here. store->mergeDeltaAll(*db_context); } void SimplePKTestBasic::deleteRange(Int64 start_key, Int64 end_key) { + LOG_FMT_INFO( + logger_op, + "deleteRange [{}, {})", + start_key, + end_key); + auto range = buildRowRange(start_key, end_key); store->deleteRange(*db_context, db_context->getSettingsRef(), range); } @@ -242,6 +298,16 @@ size_t SimplePKTestBasic::getRowsN(Int64 start_key, Int64 end_key) return getInputStreamNRows(in); } +void SimplePKTestBasic::debugDumpAllSegments() const +{ + std::shared_lock lock(store->read_write_mutex); + for (auto [key, segment] : store->segments) + { + UNUSED(key); + LOG_FMT_INFO(logger, "debugDumpAllSegments: {}", segment->info()); + } +} + TEST_F(SimplePKTestBasic, FillAndRead) try diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h index 73c955c7988..1d349ca7755 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h +++ 
b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_simple_pk_test_basic.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include #include @@ -50,6 +51,7 @@ class SimplePKTestBasic : public DB::base::TiFlashStorageTestBasic void fillDelete(Int64 start_key, Int64 end_key); void flush(Int64 start_key, Int64 end_key); void flush(); + void mergeDelta(Int64 start_key, Int64 end_key); void mergeDelta(); void deleteRange(Int64 start_key, Int64 end_key); size_t getRowsN(); @@ -77,6 +79,8 @@ class SimplePKTestBasic : public DB::base::TiFlashStorageTestBasic */ std::vector getSegmentBreakpoints() const; + void debugDumpAllSegments() const; + protected: void reload(); @@ -94,6 +98,9 @@ class SimplePKTestBasic : public DB::base::TiFlashStorageTestBasic UInt64 version = 0; + LoggerPtr logger = Logger::get("SimplePKTestBasic"); + LoggerPtr logger_op = Logger::get("SimplePKTestBasicOperations"); + protected: // Below are options bool is_common_handle = false; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp index 644973f727d..90dab93cf2a 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp @@ -22,6 +22,8 @@ namespace DB namespace FailPoints { extern const char gc_skip_update_safe_point[]; +extern const char gc_skip_merge_delta[]; +extern const char gc_skip_merge[]; extern const char skip_check_segment_update[]; } // namespace FailPoints @@ -31,7 +33,7 @@ namespace tests { -class DeltaMergeStoreBackgroundTest +class DeltaMergeStoreGCTest : public SimplePKTestBasic { public: @@ -56,7 +58,23 @@ class DeltaMergeStoreBackgroundTest }; -TEST_F(DeltaMergeStoreBackgroundTest, GCWillMergeMultipleSegments) +class DeltaMergeStoreGCMergeTest : public DeltaMergeStoreGCTest +{ +public: + void SetUp() override + { + FailPointHelper::enableFailPoint(FailPoints::gc_skip_merge_delta); + 
DeltaMergeStoreGCTest::SetUp(); + } + + void TearDown() override + { + DeltaMergeStoreGCTest::TearDown(); + FailPointHelper::disableFailPoint(FailPoints::gc_skip_merge_delta); + } +}; + +TEST_F(DeltaMergeStoreGCMergeTest, MergeMultipleSegments) try { ensureSegmentBreakpoints({0, 10, 40, 100}); @@ -70,7 +88,7 @@ try CATCH -TEST_F(DeltaMergeStoreBackgroundTest, GCOnlyMergeSmallSegments) +TEST_F(DeltaMergeStoreGCMergeTest, OnlyMergeSmallSegments) try { UInt64 gc_n = 0; @@ -106,7 +124,7 @@ try CATCH -TEST_F(DeltaMergeStoreBackgroundTest, GCMergeAndStop) +TEST_F(DeltaMergeStoreGCMergeTest, MergeAndStop) try { fill(-1000, 1000); @@ -127,7 +145,7 @@ try CATCH -TEST_F(DeltaMergeStoreBackgroundTest, GCMergeWhileFlushing) +TEST_F(DeltaMergeStoreGCMergeTest, MergeWhileFlushing) try { fill(-1000, 1000); @@ -166,6 +184,312 @@ try } CATCH + +class DeltaMergeStoreGCMergeDeltaTest : public DeltaMergeStoreGCTest +{ +public: + void SetUp() override + { + FailPointHelper::enableFailPoint(FailPoints::gc_skip_merge); + DeltaMergeStoreGCTest::SetUp(); + } + + void TearDown() override + { + DeltaMergeStoreGCTest::TearDown(); + FailPointHelper::disableFailPoint(FailPoints::gc_skip_merge); + } +}; + + +TEST_F(DeltaMergeStoreGCMergeDeltaTest, AfterLogicalSplit) +try +{ + db_context->getSettingsRef().dt_segment_stable_pack_rows = 107; // for mergeDelta + db_context->getGlobalContext().getSettingsRef().dt_segment_stable_pack_rows = 107; // for GC + + auto gc_n = store->onSyncGc(1); + ASSERT_EQ(0, gc_n); + + fill(0, 1000); + flush(); + mergeDelta(); + + gc_n = store->onSyncGc(1); + ASSERT_EQ(0, gc_n); + + // Segments that are just logical splited out should not trigger merge delta at all. 
+ ensureSegmentBreakpoints({500}, /* logical_split */ true); + gc_n = store->onSyncGc(1); + ASSERT_EQ(0, gc_n); + + ASSERT_EQ(2, store->segments.size()); + ASSERT_EQ(1000, getSegmentAt(0)->getStable()->getDMFilesRows()); + ASSERT_EQ(1000, getSegmentAt(500)->getStable()->getDMFilesRows()); + + // Segments that are just logical splited out should not trigger merge delta at all. + ensureSegmentBreakpoints({150, 500}, /* logical_split */ true); + gc_n = store->onSyncGc(1); + ASSERT_EQ(0, gc_n); + + ASSERT_EQ(3, store->segments.size()); + ASSERT_EQ(1000, getSegmentAt(0)->getStable()->getDMFilesRows()); + ASSERT_EQ(1000, getSegmentAt(300)->getStable()->getDMFilesRows()); + ASSERT_EQ(1000, getSegmentAt(600)->getStable()->getDMFilesRows()); + + // merge delta for right most segment and check again + mergeDelta(1000, 1001); + ASSERT_EQ(500, getSegmentAt(600)->getStable()->getDMFilesRows()); + + gc_n = store->onSyncGc(100); + ASSERT_EQ(1, gc_n); + + ASSERT_EQ(3, store->segments.size()); + ASSERT_EQ(1000, getSegmentAt(0)->getStable()->getDMFilesRows()); + ASSERT_EQ(350, getSegmentAt(300)->getStable()->getDMFilesRows()); + ASSERT_EQ(500, getSegmentAt(600)->getStable()->getDMFilesRows()); + + // Trigger GC again, more segments will be merged delta + gc_n = store->onSyncGc(100); + ASSERT_EQ(1, gc_n); + + ASSERT_EQ(3, store->segments.size()); + ASSERT_EQ(150, getSegmentAt(0)->getStable()->getDMFilesRows()); + ASSERT_EQ(350, getSegmentAt(300)->getStable()->getDMFilesRows()); + ASSERT_EQ(500, getSegmentAt(600)->getStable()->getDMFilesRows()); + + // Trigger GC again, no more merge delta. 
+ gc_n = store->onSyncGc(100); + ASSERT_EQ(0, gc_n); + ASSERT_EQ(3, store->segments.size()); +} +CATCH + + +TEST_F(DeltaMergeStoreGCMergeDeltaTest, SegmentExactlyContainsStable) +try +{ + ensureSegmentBreakpoints({400, 500}); + fill(100, 600); + flush(); + mergeDelta(); + + auto gc_n = store->onSyncGc(100); + ASSERT_EQ(0, gc_n); +} +CATCH + + +TEST_F(DeltaMergeStoreGCMergeDeltaTest, NoPacks) +try +{ + db_context->getSettingsRef().dt_segment_stable_pack_rows = 1; + db_context->getGlobalContext().getSettingsRef().dt_segment_stable_pack_rows = 1; + + ensureSegmentBreakpoints({100, 200, 300}); + + auto gc_n = store->onSyncGc(100); + ASSERT_EQ(0, gc_n); +} +CATCH + + +TEST_F(DeltaMergeStoreGCMergeDeltaTest, SegmentContainedByPack) +try +{ + for (auto pack_size : {7, 200}) + { + reload(); + db_context->getSettingsRef().dt_segment_stable_pack_rows = pack_size; // for mergeDelta + db_context->getGlobalContext().getSettingsRef().dt_segment_stable_pack_rows = pack_size; // for GC + + fill(0, 200); + flush(); + mergeDelta(); + + auto pack_n = static_cast(std::ceil(200.0 / static_cast(pack_size))); + EXPECT_EQ(pack_n, getSegmentAt(0)->getStable()->getDMFilesPacks()); + + auto gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + + ensureSegmentBreakpoints({10, 190}, /* logical_split */ true); + gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + + mergeDelta(0, 1); + mergeDelta(190, 191); + + EXPECT_EQ(10, getSegmentAt(0)->getStable()->getDMFilesRows()); + EXPECT_EQ(10, getSegmentAt(190)->getStable()->getDMFilesRows()); + + EXPECT_EQ(pack_n, getSegmentAt(50)->getStable()->getDMFilesPacks()); + EXPECT_EQ(200, getSegmentAt(50)->getStable()->getDMFilesRows()); + + if (pack_size == 200) + { + // The segment [10, 190) only overlaps with 1 pack and is contained by the pack. + // Even it contains most of the data, it will still be GCed. 
+ gc_n = store->onSyncGc(50); + EXPECT_EQ(1, gc_n); + EXPECT_EQ(1, getSegmentAt(150)->getStable()->getDMFilesPacks()); + EXPECT_EQ(180, getSegmentAt(150)->getStable()->getDMFilesRows()); + + // There should be no more GCs. + gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + } + else if (pack_size == 7) + { + // When pack size is small, we will more precisely know that most of the DTFile is still valid. + // So in this case, no GC will happen. + gc_n = store->onSyncGc(50); + EXPECT_EQ(0, gc_n); + } + else + { + FAIL(); + } + } +} +CATCH + + +TEST_F(DeltaMergeStoreGCMergeDeltaTest, SmallReclaimRatioDoesNotMergeDelta) +try +{ + db_context->getSettingsRef().dt_segment_stable_pack_rows = 7; + db_context->getGlobalContext().getSettingsRef().dt_segment_stable_pack_rows = 7; + + fill(0, 400); + flush(); + mergeDelta(); + + auto gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + + ensureSegmentBreakpoints({10}, /* logical_split */ true); + gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + + mergeDelta(0, 1); + EXPECT_EQ(10, getSegmentAt(0)->getStable()->getDMFilesRows()); + EXPECT_EQ(400, getSegmentAt(150)->getStable()->getDMFilesRows()); + + gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + EXPECT_EQ(10, getSegmentAt(0)->getStable()->getDMFilesRows()); + EXPECT_EQ(400, getSegmentAt(150)->getStable()->getDMFilesRows()); +} +CATCH + + +TEST_F(DeltaMergeStoreGCMergeDeltaTest, SimpleBigReclaimRatio) +try +{ + db_context->getSettingsRef().dt_segment_stable_pack_rows = 7; + db_context->getGlobalContext().getSettingsRef().dt_segment_stable_pack_rows = 7; + + fill(0, 400); + flush(); + mergeDelta(); + + auto gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + + ensureSegmentBreakpoints({10}, /* logical_split */ true); + gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + + mergeDelta(100, 101); + EXPECT_EQ(400, getSegmentAt(0)->getStable()->getDMFilesRows()); + EXPECT_EQ(390, getSegmentAt(150)->getStable()->getDMFilesRows()); + + gc_n = store->onSyncGc(100); + 
EXPECT_EQ(1, gc_n); + EXPECT_EQ(10, getSegmentAt(0)->getStable()->getDMFilesRows()); + EXPECT_EQ(390, getSegmentAt(150)->getStable()->getDMFilesRows()); + + // GC again does not introduce new changes + gc_n = store->onSyncGc(100); + EXPECT_EQ(0, gc_n); + EXPECT_EQ(10, getSegmentAt(0)->getStable()->getDMFilesRows()); + EXPECT_EQ(390, getSegmentAt(150)->getStable()->getDMFilesRows()); +} +CATCH + + +// This test enables GC merge and GC merge delta. +TEST_F(DeltaMergeStoreGCTest, RandomShuffleLogicalSplitAndDeleteRange) +try +{ + // TODO: Better to be fuzz tests, in order to reach edge cases efficiently. + + std::random_device rd; + std::mt19937 random(rd()); + + for (auto pack_size : {1, 7, 10, 200}) + { + for (size_t random_round = 0; random_round < 10; random_round++) + { + LOG_FMT_INFO(logger, "Run round #{} for pack_size = {}", random_round, pack_size); + + // For each pack_size, we randomize N rounds. We should always expect everything are + // reclaimed in each round. + + reload(); + db_context->getSettingsRef().dt_segment_stable_pack_rows = pack_size; // for mergeDelta + db_context->getGlobalContext().getSettingsRef().dt_segment_stable_pack_rows = pack_size; // for GC + + fill(0, 100); + fill(500, 600); + flush(); + mergeDelta(); + + auto operations = std::vector>{ + [&] { ensureSegmentBreakpoints({50}, true); }, + [&] { ensureSegmentBreakpoints({80}, true); }, + [&] { ensureSegmentBreakpoints({100}, true); }, + [&] { ensureSegmentBreakpoints({300}, true); }, + [&] { ensureSegmentBreakpoints({700}, true); }, + [&] { deleteRange(-10, 30); }, + [&] { deleteRange(30, 70); }, + [&] { deleteRange(70, 100); }, + [&] { deleteRange(400, 500); }, + [&] { deleteRange(500, 600); }, + }; + + std::shuffle(std::begin(operations), std::end(operations), random); + + for (const auto & op : operations) + { + op(); + + // There will be also a change to randomly merge some delta. 
+ auto merge_delta_ops = std::uniform_int_distribution(0, 2)(random); + for (size_t i = 0; i < merge_delta_ops; i++) + { + auto merge_delta_at = std::uniform_int_distribution(0, 700)(random); + mergeDelta(merge_delta_at, merge_delta_at + 1); + } + } + + // Finally, let's do GCs. We should expect everything are reclaimed within 10 rounds of GC. + for (size_t gc_round = 0; gc_round < 10; gc_round++) + store->onSyncGc(100); + + // Check whether we have reclaimed everything + EXPECT_EQ(store->segments.size(), 1); + EXPECT_EQ(getSegmentAt(0)->getStable()->getDMFilesPacks(), 0); + + // No more GCs are needed. + EXPECT_EQ(0, store->onSyncGc(100)); + } + } +} +CATCH + + } // namespace tests } // namespace DM } // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp index 7c18b32a795..f22cd93a1fc 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp @@ -494,76 +494,6 @@ try CATCH -TEST_F(SegmentOperationTest, GCCheckAfterSegmentLogicalSplit) -try -{ - { - SegmentTestOptions options; - options.db_settings.dt_segment_stable_pack_rows = 100; - reloadWithOptions(options); - } - - auto invalid_data_ratio_threshold = dm_context->db_context.getSettingsRef().dt_bg_gc_delta_delete_ratio_to_trigger_gc; - { - auto segment = segments[DELTA_MERGE_FIRST_SEGMENT_ID]; - auto snap = segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, segment, snap, /* prev_seg */ nullptr, /* next_seg */ nullptr, invalid_data_ratio_threshold, log)); - } - - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000); - flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); - mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); - { - auto segment = segments[DELTA_MERGE_FIRST_SEGMENT_ID]; - auto snap = 
segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, segment, snap, /* prev_seg */ nullptr, /* next_seg */ nullptr, invalid_data_ratio_threshold, log)); - } - - auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); - ASSERT_TRUE(new_seg_id_opt.has_value()); - auto left_segment_id = DELTA_MERGE_FIRST_SEGMENT_ID; - auto right_segment_id = new_seg_id_opt.value(); - { - auto left_segment = segments[left_segment_id]; - auto right_segment = segments[right_segment_id]; - auto left_snap = left_segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - auto right_snap = right_segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, left_segment, left_snap, /* prev_seg */ nullptr, /* next_seg */ right_segment, invalid_data_ratio_threshold, log)); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, right_segment, right_snap, /* prev_seg */ left_segment, /* next_seg */ nullptr, invalid_data_ratio_threshold, log)); - } - - auto new_seg_id_opt2 = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); - ASSERT_TRUE(new_seg_id_opt2.has_value()); - auto middle_segment_id = new_seg_id_opt2.value(); - { - auto left_segment = segments[left_segment_id]; - auto middle_segment = segments[middle_segment_id]; - auto right_segment = segments[right_segment_id]; - auto left_snap = left_segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - auto middle_snap = middle_segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - auto right_snap = right_segment->createSnapshot(*dm_context, /* for_update */ true, 
CurrentMetrics::DT_SnapshotOfDeltaMerge); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, left_segment, left_snap, /* prev_seg */ nullptr, /* next_seg */ middle_segment, invalid_data_ratio_threshold, log)); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, middle_segment, middle_snap, /* prev_seg */ left_segment, /* next_seg */ right_segment, invalid_data_ratio_threshold, log)); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, right_segment, right_snap, /* prev_seg */ middle_segment, /* next_seg */ nullptr, invalid_data_ratio_threshold, log)); - } - - // merge delta left segment and check again - mergeSegmentDelta(left_segment_id); - { - auto left_segment = segments[left_segment_id]; - auto middle_segment = segments[middle_segment_id]; - auto right_segment = segments[right_segment_id]; - auto left_snap = left_segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - auto middle_snap = middle_segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - auto right_snap = right_segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, left_segment, left_snap, /* prev_seg */ nullptr, /* next_seg */ middle_segment, invalid_data_ratio_threshold, log)); - ASSERT_TRUE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, middle_segment, middle_snap, /* prev_seg */ left_segment, /* next_seg */ right_segment, invalid_data_ratio_threshold, log)); - ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, right_segment, right_snap, /* prev_seg */ middle_segment, /* next_seg */ nullptr, invalid_data_ratio_threshold, log)); - } -} -CATCH - - TEST_F(SegmentOperationTest, Issue5570) try { From 
3e6000dbef7e4a5c55396b5d75f6118002206a1e Mon Sep 17 00:00:00 2001 From: Wish Date: Thu, 22 Sep 2022 21:29:46 +0800 Subject: [PATCH 12/14] Just one more test case Signed-off-by: Wish --- .../DeltaMerge/tests/gtest_dm_store_background.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp index 90dab93cf2a..2164a1c0a43 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_store_background.cpp @@ -266,6 +266,20 @@ try CATCH +TEST_F(DeltaMergeStoreGCMergeDeltaTest, SegmentContainsPack) +try +{ + ensureSegmentBreakpoints({400, 500}); + fill(410, 450); + flush(); + mergeDelta(); + + auto gc_n = store->onSyncGc(100); + ASSERT_EQ(0, gc_n); +} +CATCH + + TEST_F(DeltaMergeStoreGCMergeDeltaTest, SegmentExactlyContainsStable) try { From b09f91ba0d6eedbe86a71825ac2b588cbc88caff Mon Sep 17 00:00:00 2001 From: Wish Date: Thu, 22 Sep 2022 21:57:24 +0800 Subject: [PATCH 13/14] simplify Signed-off-by: Wish --- .../DeltaMerge/DeltaMergeStore_InternalBg.cpp | 27 ++++++++----------- .../Storages/DeltaMerge/StableValueSpace.h | 14 ++++++++++ 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp index e4e81876958..71e6784c6f1 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp @@ -447,8 +447,7 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte log, "GC - shouldCompactStableWithTooMuchDataOutOfSegmentRange checked false " "because segment DTFile is shared with a neighbor segment, " - "segment={} first_pack_inc={} last_pack_inc={} prev_seg_files=[{}] next_seg_files=[{}] my_files=[{}]", - seg->info(), + "first_pack_inc={} 
last_pack_inc={} prev_seg_files=[{}] next_seg_files=[{}] my_files=[{}] segment={}", magic_enum::enum_name(at_least_result.first_pack_intersection), magic_enum::enum_name(at_least_result.last_pack_intersection), fmt::join(prev_segment_file_ids, ","), @@ -463,33 +462,29 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte }, ","); return fmt_buf.toString(); - }()); + }(), + seg->info()); // We do not mark `setValidDataRatioChecked` because neighbor segments' state could change. return false; } - size_t dt_file_rows = 0; - size_t dt_file_bytes = 0; - for (const auto & file : dt_files) - { - dt_file_rows += file->getRows(); - dt_file_bytes += file->getBytes(); - } + size_t file_rows = snap->stable->getDMFilesRows(); + size_t file_bytes = snap->stable->getDMFilesBytes(); - auto check_result = (at_least_result.rows < dt_file_rows * (1 - invalid_data_ratio_threshold)) // - || (at_least_result.bytes < dt_file_bytes * (1 - invalid_data_ratio_threshold)); + auto check_result = (at_least_result.rows < file_rows * (1 - invalid_data_ratio_threshold)) // + || (at_least_result.bytes < file_bytes * (1 - invalid_data_ratio_threshold)); LOG_FMT_TRACE( log, "GC - Checking shouldCompactStableWithTooMuchDataOutOfSegmentRange, " - "segment={} check_result={} first_pack_inc={} last_pack_inc={} rows_at_least={} bytes_at_least={} file_rows={} file_bytes={}", - seg->info(), + "check_result={} first_pack_inc={} last_pack_inc={} rows_at_least={} bytes_at_least={} file_rows={} file_bytes={} segment={} ", check_result, magic_enum::enum_name(at_least_result.first_pack_intersection), magic_enum::enum_name(at_least_result.last_pack_intersection), at_least_result.rows, at_least_result.bytes, - dt_file_rows, - dt_file_bytes); + file_rows, + file_bytes, + seg->info()); seg->setValidDataRatioChecked(); return check_result; } diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.h b/dbms/src/Storages/DeltaMerge/StableValueSpace.h index 93761e9ee68..2bd15e87db4 100644 
--- a/dbms/src/Storages/DeltaMerge/StableValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.h @@ -180,6 +180,20 @@ class StableValueSpace : public std::enable_shared_from_this */ size_t getDMFilesPacks() const { return stable->getDMFilesPacks(); } + /** + * Return the total number of rows of the underlying DTFiles. + * Rows from packs that are not included in the segment range will be also counted in. + * Note: Out-of-range rows may be produced by logical split. + */ + size_t getDMFilesRows() const { return stable->getDMFilesRows(); }; + + /** + * Return the total size of the data of the underlying DTFiles. + * Rows from packs that are not included in the segment range will be also counted in. + * Note: Out-of-range rows may be produced by logical split. + */ + size_t getDMFilesBytes() const { return stable->getDMFilesBytes(); }; + ColumnCachePtrs & getColumnCaches() { return column_caches; } SkippableBlockInputStreamPtr getInputStream(const DMContext & context, // From c6ae68f451bac48128d04b4fdd675ba444bb99e1 Mon Sep 17 00:00:00 2001 From: Wish Date: Thu, 22 Sep 2022 22:22:26 +0800 Subject: [PATCH 14/14] Add comments Signed-off-by: Wish --- .../DeltaMerge/DeltaMergeStore_InternalBg.cpp | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp index 71e6784c6f1..b65ec088d38 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp @@ -471,6 +471,30 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte size_t file_rows = snap->stable->getDMFilesRows(); size_t file_bytes = snap->stable->getDMFilesBytes(); + // We use at_least_rows|bytes, instead of stable_rows|bytes. 
The difference is that at_least_rows|bytes only count packs
+    // that are fully contained in the segment range, while stable_rows|bytes count packs that are intersected with the segment
+    // range.
+    //
+    // Consider the following case, where the segment only contains one pack:
+    //   │***** ******│ DTFile only contains 1 pack
+    //   │<------>│ Segment
+    // This kind of data layout may be produced by logical split. In this case, the ratio calculated using at_least_rows would be 0%,
+    // but the ratio calculated using stable_rows would be 100%.
+    // We definitely want such a DTFile to be reclaimed, because this segment does not contain any real rows at all!
+    //
+    // Of course there are false positives; consider the following case:
+    //   │*************************│ DTFile only contains 1 pack
+    //   │<------------------->│ Segment
+    // The segment contains most of the data in the DTFile, and not much space can be reclaimed after merging the delta.
+    // We are just wasting disk IO when doing the GC.
+    // This is currently acceptable, considering that:
+    // 1) The cost of rewriting the stable of 1 pack is small.
+    // 2) After rewriting, the segment will not need to be rewritten again, as it will look like:
+    //   │*********************│ DTFile only contains 1 pack
+    //   │<------------------->│ Segment
+    //
+    // See https://github.com/pingcap/tiflash/pull/6010 for more details.
+
     auto check_result = (at_least_result.rows < file_rows * (1 - invalid_data_ratio_threshold)) //
                         || (at_least_result.bytes < file_bytes * (1 - invalid_data_ratio_threshold));
     LOG_FMT_TRACE(