diff --git a/.github/actions/build-windows-artifacts/action.yml b/.github/actions/build-windows-artifacts/action.yml index ea11cf9f98dc..8c9ab8da44dd 100644 --- a/.github/actions/build-windows-artifacts/action.yml +++ b/.github/actions/build-windows-artifacts/action.yml @@ -69,7 +69,7 @@ runs: uses: actions/upload-artifact@v4 with: name: sqlness-logs - path: C:\tmp\greptime-*.log + path: C:\Users\RUNNER~1\AppData\Local\Temp\sqlness* retention-days: 3 - name: Build greptime binary diff --git a/.github/actions/setup-greptimedb-cluster/with-disk.yaml b/.github/actions/setup-greptimedb-cluster/with-disk.yaml index 1cbd22dbba6f..4b33b37ab08e 100644 --- a/.github/actions/setup-greptimedb-cluster/with-disk.yaml +++ b/.github/actions/setup-greptimedb-cluster/with-disk.yaml @@ -1,13 +1,13 @@ meta: - config: |- + configData: |- [runtime] global_rt_size = 4 datanode: - config: |- + configData: |- [runtime] global_rt_size = 4 compact_rt_size = 2 frontend: - config: |- + configData: |- [runtime] - global_rt_size = 4 \ No newline at end of file + global_rt_size = 4 diff --git a/.github/actions/setup-greptimedb-cluster/with-minio-and-cache.yaml b/.github/actions/setup-greptimedb-cluster/with-minio-and-cache.yaml index fc89bd542253..8f99c242f4bf 100644 --- a/.github/actions/setup-greptimedb-cluster/with-minio-and-cache.yaml +++ b/.github/actions/setup-greptimedb-cluster/with-minio-and-cache.yaml @@ -1,5 +1,5 @@ meta: - config: |- + configData: |- [runtime] global_rt_size = 4 @@ -7,7 +7,7 @@ meta: [datanode.client] timeout = "60s" datanode: - config: |- + configData: |- [runtime] global_rt_size = 4 compact_rt_size = 2 @@ -16,7 +16,7 @@ datanode: cache_path = "/data/greptimedb/s3cache" cache_capacity = "256MB" frontend: - config: |- + configData: |- [runtime] global_rt_size = 4 diff --git a/.github/actions/setup-greptimedb-cluster/with-minio.yaml b/.github/actions/setup-greptimedb-cluster/with-minio.yaml index b0b1c6b757e4..b8121374ce03 100644 --- a/.github/actions/setup-greptimedb-cluster/with-minio.yaml +++ b/.github/actions/setup-greptimedb-cluster/with-minio.yaml @@ -1,5 +1,5 @@ meta: - config: |- + configData: |- [runtime] global_rt_size = 4 @@ -7,12 +7,12 @@ meta: [datanode.client] timeout = "60s" datanode: - config: |- + configData: |- [runtime] global_rt_size = 4 compact_rt_size = 2 frontend: - config: |- + configData: |- [runtime] global_rt_size = 4 diff --git a/.github/actions/setup-greptimedb-cluster/with-remote-wal.yaml b/.github/actions/setup-greptimedb-cluster/with-remote-wal.yaml index b5a7dec358fe..043b9fe43f85 100644 --- a/.github/actions/setup-greptimedb-cluster/with-remote-wal.yaml +++ b/.github/actions/setup-greptimedb-cluster/with-remote-wal.yaml @@ -1,5 +1,5 @@ meta: - config: |- + configData: |- [runtime] global_rt_size = 4 @@ -13,7 +13,7 @@ meta: [datanode.client] timeout = "60s" datanode: - config: |- + configData: |- [runtime] global_rt_size = 4 compact_rt_size = 2 @@ -23,7 +23,7 @@ datanode: broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"] linger = "2ms" frontend: - config: |- + configData: |- [runtime] global_rt_size = 4 diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 4a3982b585fd..8b4e0b8416ad 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -145,6 +145,18 @@ jobs: matrix: target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ] steps: + - name: Remove unused software + run: | + 
echo "Disk space before:" + df -h + [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet + [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android + [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc + [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force + sudo docker builder prune -a + echo "Disk space after:" + df -h - uses: actions/checkout@v4 - uses: arduino/setup-protoc@v3 with: @@ -193,6 +205,18 @@ jobs: matrix: target: [ "unstable_fuzz_create_table_standalone" ] steps: + - name: Remove unused software + run: | + echo "Disk space before:" + df -h + [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet + [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android + [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc + [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force + sudo docker builder prune -a + echo "Disk space after:" + df -h - uses: actions/checkout@v4 - uses: arduino/setup-protoc@v3 with: @@ -285,24 +309,24 @@ jobs: strategy: matrix: target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ] - mode: - - name: "Disk" - minio: false - kafka: false - values: "with-disk.yaml" - - name: "Minio" - minio: true - kafka: false - values: "with-minio.yaml" - - name: "Minio with Cache" - minio: true - kafka: false - values: "with-minio-and-cache.yaml" + mode: - name: "Remote WAL" minio: true kafka: true values: "with-remote-wal.yaml" steps: + - name: Remove unused software + run: | + echo "Disk space before:" + df -h + [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet + [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android + [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc + [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force + sudo docker builder prune -a + echo "Disk space after:" + df -h - uses: actions/checkout@v4 - name: Setup Kind uses: ./.github/actions/setup-kind @@ -427,6 +451,18 @@ jobs: kafka: true values: "with-remote-wal.yaml" steps: + - name: Remove unused software + run: | + echo "Disk space before:" + df -h + [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet + [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android + [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc + [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force + sudo docker builder prune -a + echo "Disk space after:" + df -h - uses: actions/checkout@v4 - name: Setup Kind uses: ./.github/actions/setup-kind diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml index 6175939333ce..a337409dc3a2 100644 --- a/.github/workflows/nightly-ci.yml +++ b/.github/workflows/nightly-ci.yml @@ -59,7 +59,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: sqlness-logs - path: C:\tmp\greptime-*.log + path: C:\Users\RUNNER~1\AppData\Local\Temp\sqlness* retention-days: 3 test-on-windows: diff --git a/Cargo.lock b/Cargo.lock index fbf30e4f08ce..ccbcb771cc64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -214,7 +214,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" [[package]] name = "api" -version = "0.9.0" +version = "0.9.1" dependencies = [ "common-base", "common-decimal", @@ -226,7 +226,7 @@ dependencies = [ "paste", "prost 0.12.6", 
"serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "tonic-build", ] @@ -762,7 +762,7 @@ dependencies = [ [[package]] name = "auth" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -774,7 +774,7 @@ dependencies = [ "digest", "notify", "sha1", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "tokio", ] @@ -1277,15 +1277,15 @@ dependencies = [ [[package]] name = "cache" -version = "0.9.0" +version = "0.9.1" dependencies = [ "catalog", "common-error", "common-macro", "common-meta", "moka", - "snafu 0.8.3", - "substrait 0.9.0", + "snafu 0.8.4", + "substrait 0.9.1", ] [[package]] @@ -1312,7 +1312,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "catalog" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arrow", @@ -1351,7 +1351,7 @@ dependencies = [ "prometheus", "serde_json", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "store-api", "table", @@ -1637,7 +1637,7 @@ checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" [[package]] name = "client" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arc-swap", @@ -1665,9 +1665,9 @@ dependencies = [ "query", "rand", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "substrait 0.37.3", - "substrait 0.9.0", + "substrait 0.9.1", "tokio", "tokio-stream", "tonic 0.11.0", @@ -1697,7 +1697,7 @@ dependencies = [ [[package]] name = "cmd" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-trait", "auth", @@ -1751,9 +1751,9 @@ dependencies = [ "serde_json", "servers", "session", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", - "substrait 0.9.0", + "substrait 0.9.1", "table", "temp-env", "tempfile", @@ -1799,7 +1799,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335" [[package]] name = "common-base" -version = "0.9.0" +version = "0.9.1" dependencies = [ "anymap", "bitvec", @@ -1808,25 +1808,25 @@ dependencies = [ "common-macro", "paste", "serde", - "snafu 0.8.3", + "snafu 0.8.4", "toml 0.8.14", "zeroize", ] [[package]] name = "common-catalog" -version = "0.9.0" +version = "0.9.1" dependencies = [ "chrono", "common-error", "common-macro", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", ] [[package]] name = "common-config" -version = "0.9.0" +version = "0.9.1" dependencies = [ "common-base", "common-error", @@ -1840,7 +1840,7 @@ dependencies = [ "num_cpus", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "sysinfo", "temp-env", "tempfile", @@ -1849,7 +1849,7 @@ dependencies = [ [[package]] name = "common-datasource" -version = "0.9.0" +version = "0.9.1" dependencies = [ "arrow", "arrow-schema", @@ -1876,7 +1876,7 @@ dependencies = [ "rand", "regex", "serde", - "snafu 0.8.3", + "snafu 0.8.4", "strum 0.25.0", "tokio", "tokio-util", @@ -1886,7 +1886,7 @@ dependencies = [ [[package]] name = "common-decimal" -version = "0.9.0" +version = "0.9.1" dependencies = [ "bigdecimal", "common-error", @@ -1894,21 +1894,21 @@ dependencies = [ "rust_decimal", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", ] [[package]] name = "common-error" -version = "0.9.0" +version = "0.9.1" dependencies = [ - "snafu 0.8.3", + "snafu 0.8.4", "strum 0.25.0", "tonic 0.11.0", ] [[package]] name = "common-frontend" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -1917,13 +1917,13 @@ dependencies = [ "common-macro", "common-query", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", ] [[package]] name = "common-function" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", 
"arc-swap", @@ -1948,7 +1948,7 @@ dependencies = [ "serde", "serde_json", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "statrs", "store-api", @@ -1957,7 +1957,7 @@ dependencies = [ [[package]] name = "common-greptimedb-telemetry" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-trait", "common-runtime", @@ -1974,7 +1974,7 @@ dependencies = [ [[package]] name = "common-grpc" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arrow-flight", @@ -1992,7 +1992,7 @@ dependencies = [ "lazy_static", "prost 0.12.6", "rand", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", "tonic 0.11.0", "tower", @@ -2000,7 +2000,7 @@ dependencies = [ [[package]] name = "common-grpc-expr" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "common-base", @@ -2012,31 +2012,31 @@ dependencies = [ "datatypes", "paste", "prost 0.12.6", - "snafu 0.8.3", + "snafu 0.8.4", "table", ] [[package]] name = "common-macro" -version = "0.9.0" +version = "0.9.1" dependencies = [ "arc-swap", "common-query", "datatypes", "proc-macro2", "quote", - "snafu 0.8.3", + "snafu 0.8.4", "static_assertions", "syn 2.0.66", ] [[package]] name = "common-mem-prof" -version = "0.9.0" +version = "0.9.1" dependencies = [ "common-error", "common-macro", - "snafu 0.8.3", + "snafu 0.8.4", "tempfile", "tikv-jemalloc-ctl", "tikv-jemalloc-sys", @@ -2045,7 +2045,7 @@ dependencies = [ [[package]] name = "common-meta" -version = "0.9.0" +version = "0.9.1" dependencies = [ "anymap2", "api", @@ -2088,7 +2088,7 @@ dependencies = [ "serde_json", "serde_with", "session", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "strum 0.25.0", "table", @@ -2100,11 +2100,11 @@ dependencies = [ [[package]] name = "common-plugins" -version = "0.9.0" +version = "0.9.1" [[package]] name = "common-procedure" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-stream", "async-trait", @@ -2123,14 +2123,14 @@ dependencies = [ "serde", "serde_json", "smallvec", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", "uuid", ] [[package]] name = "common-procedure-test" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-trait", "common-procedure", @@ -2138,7 +2138,7 @@ dependencies = [ [[package]] name = "common-query" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -2153,7 +2153,7 @@ dependencies = [ "datafusion-expr", "datatypes", "serde", - "snafu 0.8.3", + "snafu 0.8.4", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "sqlparser_derive 0.1.1", "statrs", @@ -2163,7 +2163,7 @@ dependencies = [ [[package]] name = "common-recordbatch" -version = "0.9.0" +version = "0.9.1" dependencies = [ "arc-swap", "common-error", @@ -2176,13 +2176,13 @@ dependencies = [ "pin-project", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", ] [[package]] name = "common-runtime" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-trait", "common-error", @@ -2194,7 +2194,7 @@ dependencies = [ "paste", "prometheus", "serde", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", "tokio-metrics", "tokio-metrics-collector", @@ -2204,7 +2204,7 @@ dependencies = [ [[package]] name = "common-telemetry" -version = "0.9.0" +version = "0.9.1" dependencies = [ "atty", "backtrace", @@ -2231,7 +2231,7 @@ dependencies = [ [[package]] name = "common-test-util" -version = "0.9.0" +version = "0.9.1" dependencies = [ "client", "common-query", @@ -2243,7 +2243,7 @@ dependencies = [ [[package]] name = "common-time" -version = "0.9.0" +version = "0.9.1" dependencies = 
[ "arrow", "chrono", @@ -2254,12 +2254,12 @@ dependencies = [ "rand", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", ] [[package]] name = "common-version" -version = "0.9.0" +version = "0.9.1" dependencies = [ "build-data", "const_format", @@ -2270,7 +2270,7 @@ dependencies = [ [[package]] name = "common-wal" -version = "0.9.0" +version = "0.9.1" dependencies = [ "common-base", "common-error", @@ -2282,7 +2282,7 @@ dependencies = [ "serde", "serde_json", "serde_with", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", "toml 0.8.14", ] @@ -2764,7 +2764,7 @@ checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" [[package]] name = "datafusion" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "ahash 0.8.11", "arrow", @@ -2816,7 +2816,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "ahash 0.8.11", "arrow", @@ -2837,7 +2837,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "tokio", ] @@ -2845,7 +2845,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "arrow", "chrono", @@ -2865,7 +2865,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "ahash 0.8.11", "arrow", @@ -2882,7 +2882,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "arrow", "base64 0.22.1", @@ -2908,7 +2908,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = 
"git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "ahash 0.8.11", "arrow", @@ -2925,7 +2925,7 @@ dependencies = [ [[package]] name = "datafusion-functions-array" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "arrow", "arrow-array", @@ -2944,7 +2944,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "arrow", "async-trait", @@ -2962,7 +2962,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "ahash 0.8.11", "arrow", @@ -2992,7 +2992,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "arrow", "datafusion-common", @@ -3003,7 +3003,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "ahash 0.8.11", "arrow", @@ -3036,7 +3036,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "arrow", "arrow-array", @@ -3052,7 +3052,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "38.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=d7bda5c9b762426e81f144296deadc87e5f4a0b8#d7bda5c9b762426e81f144296deadc87e5f4a0b8" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=7823ef2f63663907edab46af0d51359900f608d6#7823ef2f63663907edab46af0d51359900f608d6" dependencies = [ "async-recursion", "chrono", @@ -3066,7 +3066,7 @@ dependencies = [ [[package]] name = "datanode" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arrow-flight", @@ -3113,9 +3113,9 @@ dependencies = [ "serde", "servers", "session", - "snafu 0.8.3", + 
"snafu 0.8.4", "store-api", - "substrait 0.9.0", + "substrait 0.9.1", "table", "tokio", "toml 0.8.14", @@ -3124,7 +3124,7 @@ dependencies = [ [[package]] name = "datatypes" -version = "0.9.0" +version = "0.9.1" dependencies = [ "arrow", "arrow-array", @@ -3143,7 +3143,7 @@ dependencies = [ "paste", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", ] [[package]] @@ -3685,7 +3685,7 @@ dependencies = [ [[package]] name = "file-engine" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -3707,7 +3707,7 @@ dependencies = [ "object-store", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "table", "tokio", @@ -3781,7 +3781,7 @@ dependencies = [ [[package]] name = "flow" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arrow-schema", @@ -3832,10 +3832,10 @@ dependencies = [ "servers", "session", "smallvec", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "strum 0.25.0", - "substrait 0.9.0", + "substrait 0.9.1", "table", "tokio", "tonic 0.11.0", @@ -3882,7 +3882,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] name = "frontend" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arc-swap", @@ -3930,7 +3930,7 @@ dependencies = [ "serde_json", "servers", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "store-api", @@ -4993,7 +4993,7 @@ dependencies = [ [[package]] name = "index" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-trait", "asynchronous-codec", @@ -5014,7 +5014,7 @@ dependencies = [ "regex", "regex-automata 0.4.7", "serde", - "snafu 0.8.3", + "snafu 0.8.4", "tantivy", "tantivy-jieba", "tempfile", @@ -5228,6 +5228,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -5763,7 +5772,7 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "log-store" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-stream", "async-trait", @@ -5781,6 +5790,7 @@ dependencies = [ "futures-util", "itertools 0.10.5", "lazy_static", + "pin-project", "prometheus", "protobuf", "protobuf-build", @@ -5790,7 +5800,7 @@ dependencies = [ "rskafka", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "tokio", "uuid", @@ -6070,7 +6080,7 @@ dependencies = [ [[package]] name = "meta-client" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -6085,7 +6095,7 @@ dependencies = [ "meta-srv", "rand", "serde", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", "tokio-stream", "tonic 0.11.0", @@ -6096,7 +6106,7 @@ dependencies = [ [[package]] name = "meta-srv" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -6138,7 +6148,7 @@ dependencies = [ "serde_json", "servers", "session", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "table", "tokio", @@ -6172,7 +6182,7 @@ dependencies = [ [[package]] name = "metric-engine" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "aquamarine", @@ -6194,7 +6204,7 @@ dependencies = [ "object-store", "prometheus", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "tokio", ] @@ -6263,7 +6273,7 @@ dependencies = [ [[package]] name = 
"mito2" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "aquamarine", @@ -6320,7 +6330,7 @@ dependencies = [ "serde_with", "session", "smallvec", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "strum 0.25.0", "table", @@ -6910,7 +6920,7 @@ dependencies = [ [[package]] name = "object-store" -version = "0.9.0" +version = "0.9.1" dependencies = [ "anyhow", "bytes", @@ -6967,9 +6977,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "opendal" -version = "0.47.2" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff159a2da374ef2d64848a6547943cf1af7d2ceada5ae77be175e1389aa07ae3" +checksum = "615d41187deea0ea7fab5b48e9afef6ae8fc742fdcfa248846ee3d92ff71e986" dependencies = [ "anyhow", "async-trait", @@ -6986,7 +6996,7 @@ dependencies = [ "md-5", "once_cell", "percent-encoding", - "quick-xml 0.31.0", + "quick-xml 0.36.1", "reqsign", "reqwest", "serde", @@ -7157,7 +7167,7 @@ dependencies = [ [[package]] name = "operator" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -7198,11 +7208,11 @@ dependencies = [ "regex", "serde_json", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "store-api", - "substrait 0.9.0", + "substrait 0.9.1", "table", "tokio", "tokio-util", @@ -7452,7 +7462,7 @@ dependencies = [ [[package]] name = "partition" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -7466,7 +7476,7 @@ dependencies = [ "itertools 0.10.5", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "store-api", @@ -7741,7 +7751,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pipeline" -version = "0.9.0" +version = "0.9.1" dependencies = [ "ahash 0.8.11", "api", @@ -7787,7 +7797,7 @@ dependencies = [ "serde", "serde_json", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "table", "tokio", @@ -7902,14 +7912,14 @@ dependencies = [ [[package]] name = "plugins" -version = "0.9.0" +version = "0.9.1" dependencies = [ "auth", "common-base", "datanode", "frontend", "meta-srv", - "snafu 0.8.3", + "snafu 0.8.4", ] [[package]] @@ -7931,11 +7941,11 @@ checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" [[package]] name = "postgres-protocol" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" +checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "byteorder", "bytes", "fallible-iterator", @@ -7949,9 +7959,9 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d2234cdee9408b523530a9b6d2d6b373d1db34f6a8e51dc03ded1828d7fb67c" +checksum = "02048d9e032fb3cc3413bbf7b83a15d84a5d419778e2628751896d856498eee9" dependencies = [ "array-init", "bytes", @@ -8171,7 +8181,7 @@ dependencies = [ [[package]] name = "promql" -version = "0.9.0" +version = "0.9.1" dependencies = [ "ahash 0.8.11", "async-trait", @@ -8191,7 +8201,7 @@ dependencies = [ "prost 0.12.6", "query", "session", - "snafu 0.8.3", + "snafu 0.8.4", 
"tokio", ] @@ -8245,7 +8255,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", - "heck 0.5.0", + "heck 0.4.1", "itertools 0.12.1", "log", "multimap", @@ -8266,8 +8276,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bb182580f71dd070f88d01ce3de9f4da5021db7115d2e1c3605a754153b77c1" dependencies = [ "bytes", - "heck 0.5.0", - "itertools 0.12.1", + "heck 0.4.1", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -8311,7 +8321,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.66", @@ -8406,7 +8416,7 @@ dependencies = [ [[package]] name = "puffin" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-compression 0.4.11", "async-trait", @@ -8427,7 +8437,7 @@ dependencies = [ "serde", "serde_json", "sha2", - "snafu 0.8.3", + "snafu 0.8.4", "tokio", "tokio-util", "uuid", @@ -8528,7 +8538,7 @@ dependencies = [ [[package]] name = "query" -version = "0.9.0" +version = "0.9.1" dependencies = [ "ahash 0.8.11", "api", @@ -8584,14 +8594,14 @@ dependencies = [ "rand", "regex", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "statrs", "stats-cli", "store-api", "streaming-stats", - "substrait 0.9.0", + "substrait 0.9.1", "table", "tokio", "tokio-stream", @@ -8608,9 +8618,19 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.31.0" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +checksum = "86e446ed58cef1bbfe847bc2fda0e2e4ea9f0e57b90c507d4781292590d72a4e" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quick-xml" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a05e2e8efddfa51a84ca47cec303fac86c8541b686d37cac5efc0e094417bc" dependencies = [ "memchr", "serde", @@ -8886,9 +8906,9 @@ dependencies = [ [[package]] name = "reqsign" -version = "0.15.2" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70fe66d4cd0b5ed9b1abbfe639bf6baeaaf509f7da2d51b31111ba945be59286" +checksum = "03dd4ba7c3901dd43e6b8c7446a760d45bc1ea4301002e1a6fa48f97c3a796fa" dependencies = [ "anyhow", "async-trait", @@ -8904,7 +8924,7 @@ dependencies = [ "log", "once_cell", "percent-encoding", - "quick-xml 0.31.0", + "quick-xml 0.35.0", "rand", "reqwest", "rsa 0.9.6", @@ -9919,7 +9939,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "script" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arc-swap", @@ -9966,7 +9986,7 @@ dependencies = [ "serde", "servers", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "table", "tokio", @@ -10212,7 +10232,7 @@ dependencies = [ [[package]] name = "servers" -version = "0.9.0" +version = "0.9.1" dependencies = [ "aide", "api", @@ -10293,7 +10313,7 @@ dependencies = [ "serde", "serde_json", "session", - "snafu 0.8.3", + "snafu 0.8.4", "snap", "sql", "strum 0.25.0", @@ -10318,7 +10338,7 @@ dependencies = [ [[package]] name = "session" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arc-swap", @@ 
-10330,7 +10350,7 @@ dependencies = [ "common-time", "derive_builder 0.12.0", "meter-core", - "snafu 0.8.3", + "snafu 0.8.4", "sql", ] @@ -10379,9 +10399,9 @@ dependencies = [ [[package]] name = "shadow-rs" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a600f795d0894cda22235b44eea4b85c2a35b405f65523645ac8e35b306817a" +checksum = "66caf2de9b7e61293c00006cd2807d6c4e4b31018c5ea21d008f44f4852b93c3" dependencies = [ "const_format", "git2", @@ -10516,11 +10536,11 @@ dependencies = [ [[package]] name = "snafu" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418b8136fec49956eba89be7da2847ec1909df92a9ae4178b5ff0ff092c8d95e" +checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" dependencies = [ - "snafu-derive 0.8.3", + "snafu-derive 0.8.4", ] [[package]] @@ -10537,11 +10557,11 @@ dependencies = [ [[package]] name = "snafu-derive" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a4812a669da00d17d8266a0439eddcacbc88b17f732f927e52eeb9d196f7fb5" +checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.66", @@ -10619,7 +10639,7 @@ dependencies = [ [[package]] name = "sql" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "chrono", @@ -10643,7 +10663,7 @@ dependencies = [ "lazy_static", "regex", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "sqlparser_derive 0.1.1", "table", @@ -10679,7 +10699,7 @@ dependencies = [ [[package]] name = "sqlness-runner" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-trait", "clap 4.5.7", @@ -10896,7 +10916,7 @@ dependencies = [ [[package]] name = "store-api" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "aquamarine", @@ -10916,7 +10936,7 @@ dependencies = [ "futures", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "strum 0.25.0", "tokio", ] @@ -11065,7 +11085,7 @@ dependencies = [ [[package]] name = "substrait" -version = "0.9.0" +version = "0.9.1" dependencies = [ "async-trait", "bytes", @@ -11079,7 +11099,7 @@ dependencies = [ "datatypes", "promql", "prost 0.12.6", - "snafu 0.8.3", + "snafu 0.8.4", "substrait 0.37.3", "tokio", ] @@ -11266,7 +11286,7 @@ dependencies = [ [[package]] name = "table" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "async-trait", @@ -11296,7 +11316,7 @@ dependencies = [ "paste", "serde", "serde_json", - "snafu 0.8.3", + "snafu 0.8.4", "store-api", "tokio", "tokio-util", @@ -11531,7 +11551,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "tests-fuzz" -version = "0.9.0" +version = "0.9.1" dependencies = [ "arbitrary", "async-trait", @@ -11561,7 +11581,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "sqlx", @@ -11573,7 +11593,7 @@ dependencies = [ [[package]] name = "tests-integration" -version = "0.9.0" +version = "0.9.1" dependencies = [ "api", "arrow-flight", @@ -11629,11 +11649,11 @@ dependencies = [ "serde_json", "servers", "session", - "snafu 0.8.3", + "snafu 0.8.4", "sql", "sqlx", "store-api", - 
"substrait 0.9.0", + "substrait 0.9.1", "table", "tempfile", "time", @@ -11912,9 +11932,9 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d340244b32d920260ae7448cb72b6e238bddc3d4f7603394e7dd46ed8e48f5b8" +checksum = "03adcf0147e203b6032c0b2d30be1415ba03bc348901f3ff1cc0df6a733e60c3" dependencies = [ "async-trait", "byteorder", @@ -13710,9 +13730,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.10.2" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" dependencies = [ "yoke", "zerofrom", @@ -13721,9 +13741,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 4dd7e493c95b..5d473de72226 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.9.0" +version = "0.9.1" edition = "2021" license = "Apache-2.0" @@ -104,15 +104,15 @@ clap = { version = "4.4", features = ["derive"] } config = "0.13.0" crossbeam-utils = "0.8" dashmap = "5.4" -datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } -datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" } +datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = 
"7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } +datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" } derive_builder = "0.12" dotenv = "0.15" etcd-client = { version = "0.13" } @@ -159,6 +159,7 @@ schemars = "0.8" serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", features = ["float_roundtrip"] } serde_with = "3" +shadow-rs = "0.31" smallvec = { version = "1", features = ["serde"] } snafu = "0.8" sysinfo = "0.30" diff --git a/config/config.md b/config/config.md index b2a96860ecf7..3fe696abac3c 100644 --- a/config/config.md +++ b/config/config.md @@ -145,7 +145,7 @@ | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | -| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. | | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
ratio > 1 are treated as 1. Fractions < 0 are treated as 0 | | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- | @@ -230,7 +230,7 @@ | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | -| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. | | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
ratio > 1 are treated as 1. Fractions < 0 are treated as 0 | | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- | @@ -292,7 +292,7 @@ | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | -| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. | | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
ratio > 1 are treated as 1. Fractions < 0 are treated as 0 | | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- | @@ -432,7 +432,7 @@ | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | -| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. | | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
ratio > 1 are treated as 1. Fractions < 0 are treated as 0 | | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- | @@ -477,7 +477,7 @@ | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. | | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. | | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. | -| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. | +| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. | | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. | | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.
Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
ratio > 1 are treated as 1. Fractions < 0 are treated as 0 | | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 97e4fae1d503..7754542c6238 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -493,8 +493,7 @@ level = "info" enable_otlp_tracing = false ## The OTLP tracing endpoint. -## +toml2docs:none-default -otlp_endpoint = "" +otlp_endpoint = "http://localhost:4317" ## Whether to append logs to stdout. append_stdout = true diff --git a/config/flownode.example.toml b/config/flownode.example.toml index 0f9ddea45740..443edb0303f5 100644 --- a/config/flownode.example.toml +++ b/config/flownode.example.toml @@ -70,8 +70,7 @@ level = "info" enable_otlp_tracing = false ## The OTLP tracing endpoint. -## +toml2docs:none-default -otlp_endpoint = "" +otlp_endpoint = "http://localhost:4317" ## Whether to append logs to stdout. append_stdout = true diff --git a/config/frontend.example.toml b/config/frontend.example.toml index 8f6a1c859ee4..d59719c8c1d2 100644 --- a/config/frontend.example.toml +++ b/config/frontend.example.toml @@ -177,8 +177,7 @@ level = "info" enable_otlp_tracing = false ## The OTLP tracing endpoint. -## +toml2docs:none-default -otlp_endpoint = "" +otlp_endpoint = "http://localhost:4317" ## Whether to append logs to stdout. append_stdout = true diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index 494e89a1c2f2..7a71d4a8cbcb 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -136,8 +136,7 @@ level = "info" enable_otlp_tracing = false ## The OTLP tracing endpoint. -## +toml2docs:none-default -otlp_endpoint = "" +otlp_endpoint = "http://localhost:4317" ## Whether to append logs to stdout. append_stdout = true diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 36a46e9ed9d0..73775b9fc1c4 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -522,8 +522,7 @@ level = "info" enable_otlp_tracing = false ## The OTLP tracing endpoint. -## +toml2docs:none-default -otlp_endpoint = "" +otlp_endpoint = "http://localhost:4317" ## Whether to append logs to stdout. append_stdout = true diff --git a/docker/docker-compose/cluster-with-etcd.yaml b/docker/docker-compose/cluster-with-etcd.yaml index 6491bdeac1fd..e7794662a8d0 100644 --- a/docker/docker-compose/cluster-with-etcd.yaml +++ b/docker/docker-compose/cluster-with-etcd.yaml @@ -1,9 +1,9 @@ x-custom: etcd_initial_cluster_token: &etcd_initial_cluster_token "--initial-cluster-token=etcd-cluster" etcd_common_settings: &etcd_common_settings - image: quay.io/coreos/etcd:v3.5.10 + image: "${ETCD_REGISTRY:-quay.io}/${ETCD_NAMESPACE:-coreos}/etcd:${ETCD_VERSION:-v3.5.10}" entrypoint: /usr/local/bin/etcd - greptimedb_image: &greptimedb_image docker.io/greptimedb/greptimedb:latest + greptimedb_image: &greptimedb_image "${GREPTIMEDB_REGISTRY:-docker.io}/${GREPTIMEDB_NAMESPACE:-greptime}/greptimedb:${GREPTIMEDB_VERSION:-latest}" services: etcd0: diff --git a/scripts/install.sh b/scripts/install.sh index 5740c565edca..4a2bd8415146 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -1,62 +1,72 @@ -#!/bin/sh +#!/usr/bin/env bash set -ue OS_TYPE= ARCH_TYPE= + +# Set the GitHub token to avoid GitHub API rate limit. 
+# You can run with `GITHUB_TOKEN`: +# GITHUB_TOKEN= ./scripts/install.sh +GITHUB_TOKEN=${GITHUB_TOKEN:-} + VERSION=${1:-latest} GITHUB_ORG=GreptimeTeam GITHUB_REPO=greptimedb BIN=greptime -get_os_type() { - os_type="$(uname -s)" +function get_os_type() { + os_type="$(uname -s)" - case "$os_type" in + case "$os_type" in Darwin) - OS_TYPE=darwin - ;; + OS_TYPE=darwin + ;; Linux) - OS_TYPE=linux - ;; + OS_TYPE=linux + ;; *) - echo "Error: Unknown OS type: $os_type" - exit 1 - esac + echo "Error: Unknown OS type: $os_type" + exit 1 + esac } -get_arch_type() { - arch_type="$(uname -m)" +function get_arch_type() { + arch_type="$(uname -m)" - case "$arch_type" in + case "$arch_type" in arm64) - ARCH_TYPE=arm64 - ;; + ARCH_TYPE=arm64 + ;; aarch64) - ARCH_TYPE=arm64 - ;; + ARCH_TYPE=arm64 + ;; x86_64) - ARCH_TYPE=amd64 - ;; + ARCH_TYPE=amd64 + ;; amd64) - ARCH_TYPE=amd64 - ;; + ARCH_TYPE=amd64 + ;; *) - echo "Error: Unknown CPU type: $arch_type" - exit 1 - esac + echo "Error: Unknown CPU type: $arch_type" + exit 1 + esac } -get_os_type -get_arch_type - -if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then - # Use the latest nightly version. +function download_artifact() { + if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then + # Use the latest stable released version. + # GitHub API reference: https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#get-the-latest-release. if [ "${VERSION}" = "latest" ]; then - VERSION=$(curl -s -XGET "https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/releases" | grep tag_name | grep nightly | cut -d: -f 2 | sed 's/.*"\(.*\)".*/\1/' | uniq | sort -r | head -n 1) - if [ -z "${VERSION}" ]; then - echo "Failed to get the latest version." - exit 1 + # To avoid other tools dependency, we choose to use `curl` to get the version metadata and parsed by `sed`. + VERSION=$(curl -sL \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + ${GITHUB_TOKEN:+-H "Authorization: Bearer $GITHUB_TOKEN"} \ + "https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/releases/latest" | sed -n 's/.*"tag_name": "\([^"]*\)".*/\1/p') + if [ -z "${VERSION}" ]; then + echo "Failed to get the latest stable released version." 
+ exit 1 fi fi @@ -73,4 +83,9 @@ if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then rm -r "${PACKAGE_NAME%.tar.gz}" && \ echo "Run './${BIN} --help' to get started" fi -fi + fi +} + +get_os_type +get_arch_type +download_artifact diff --git a/src/client/src/flow.rs b/src/client/src/flow.rs index 3d3f7aa55707..de0d2c8b319e 100644 --- a/src/client/src/flow.rs +++ b/src/client/src/flow.rs @@ -16,7 +16,7 @@ use api::v1::flow::{FlowRequest, FlowResponse}; use api::v1::region::InsertRequests; use common_error::ext::BoxedError; use common_meta::node_manager::Flownode; -use snafu::{location, Location, ResultExt}; +use snafu::{location, ResultExt}; use crate::error::Result; use crate::Client; diff --git a/src/client/src/region.rs b/src/client/src/region.rs index cc91010aa0cb..b0c41084a40d 100644 --- a/src/client/src/region.rs +++ b/src/client/src/region.rs @@ -33,7 +33,7 @@ use common_telemetry::error; use common_telemetry::tracing_context::TracingContext; use prost::Message; use query::query_engine::DefaultSerializer; -use snafu::{location, Location, OptionExt, ResultExt}; +use snafu::{location, OptionExt, ResultExt}; use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; use tokio_stream::StreamExt; diff --git a/src/cmd/src/bin/greptime.rs b/src/cmd/src/bin/greptime.rs index 707d2daa05b5..f49cf2dff097 100644 --- a/src/cmd/src/bin/greptime.rs +++ b/src/cmd/src/bin/greptime.rs @@ -62,8 +62,37 @@ enum SubCommand { #[global_allocator] static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +#[cfg(debug_assertions)] +fn main() -> Result<()> { + use snafu::ResultExt; + // Set the stack size to 8MB for the thread so it wouldn't overflow on large stack usage in debug mode + // see https://github.com/GreptimeTeam/greptimedb/pull/4317 + // and https://github.com/rust-lang/rust/issues/34283 + std::thread::Builder::new() + .name("main_spawn".to_string()) + .stack_size(8 * 1024 * 1024) + .spawn(|| { + { + tokio::runtime::Builder::new_multi_thread() + .thread_stack_size(8 * 1024 * 1024) + .enable_all() + .build() + .expect("Failed building the Runtime") + .block_on(main_body()) + } + }) + .context(cmd::error::SpawnThreadSnafu)? + .join() + .expect("Couldn't join on the associated thread") +} + +#[cfg(not(debug_assertions))] #[tokio::main] async fn main() -> Result<()> { + main_body().await +} + +async fn main_body() -> Result<()> { setup_human_panic(); start(Command::parse()).await } diff --git a/src/cmd/src/cli/export.rs b/src/cmd/src/cli/export.rs index 171a0f2fa7fe..90699fae7746 100644 --- a/src/cmd/src/cli/export.rs +++ b/src/cmd/src/cli/export.rs @@ -22,7 +22,7 @@ use base64::Engine; use clap::{Parser, ValueEnum}; use client::DEFAULT_SCHEMA_NAME; use common_catalog::consts::DEFAULT_CATALOG_NAME; -use common_telemetry::{debug, error, info, warn}; +use common_telemetry::{debug, error, info}; use serde_json::Value; use servers::http::greptime_result_v1::GreptimedbV1Response; use servers::http::GreptimeQueryOutput; @@ -42,14 +42,13 @@ type TableReference = (String, String, String); #[derive(Debug, Default, Clone, ValueEnum)] enum ExportTarget { - /// Corresponding to `SHOW CREATE TABLE` + /// Export all table schemas, corresponding to `SHOW CREATE TABLE`. + Schema, + /// Export all table data, corresponding to `COPY DATABASE TO`. + Data, + /// Export all table schemas and data at once. 
#[default] - CreateTable, - /// Corresponding to `EXPORT TABLE` - #[deprecated(note = "Please use `DatabaseData` instead.")] - TableData, - /// Corresponding to `EXPORT DATABASE` - DatabaseData, + All, } #[derive(Debug, Default, Parser)] @@ -75,7 +74,7 @@ pub struct ExportCommand { max_retry: usize, /// Things to export - #[clap(long, short = 't', value_enum)] + #[clap(long, short = 't', value_enum, default_value = "all")] target: ExportTarget, /// A half-open time range: [start_time, end_time). @@ -178,7 +177,7 @@ impl Export { if let Some(schema) = &self.schema { Ok(vec![(self.catalog.clone(), schema.clone())]) } else { - let result = self.sql("show databases").await?; + let result = self.sql("SHOW DATABASES").await?; let Some(records) = result else { EmptyResultSnafu.fail()? }; @@ -205,9 +204,11 @@ impl Export { ) -> Result<(Vec, Vec)> { // Puts all metric table first let sql = format!( - "select table_catalog, table_schema, table_name from \ - information_schema.columns where column_name = '__tsid' \ - and table_catalog = \'{catalog}\' and table_schema = \'{schema}\'" + "SELECT table_catalog, table_schema, table_name \ + FROM information_schema.columns \ + WHERE column_name = '__tsid' \ + and table_catalog = \'{catalog}\' \ + and table_schema = \'{schema}\'" ); let result = self.sql(&sql).await?; let Some(records) = result else { @@ -227,9 +228,11 @@ impl Export { // TODO: SQL injection hurts let sql = format!( - "select table_catalog, table_schema, table_name from \ - information_schema.tables where table_type = \'BASE TABLE\' \ - and table_catalog = \'{catalog}\' and table_schema = \'{schema}\'", + "SELECT table_catalog, table_schema, table_name \ + FROM information_schema.tables \ + WHERE table_type = \'BASE TABLE\' \ + and table_catalog = \'{catalog}\' \ + and table_schema = \'{schema}\'", ); let result = self.sql(&sql).await?; let Some(records) = result else { @@ -266,7 +269,7 @@ impl Export { async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result { let sql = format!( - r#"show create table "{}"."{}"."{}""#, + r#"SHOW CREATE TABLE "{}"."{}"."{}""#, catalog, schema, table ); let result = self.sql(&sql).await?; @@ -341,99 +344,6 @@ impl Export { Ok(()) } - async fn export_table_data(&self) -> Result<()> { - let timer = Instant::now(); - let semaphore = Arc::new(Semaphore::new(self.parallelism)); - let db_names = self.iter_db_names().await?; - let db_count = db_names.len(); - let mut tasks = Vec::with_capacity(db_names.len()); - for (catalog, schema) in db_names { - let semaphore_moved = semaphore.clone(); - tasks.push(async move { - let _permit = semaphore_moved.acquire().await.unwrap(); - tokio::fs::create_dir_all(&self.output_dir) - .await - .context(FileIoSnafu)?; - let output_dir = Path::new(&self.output_dir).join(format!("{catalog}-{schema}/")); - // Ignores metric physical tables - let (metrics_tables, table_list) = self.get_table_list(&catalog, &schema).await?; - for (_, _, table_name) in metrics_tables { - warn!("Ignores metric physical table: {table_name}"); - } - for (catalog_name, schema_name, table_name) in table_list { - // copy table to - let sql = format!( - r#"Copy "{}"."{}"."{}" TO '{}{}.parquet' WITH (format='parquet');"#, - catalog_name, - schema_name, - table_name, - output_dir.to_str().unwrap(), - table_name, - ); - info!("Executing sql: {sql}"); - self.sql(&sql).await?; - } - info!("Finished exporting {catalog}.{schema} data"); - - // export copy from sql - let dir_filenames = match output_dir.read_dir() { - Ok(dir) => dir, - 
Err(_) => { - warn!("empty database {catalog}.{schema}"); - return Ok(()); - } - }; - - let copy_from_file = - Path::new(&self.output_dir).join(format!("{catalog}-{schema}_copy_from.sql")); - let mut writer = - BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?); - - for table_file in dir_filenames { - let table_file = table_file.unwrap(); - let table_name = table_file - .file_name() - .into_string() - .unwrap() - .replace(".parquet", ""); - - writer - .write( - format!( - "copy {} from '{}' with (format='parquet');\n", - table_name, - table_file.path().to_str().unwrap() - ) - .as_bytes(), - ) - .await - .context(FileIoSnafu)?; - } - writer.flush().await.context(FileIoSnafu)?; - - info!("finished exporting {catalog}.{schema} copy_from.sql"); - - Ok::<(), Error>(()) - }); - } - - let success = futures::future::join_all(tasks) - .await - .into_iter() - .filter(|r| match r { - Ok(_) => true, - Err(e) => { - error!(e; "export job failed"); - false - } - }) - .count(); - let elapsed = timer.elapsed(); - info!("Success {success}/{db_count} jobs, costs: {:?}", elapsed); - - Ok(()) - } - async fn export_database_data(&self) -> Result<()> { let timer = Instant::now(); let semaphore = Arc::new(Semaphore::new(self.parallelism)); @@ -530,9 +440,12 @@ impl Export { impl Tool for Export { async fn do_work(&self) -> Result<()> { match self.target { - ExportTarget::CreateTable => self.export_create_table().await, - ExportTarget::TableData => self.export_table_data().await, - ExportTarget::DatabaseData => self.export_database_data().await, + ExportTarget::Schema => self.export_create_table().await, + ExportTarget::Data => self.export_database_data().await, + ExportTarget::All => { + self.export_create_table().await?; + self.export_database_data().await + } } } } @@ -619,7 +532,7 @@ mod tests { "--output-dir", &*output_dir.path().to_string_lossy(), "--target", - "create-table", + "schema", ]); let mut cli_app = cli.build(LoggingOptions::default()).await?; cli_app.start().await?; diff --git a/src/cmd/src/error.rs b/src/cmd/src/error.rs index db347df9e5e6..66cc57c625c3 100644 --- a/src/cmd/src/error.rs +++ b/src/cmd/src/error.rs @@ -305,6 +305,12 @@ pub enum Error { error: std::io::Error, }, + #[snafu(display("Failed to spawn thread"))] + SpawnThread { + #[snafu(source)] + error: std::io::Error, + }, + #[snafu(display("Other error"))] Other { source: BoxedError, @@ -395,7 +401,9 @@ impl ErrorExt for Error { Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(), Error::StartCatalogManager { source, .. } => source.status_code(), - Error::SerdeJson { .. } | Error::FileIo { .. } => StatusCode::Unexpected, + Error::SerdeJson { .. } | Error::FileIo { .. } | Error::SpawnThread { .. } => { + StatusCode::Unexpected + } Error::Other { source, .. 
} => source.status_code(), diff --git a/src/cmd/src/flownode.rs b/src/cmd/src/flownode.rs index 328693f326fc..60ec6c6614c6 100644 --- a/src/cmd/src/flownode.rs +++ b/src/cmd/src/flownode.rs @@ -24,6 +24,7 @@ use common_grpc::channel_manager::ChannelConfig; use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder}; use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler; use common_meta::heartbeat::handler::HandlerGroupExecutor; +use common_meta::key::flow::FlowMetadataManager; use common_meta::key::TableMetadataManager; use common_telemetry::info; use common_telemetry::logging::TracingOptions; @@ -296,11 +297,13 @@ impl StartCommand { Arc::new(executor), ); + let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone())); let flownode_builder = FlownodeBuilder::new( opts, Plugins::new(), table_metadata_manager, catalog_manager.clone(), + flow_metadata_manager, ) .with_heartbeat_task(heartbeat_task); diff --git a/src/cmd/src/standalone.rs b/src/cmd/src/standalone.rs index ba25ab555f7c..efa360713ff9 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -476,11 +476,13 @@ impl StartCommand { .await .context(StartDatanodeSnafu)?; + let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone())); let flow_builder = FlownodeBuilder::new( Default::default(), plugins.clone(), table_metadata_manager.clone(), catalog_manager.clone(), + flow_metadata_manager.clone(), ); let flownode = Arc::new( flow_builder @@ -511,7 +513,6 @@ impl StartCommand { opts.wal.into(), kv_backend.clone(), )); - let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone())); let table_meta_allocator = Arc::new(TableMetadataAllocator::new( table_id_sequence, wal_options_allocator.clone(), diff --git a/src/cmd/tests/load_config_test.rs b/src/cmd/tests/load_config_test.rs index a6a632805951..8545b708cf6f 100644 --- a/src/cmd/tests/load_config_test.rs +++ b/src/cmd/tests/load_config_test.rs @@ -22,7 +22,7 @@ use common_grpc::channel_manager::{ DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE, }; use common_runtime::global::RuntimeOptions; -use common_telemetry::logging::LoggingOptions; +use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT}; use common_wal::config::raft_engine::RaftEngineConfig; use common_wal::config::DatanodeWalConfig; use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig}; @@ -88,7 +88,7 @@ fn test_load_datanode_example_config() { ], logging: LoggingOptions { level: Some("info".to_string()), - otlp_endpoint: Some("".to_string()), + otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()), tracing_sample_ratio: Some(Default::default()), ..Default::default() }, @@ -136,7 +136,7 @@ fn test_load_frontend_example_config() { }), logging: LoggingOptions { level: Some("info".to_string()), - otlp_endpoint: Some("".to_string()), + otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()), tracing_sample_ratio: Some(Default::default()), ..Default::default() }, @@ -174,7 +174,7 @@ fn test_load_metasrv_example_config() { logging: LoggingOptions { dir: "/tmp/greptimedb/logs".to_string(), level: Some("info".to_string()), - otlp_endpoint: Some("".to_string()), + otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()), tracing_sample_ratio: Some(Default::default()), ..Default::default() }, @@ -228,7 +228,7 @@ fn test_load_standalone_example_config() { }, logging: LoggingOptions { level: Some("info".to_string()), - otlp_endpoint: 
Some("".to_string()), + otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()), tracing_sample_ratio: Some(Default::default()), ..Default::default() }, diff --git a/src/common/datasource/src/object_store/fs.rs b/src/common/datasource/src/object_store/fs.rs index 16e30b0044c0..f21fe46099d5 100644 --- a/src/common/datasource/src/object_store/fs.rs +++ b/src/common/datasource/src/object_store/fs.rs @@ -19,9 +19,8 @@ use snafu::ResultExt; use crate::error::{BuildBackendSnafu, Result}; pub fn build_fs_backend(root: &str) -> Result { - let mut builder = Fs::default(); - let _ = builder.root(root); - let object_store = ObjectStore::new(builder) + let builder = Fs::default(); + let object_store = ObjectStore::new(builder.root(root)) .context(BuildBackendSnafu)? .layer( object_store::layers::LoggingLayer::default() diff --git a/src/common/datasource/src/object_store/s3.rs b/src/common/datasource/src/object_store/s3.rs index 6efc6474c45d..cdba93767745 100644 --- a/src/common/datasource/src/object_store/s3.rs +++ b/src/common/datasource/src/object_store/s3.rs @@ -44,28 +44,26 @@ pub fn build_s3_backend( path: &str, connection: &HashMap, ) -> Result { - let mut builder = S3::default(); - - let _ = builder.root(path).bucket(host); + let mut builder = S3::default().root(path).bucket(host); if let Some(endpoint) = connection.get(ENDPOINT) { - let _ = builder.endpoint(endpoint); + builder = builder.endpoint(endpoint); } if let Some(region) = connection.get(REGION) { - let _ = builder.region(region); + builder = builder.region(region); } if let Some(key_id) = connection.get(ACCESS_KEY_ID) { - let _ = builder.access_key_id(key_id); + builder = builder.access_key_id(key_id); } if let Some(key) = connection.get(SECRET_ACCESS_KEY) { - let _ = builder.secret_access_key(key); + builder = builder.secret_access_key(key); } if let Some(session_token) = connection.get(SESSION_TOKEN) { - let _ = builder.security_token(session_token); + builder = builder.session_token(session_token); } if let Some(enable_str) = connection.get(ENABLE_VIRTUAL_HOST_STYLE) { @@ -79,7 +77,7 @@ pub fn build_s3_backend( .build() })?; if enable { - let _ = builder.enable_virtual_host_style(); + builder = builder.enable_virtual_host_style(); } } diff --git a/src/common/datasource/src/test_util.rs b/src/common/datasource/src/test_util.rs index d3a24a23d24a..64fa41ad62ec 100644 --- a/src/common/datasource/src/test_util.rs +++ b/src/common/datasource/src/test_util.rs @@ -47,19 +47,15 @@ pub fn format_schema(schema: Schema) -> Vec { } pub fn test_store(root: &str) -> ObjectStore { - let mut builder = Fs::default(); - let _ = builder.root(root); - - ObjectStore::new(builder).unwrap().finish() + let builder = Fs::default(); + ObjectStore::new(builder.root(root)).unwrap().finish() } pub fn test_tmp_store(root: &str) -> (ObjectStore, TempDir) { let dir = create_temp_dir(root); - let mut builder = Fs::default(); - let _ = builder.root("/"); - - (ObjectStore::new(builder).unwrap().finish(), dir) + let builder = Fs::default(); + (ObjectStore::new(builder.root("/")).unwrap().finish(), dir) } pub fn test_basic_schema() -> SchemaRef { diff --git a/src/common/function/src/table/flush_compact_table.rs b/src/common/function/src/table/flush_compact_table.rs index 5adfc2510824..fe2220b127ee 100644 --- a/src/common/function/src/table/flush_compact_table.rs +++ b/src/common/function/src/table/flush_compact_table.rs @@ -33,6 +33,8 @@ use crate::handlers::TableMutationHandlerRef; /// Compact type: strict window. 
const COMPACT_TYPE_STRICT_WINDOW: &str = "strict_window"; +/// Compact type: strict window (short name). +const COMPACT_TYPE_STRICT_WINDOW_SHORT: &str = "swcs"; #[admin_fn( name = FlushTableFunction, @@ -168,8 +170,12 @@ fn parse_compact_params( }) } +/// Parses compaction strategy type. For `strict_window` or `swcs` strict window compaction is chose, +/// otherwise choose regular (TWCS) compaction. fn parse_compact_type(type_str: &str, option: Option<&str>) -> Result { - if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW) { + if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW) + | type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW_SHORT) + { let window_seconds = option .map(|v| { i64::from_str(v).map_err(|_| { @@ -350,6 +356,17 @@ mod tests { compact_options: Options::Regular(Default::default()), }, ), + ( + &["table", "swcs", "120"], + CompactTableRequest { + catalog_name: DEFAULT_CATALOG_NAME.to_string(), + schema_name: DEFAULT_SCHEMA_NAME.to_string(), + table_name: "table".to_string(), + compact_options: Options::StrictWindow(StrictWindow { + window_seconds: 120, + }), + }, + ), ]); assert!(parse_compact_params( diff --git a/src/common/meta/src/ddl/drop_table.rs b/src/common/meta/src/ddl/drop_table.rs index e2a7adf3cc0e..d1110cf05958 100644 --- a/src/common/meta/src/ddl/drop_table.rs +++ b/src/common/meta/src/ddl/drop_table.rs @@ -227,7 +227,7 @@ impl Procedure for DropTableProcedure { } fn rollback_supported(&self) -> bool { - !matches!(self.data.state, DropTableState::Prepare) + !matches!(self.data.state, DropTableState::Prepare) && self.data.allow_rollback } async fn rollback(&mut self, _: &ProcedureContext) -> ProcedureResult<()> { @@ -256,6 +256,8 @@ pub struct DropTableData { pub task: DropTableTask, pub physical_region_routes: Vec, pub physical_table_id: Option, + #[serde(default)] + pub allow_rollback: bool, } impl DropTableData { @@ -266,6 +268,7 @@ impl DropTableData { task, physical_region_routes: vec![], physical_table_id: None, + allow_rollback: false, } } diff --git a/src/common/meta/src/ddl/drop_table/metadata.rs b/src/common/meta/src/ddl/drop_table/metadata.rs index 52d82a003c2c..5e182720fe87 100644 --- a/src/common/meta/src/ddl/drop_table/metadata.rs +++ b/src/common/meta/src/ddl/drop_table/metadata.rs @@ -12,8 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_catalog::format_full_table_name; +use snafu::OptionExt; +use store_api::metric_engine_consts::METRIC_ENGINE_NAME; + use crate::ddl::drop_table::DropTableProcedure; -use crate::error::Result; +use crate::error::{self, Result}; impl DropTableProcedure { /// Fetches the table info and physical table route. @@ -29,6 +33,23 @@ impl DropTableProcedure { self.data.physical_region_routes = physical_table_route_value.region_routes; self.data.physical_table_id = Some(physical_table_id); + if physical_table_id == self.data.table_id() { + let table_info_value = self + .context + .table_metadata_manager + .table_info_manager() + .get(task.table_id) + .await? + .with_context(|| error::TableInfoNotFoundSnafu { + table: format_full_table_name(&task.catalog, &task.schema, &task.table), + })? 
+ .into_inner(); + + let engine = table_info_value.table_info.meta.engine; + // rollback only if dropping the metric physical table fails + self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME + } + Ok(()) } } diff --git a/src/common/meta/src/ddl/test_util.rs b/src/common/meta/src/ddl/test_util.rs index 22a920346190..3a82f644e4fd 100644 --- a/src/common/meta/src/ddl/test_util.rs +++ b/src/common/meta/src/ddl/test_util.rs @@ -23,6 +23,7 @@ use std::collections::HashMap; use api::v1::meta::Partition; use api::v1::{ColumnDataType, SemanticType}; use common_procedure::Status; +use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME}; use table::metadata::{RawTableInfo, TableId}; use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure; @@ -130,6 +131,11 @@ pub fn test_create_logical_table_task(name: &str) -> CreateTableTask { .time_index("ts") .primary_keys(["host".into()]) .table_name(name) + .engine(METRIC_ENGINE_NAME) + .table_options(HashMap::from([( + LOGICAL_TABLE_METADATA_KEY.to_string(), + "phy".to_string(), + )])) .build() .unwrap() .into(); @@ -166,6 +172,7 @@ pub fn test_create_physical_table_task(name: &str) -> CreateTableTask { .time_index("ts") .primary_keys(["value".into()]) .table_name(name) + .engine(METRIC_ENGINE_NAME) .build() .unwrap() .into(); diff --git a/src/common/meta/src/ddl/test_util/create_table.rs b/src/common/meta/src/ddl/test_util/create_table.rs index 15f55dca2f0c..12896fbf915b 100644 --- a/src/common/meta/src/ddl/test_util/create_table.rs +++ b/src/common/meta/src/ddl/test_util/create_table.rs @@ -127,7 +127,7 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo { engine: expr.engine.to_string(), next_column_id: expr.column_defs.len() as u32, region_numbers: vec![], - options: TableOptions::default(), + options: TableOptions::try_from_iter(&expr.table_options).unwrap(), created_on: DateTime::default(), partition_key_indices: vec![], }, diff --git a/src/common/meta/src/ddl/tests/drop_table.rs b/src/common/meta/src/ddl/tests/drop_table.rs index fd34e2646348..aff123747223 100644 --- a/src/common/meta/src/ddl/tests/drop_table.rs +++ b/src/common/meta/src/ddl/tests/drop_table.rs @@ -91,6 +91,7 @@ async fn test_on_prepare_table() { // Drop if exists let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone()); procedure.on_prepare().await.unwrap(); + assert!(!procedure.rollback_supported()); let task = new_drop_table_task(table_name, table_id, false); // Drop table @@ -224,9 +225,12 @@ async fn test_on_rollback() { let task = new_drop_table_task("phy_table", physical_table_id, false); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone()); procedure.on_prepare().await.unwrap(); + assert!(procedure.rollback_supported()); procedure.on_delete_metadata().await.unwrap(); + assert!(procedure.rollback_supported()); procedure.rollback(&ctx).await.unwrap(); // Rollback again + assert!(procedure.rollback_supported()); procedure.rollback(&ctx).await.unwrap(); let kvs = kv_backend.dump(); assert_eq!(kvs, expected_kvs); @@ -236,12 +240,7 @@ async fn test_on_rollback() { let task = new_drop_table_task("foo", table_ids[0], false); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone()); procedure.on_prepare().await.unwrap(); - procedure.on_delete_metadata().await.unwrap(); - procedure.rollback(&ctx).await.unwrap(); - // Rollback again - procedure.rollback(&ctx).await.unwrap(); - let kvs = kv_backend.dump(); - assert_eq!(kvs, 
expected_kvs); + assert!(!procedure.rollback_supported()); } fn new_drop_table_task(table_name: &str, table_id: TableId, drop_if_exists: bool) -> DropTableTask { diff --git a/src/common/meta/src/ddl/utils.rs b/src/common/meta/src/ddl/utils.rs index 7f4b279c39e0..1b74d3384a7d 100644 --- a/src/common/meta/src/ddl/utils.rs +++ b/src/common/meta/src/ddl/utils.rs @@ -15,7 +15,7 @@ use common_catalog::consts::METRIC_ENGINE; use common_error::ext::BoxedError; use common_procedure::error::Error as ProcedureError; -use snafu::{ensure, location, Location, OptionExt}; +use snafu::{ensure, location, OptionExt}; use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY; use table::metadata::TableId; diff --git a/src/common/procedure/src/local.rs b/src/common/procedure/src/local.rs index 54ae88a5cf50..574fb612b246 100644 --- a/src/common/procedure/src/local.rs +++ b/src/common/procedure/src/local.rs @@ -680,9 +680,8 @@ pub(crate) mod test_util { pub(crate) fn new_object_store(dir: &TempDir) -> ObjectStore { let store_dir = dir.path().to_str().unwrap(); - let mut builder = Builder::default(); - let _ = builder.root(store_dir); - ObjectStore::new(builder).unwrap().finish() + let builder = Builder::default(); + ObjectStore::new(builder.root(store_dir)).unwrap().finish() } } diff --git a/src/common/procedure/src/store.rs b/src/common/procedure/src/store.rs index 22e5043d306a..4bc5f8ca814e 100644 --- a/src/common/procedure/src/store.rs +++ b/src/common/procedure/src/store.rs @@ -361,8 +361,7 @@ mod tests { fn procedure_store_for_test(dir: &TempDir) -> ProcedureStore { let store_dir = dir.path().to_str().unwrap(); - let mut builder = Builder::default(); - let _ = builder.root(store_dir); + let builder = Builder::default().root(store_dir); let object_store = ObjectStore::new(builder).unwrap().finish(); ProcedureStore::from_object_store(object_store) diff --git a/src/common/procedure/src/store/state_store.rs b/src/common/procedure/src/store/state_store.rs index 096ef84b125d..4f119739545f 100644 --- a/src/common/procedure/src/store/state_store.rs +++ b/src/common/procedure/src/store/state_store.rs @@ -220,8 +220,7 @@ mod tests { async fn test_object_state_store() { let dir = create_temp_dir("state_store"); let store_dir = dir.path().to_str().unwrap(); - let mut builder = Builder::default(); - let _ = builder.root(store_dir); + let builder = Builder::default().root(store_dir); let object_store = ObjectStore::new(builder).unwrap().finish(); let state_store = ObjectStateStore::new(object_store); @@ -291,8 +290,7 @@ mod tests { async fn test_object_state_store_delete() { let dir = create_temp_dir("state_store_list"); let store_dir = dir.path().to_str().unwrap(); - let mut builder = Builder::default(); - let _ = builder.root(store_dir); + let builder = Builder::default().root(store_dir); let object_store = ObjectStore::new(builder).unwrap().finish(); let state_store = ObjectStateStore::new(object_store); diff --git a/src/common/query/src/error.rs b/src/common/query/src/error.rs index b8adeeba5c90..a41ab6df1127 100644 --- a/src/common/query/src/error.rs +++ b/src/common/query/src/error.rs @@ -155,13 +155,6 @@ pub enum Error { source: DataTypeError, }, - #[snafu(display("Failed to execute physical plan"))] - ExecutePhysicalPlan { - #[snafu(implicit)] - location: Location, - source: BoxedError, - }, - #[snafu(display("Failed to cast array to {:?}", typ))] TypeCast { #[snafu(source)] @@ -308,7 +301,6 @@ impl ErrorExt for Error { Error::DecodePlan { source, .. } | Error::Execute { source, .. 
} - | Error::ExecutePhysicalPlan { source, .. } | Error::ProcedureService { source, .. } | Error::TableMutation { source, .. } => source.status_code(), diff --git a/src/common/telemetry/src/logging.rs b/src/common/telemetry/src/logging.rs index 4088c5236ca8..0a60e35a0bdf 100644 --- a/src/common/telemetry/src/logging.rs +++ b/src/common/telemetry/src/logging.rs @@ -33,7 +33,7 @@ use tracing_subscriber::{filter, EnvFilter, Registry}; use crate::tracing_sampler::{create_sampler, TracingSampleOptions}; -const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317"; +pub const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317"; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(default)] diff --git a/src/common/version/Cargo.toml b/src/common/version/Cargo.toml index 6d602cabfe0b..830f5a757f39 100644 --- a/src/common/version/Cargo.toml +++ b/src/common/version/Cargo.toml @@ -14,8 +14,8 @@ codec = ["dep:serde", "dep:schemars"] const_format = "0.2" schemars = { workspace = true, optional = true } serde = { workspace = true, optional = true } -shadow-rs = "0.29" +shadow-rs.workspace = true [build-dependencies] build-data = "0.2" -shadow-rs = "0.29" +shadow-rs.workspace = true diff --git a/src/common/version/build.rs b/src/common/version/build.rs index eeb383771864..9a57b42afa9d 100644 --- a/src/common/version/build.rs +++ b/src/common/version/build.rs @@ -12,7 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::BTreeSet; +use std::env; + use build_data::{format_timestamp, get_source_time}; +use shadow_rs::{CARGO_METADATA, CARGO_TREE}; fn main() -> shadow_rs::SdResult<()> { println!("cargo:rerun-if-changed=.git/refs/heads"); @@ -25,5 +29,16 @@ fn main() -> shadow_rs::SdResult<()> { } ); build_data::set_BUILD_TIMESTAMP(); - shadow_rs::new() + + // The "CARGO_WORKSPACE_DIR" is set manually (not by Rust itself) in Cargo config file, to + // solve the problem where the "CARGO_MANIFEST_DIR" is not what we want when this repo is + // made as a submodule in another repo. + let src_path = env::var("CARGO_WORKSPACE_DIR").or_else(|_| env::var("CARGO_MANIFEST_DIR"))?; + let out_path = env::var("OUT_DIR")?; + let _ = shadow_rs::Shadow::build_with( + src_path, + out_path, + BTreeSet::from([CARGO_METADATA, CARGO_TREE]), + )?; + Ok(()) } diff --git a/src/datanode/src/store.rs b/src/datanode/src/store.rs index 3260b2f762ca..877f044974bb 100644 --- a/src/datanode/src/store.rs +++ b/src/datanode/src/store.rs @@ -112,11 +112,11 @@ async fn create_object_store_with_cache( let atomic_temp_dir = join_dir(path, ".tmp/"); clean_temp_dir(&atomic_temp_dir)?; - let cache_store = { - let mut builder = Fs::default(); - builder.root(path).atomic_write_dir(&atomic_temp_dir); - builder.build().context(error::InitBackendSnafu)? 
- }; + let cache_store = Fs::default() + .root(path) + .atomic_write_dir(&atomic_temp_dir) + .build() + .context(error::InitBackendSnafu)?; let cache_layer = LruCacheLayer::new(Arc::new(cache_store), cache_capacity.0 as usize) .await diff --git a/src/datanode/src/store/azblob.rs b/src/datanode/src/store/azblob.rs index 156c2897ec9d..ca7a5023a90e 100644 --- a/src/datanode/src/store/azblob.rs +++ b/src/datanode/src/store/azblob.rs @@ -30,8 +30,7 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res azblob_config.container, &root ); - let mut builder = Azblob::default(); - let _ = builder + let mut builder = Azblob::default() .root(&root) .container(&azblob_config.container) .endpoint(&azblob_config.endpoint) @@ -40,8 +39,8 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res .http_client(build_http_client()?); if let Some(token) = &azblob_config.sas_token { - let _ = builder.sas_token(token); - } + builder = builder.sas_token(token); + }; Ok(ObjectStore::new(builder) .context(error::InitBackendSnafu)? diff --git a/src/datanode/src/store/fs.rs b/src/datanode/src/store/fs.rs index 607598841261..119a5e1bf867 100644 --- a/src/datanode/src/store/fs.rs +++ b/src/datanode/src/store/fs.rs @@ -35,8 +35,9 @@ pub(crate) async fn new_fs_object_store( let atomic_write_dir = join_dir(data_home, ".tmp/"); store::clean_temp_dir(&atomic_write_dir)?; - let mut builder = Fs::default(); - let _ = builder.root(data_home).atomic_write_dir(&atomic_write_dir); + let builder = Fs::default() + .root(data_home) + .atomic_write_dir(&atomic_write_dir); let object_store = ObjectStore::new(builder) .context(error::InitBackendSnafu)? diff --git a/src/datanode/src/store/gcs.rs b/src/datanode/src/store/gcs.rs index e0a8c6a315a7..6c68ad3baccc 100644 --- a/src/datanode/src/store/gcs.rs +++ b/src/datanode/src/store/gcs.rs @@ -29,8 +29,7 @@ pub(crate) async fn new_gcs_object_store(gcs_config: &GcsConfig) -> Result Result Result Result (TempDir, ObjectStore) { let dir = create_temp_dir(prefix); let store_dir = dir.path().to_string_lossy(); - let mut builder = Fs::default(); - let _ = builder.root(&store_dir); + let builder = Fs::default().root(&store_dir); (dir, ObjectStore::new(builder).unwrap().finish()) } diff --git a/src/flow/src/error.rs b/src/flow/src/error.rs index 3b8877ed86dd..8b4f3adc65d2 100644 --- a/src/flow/src/error.rs +++ b/src/flow/src/error.rs @@ -83,6 +83,14 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to list flows in flownode={id:?}"))] + ListFlows { + id: Option, + source: common_meta::error::Error, + #[snafu(implicit)] + location: Location, + }, + #[snafu(display("Flow already exist, id={id}"))] FlowAlreadyExist { id: FlowId, @@ -214,7 +222,8 @@ impl ErrorExt for Error { } Self::TableNotFound { .. } | Self::TableNotFoundMeta { .. } - | Self::FlowNotFound { .. } => StatusCode::TableNotFound, + | Self::FlowNotFound { .. } + | Self::ListFlows { .. } => StatusCode::TableNotFound, Self::InvalidQueryProst { .. } | &Self::InvalidQuery { .. } | &Self::Plan { .. 
} diff --git a/src/flow/src/server.rs b/src/flow/src/server.rs index a8c850349fdd..d470eb0ad8e7 100644 --- a/src/flow/src/server.rs +++ b/src/flow/src/server.rs @@ -29,12 +29,13 @@ use common_meta::cache::{ }; use common_meta::ddl::{table_meta, ProcedureExecutorRef}; use common_meta::heartbeat::handler::HandlerGroupExecutor; +use common_meta::key::flow::FlowMetadataManagerRef; use common_meta::key::TableMetadataManagerRef; use common_meta::kv_backend::KvBackendRef; use common_meta::node_manager::{self, Flownode, NodeManagerRef}; use common_query::Output; use common_telemetry::tracing::info; -use futures::FutureExt; +use futures::{FutureExt, StreamExt, TryStreamExt}; use greptime_proto::v1::flow::{flow_server, FlowRequest, FlowResponse, InsertRequests}; use itertools::Itertools; use meta_client::client::MetaClient; @@ -47,7 +48,7 @@ use serde::de::Unexpected; use servers::error::{AlreadyStartedSnafu, StartGrpcSnafu, TcpBindSnafu, TcpIncomingSnafu}; use servers::heartbeat_options::HeartbeatOptions; use servers::server::Server; -use session::context::QueryContextRef; +use session::context::{QueryContext, QueryContextBuilder, QueryContextRef}; use snafu::{ensure, OptionExt, ResultExt}; use tokio::net::TcpListener; use tokio::sync::{broadcast, oneshot, Mutex}; @@ -57,7 +58,8 @@ use tonic::{Request, Response, Status}; use crate::adapter::FlowWorkerManagerRef; use crate::error::{ - CacheRequiredSnafu, ParseAddrSnafu, ShutdownServerSnafu, StartServerSnafu, UnexpectedSnafu, + CacheRequiredSnafu, ExternalSnafu, FlowNotFoundSnafu, ListFlowsSnafu, ParseAddrSnafu, + ShutdownServerSnafu, StartServerSnafu, UnexpectedSnafu, }; use crate::heartbeat::HeartbeatTask; use crate::transform::register_function_to_query_engine; @@ -240,6 +242,7 @@ pub struct FlownodeBuilder { plugins: Plugins, table_meta: TableMetadataManagerRef, catalog_manager: CatalogManagerRef, + flow_metadata_manager: FlowMetadataManagerRef, heartbeat_task: Option, } @@ -250,12 +253,14 @@ impl FlownodeBuilder { plugins: Plugins, table_meta: TableMetadataManagerRef, catalog_manager: CatalogManagerRef, + flow_metadata_manager: FlowMetadataManagerRef, ) -> Self { Self { opts, plugins, table_meta, catalog_manager, + flow_metadata_manager, heartbeat_task: None, } } @@ -283,6 +288,11 @@ impl FlownodeBuilder { self.build_manager(query_engine_factory.query_engine()) .await?, ); + + if let Err(err) = self.recover_flows(&manager).await { + common_telemetry::error!(err; "Failed to recover flows"); + } + let server = FlownodeServer::new(FlowService::new(manager.clone())); let heartbeat_task = self.heartbeat_task; @@ -296,6 +306,85 @@ impl FlownodeBuilder { Ok(instance) } + /// recover all flow tasks in this flownode in distributed mode(nodeid is Some()) + /// + /// or recover all existing flow tasks if in standalone mode(nodeid is None) + /// + /// TODO(discord9): persisent flow tasks with internal state + async fn recover_flows(&self, manager: &FlowWorkerManagerRef) -> Result { + let nodeid = self.opts.node_id; + let to_be_recovered: Vec<_> = if let Some(nodeid) = nodeid { + let to_be_recover = self + .flow_metadata_manager + .flownode_flow_manager() + .flows(nodeid) + .try_collect::>() + .await + .context(ListFlowsSnafu { id: Some(nodeid) })?; + to_be_recover.into_iter().map(|(id, _)| id).collect() + } else { + let all_catalogs = self + .catalog_manager + .catalog_names() + .await + .map_err(BoxedError::new) + .context(ExternalSnafu)?; + let mut all_flow_ids = vec![]; + for catalog in all_catalogs { + let flows = self + .flow_metadata_manager + 
.flow_name_manager() + .flow_names(&catalog) + .await + .try_collect::>() + .await + .map_err(BoxedError::new) + .context(ExternalSnafu)?; + + all_flow_ids.extend(flows.into_iter().map(|(_, id)| id.flow_id())); + } + all_flow_ids + }; + let cnt = to_be_recovered.len(); + + // TODO(discord9): recover in parallel + for flow_id in to_be_recovered { + let info = self + .flow_metadata_manager + .flow_info_manager() + .get(flow_id) + .await + .map_err(BoxedError::new) + .context(ExternalSnafu)? + .context(FlowNotFoundSnafu { id: flow_id })?; + + let sink_table_name = [ + info.sink_table_name().catalog_name.clone(), + info.sink_table_name().schema_name.clone(), + info.sink_table_name().table_name.clone(), + ]; + manager + .create_flow( + flow_id as _, + sink_table_name, + info.source_table_ids(), + true, + info.expire_after(), + Some(info.comment().clone()), + info.raw_sql().clone(), + info.options().clone(), + Some( + QueryContextBuilder::default() + .current_catalog(info.catalog_name().clone()) + .build(), + ), + ) + .await?; + } + + Ok(cnt) + } + /// build [`FlowWorkerManager`], note this doesn't take ownership of `self`, /// nor does it actually start running the worker. async fn build_manager( diff --git a/src/index/src/inverted_index/create/sort/intermediate_rw/codec_v1.rs b/src/index/src/inverted_index/create/sort/intermediate_rw/codec_v1.rs index bb1781a74365..9cf6b14e0d65 100644 --- a/src/index/src/inverted_index/create/sort/intermediate_rw/codec_v1.rs +++ b/src/index/src/inverted_index/create/sort/intermediate_rw/codec_v1.rs @@ -17,7 +17,7 @@ use std::io; use asynchronous_codec::{BytesMut, Decoder, Encoder}; use bytes::{Buf, BufMut}; use common_base::BitVec; -use snafu::{location, Location}; +use snafu::location; use crate::inverted_index::error::{Error, Result}; use crate::inverted_index::Bytes; diff --git a/src/log-store/Cargo.toml b/src/log-store/Cargo.toml index 7d324d81ef09..e599e0334995 100644 --- a/src/log-store/Cargo.toml +++ b/src/log-store/Cargo.toml @@ -26,7 +26,9 @@ common-time.workspace = true common-wal.workspace = true futures.workspace = true futures-util.workspace = true +itertools.workspace = true lazy_static.workspace = true +pin-project.workspace = true prometheus.workspace = true protobuf = { version = "2", features = ["bytes"] } raft-engine.workspace = true diff --git a/src/log-store/src/kafka.rs b/src/log-store/src/kafka.rs index a1cb2dc1b18b..21c5a397c03d 100644 --- a/src/log-store/src/kafka.rs +++ b/src/log-store/src/kafka.rs @@ -13,6 +13,9 @@ // limitations under the License. pub(crate) mod client_manager; +pub(crate) mod consumer; +#[allow(unused)] +pub(crate) mod index; pub mod log_store; pub(crate) mod producer; pub(crate) mod util; diff --git a/src/log-store/src/kafka/consumer.rs b/src/log-store/src/kafka/consumer.rs new file mode 100644 index 000000000000..70fa5e848274 --- /dev/null +++ b/src/log-store/src/kafka/consumer.rs @@ -0,0 +1,380 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
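+//! A Kafka consumer that implements `Stream`, yielding `(RecordAndOffset, high watermark)`
+//! pairs. Fetched records are buffered, and only offsets present in the provided
+//! `RegionWalIndexIterator` are yielded; the size of each fetch request is derived from
+//! `next_batch_hint` and the observed average record size.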
+ +use std::collections::VecDeque; +use std::ops::Range; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use common_telemetry::debug; +use futures::future::{BoxFuture, Fuse, FusedFuture}; +use futures::{FutureExt, Stream}; +use pin_project::pin_project; +use rskafka::client::partition::PartitionClient; +use rskafka::record::RecordAndOffset; + +use super::index::{NextBatchHint, RegionWalIndexIterator}; + +#[async_trait::async_trait] +pub trait FetchClient: std::fmt::Debug + Send + Sync { + /// Fetch records. + /// + /// Arguments are identical to [`PartitionClient::fetch_records`]. + async fn fetch_records( + &self, + offset: i64, + bytes: Range, + max_wait_ms: i32, + ) -> rskafka::client::error::Result<(Vec, i64)>; +} + +#[async_trait::async_trait] +impl FetchClient for PartitionClient { + async fn fetch_records( + &self, + offset: i64, + bytes: Range, + max_wait_ms: i32, + ) -> rskafka::client::error::Result<(Vec, i64)> { + self.fetch_records(offset, bytes, max_wait_ms).await + } +} + +struct FetchResult { + records_and_offsets: Vec, + batch_size: usize, + fetch_bytes: i32, + watermark: i64, + used_offset: i64, +} + +/// The [`Consumer`] struct represents a Kafka consumer that fetches messages from +/// a Kafka cluster. Yielding records respecting the [`RegionWalIndexIterator`]. +#[pin_project] +pub struct Consumer { + last_high_watermark: i64, + + /// The client is used to fetch records from kafka topic. + client: Arc, + + /// The max batch size in a single fetch request. + max_batch_size: usize, + + /// The max wait milliseconds. + max_wait_ms: u32, + + /// The avg record size + avg_record_size: usize, + + /// Termination flag + terminated: bool, + + /// The buffer of records. + buffer: RecordsBuffer, + + /// The fetch future. + fetch_fut: Fuse>>, +} + +struct RecordsBuffer { + buffer: VecDeque, + + index: Box, +} + +impl RecordsBuffer { + fn pop_front(&mut self) -> Option { + while let Some(index) = self.index.peek() { + if let Some(record_and_offset) = self.buffer.pop_front() { + if index == record_and_offset.offset as u64 { + self.index.next(); + return Some(record_and_offset); + } + } else { + return None; + } + } + + self.buffer.clear(); + None + } + + fn extend(&mut self, records: Vec) { + if let (Some(first), Some(index)) = (records.first(), self.index.peek()) { + // TODO(weny): throw an error? 
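+            // The fetch request starts at the peeked index, so the first returned record is
+            // expected to sit at or after that index; an earlier offset would mean the broker
+            // returned records the index iterator has already moved past.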
+ assert!( + index <= first.offset as u64, + "index: {index}, first offset: {}", + first.offset + ); + } + self.buffer.extend(records); + } +} + +impl Stream for Consumer { + type Item = rskafka::client::error::Result<(RecordAndOffset, i64)>; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + + loop { + if *this.terminated { + return Poll::Ready(None); + } + + if this.buffer.index.peek().is_none() { + return Poll::Ready(None); + } + + if let Some(x) = this.buffer.pop_front() { + debug!("Yielding record with offset: {}", x.offset); + return Poll::Ready(Some(Ok((x, *this.last_high_watermark)))); + } + + if this.fetch_fut.is_terminated() { + match this.buffer.index.peek() { + Some(next_offset) => { + let client = Arc::clone(this.client); + let max_wait_ms = *this.max_wait_ms as i32; + let offset = next_offset as i64; + let NextBatchHint { bytes, len } = this + .buffer + .index + .next_batch_hint(*this.avg_record_size) + .unwrap_or(NextBatchHint { + bytes: *this.avg_record_size, + len: 1, + }); + + let fetch_range = + 1i32..(bytes.saturating_add(1).min(*this.max_batch_size) as i32); + *this.fetch_fut = FutureExt::fuse(Box::pin(async move { + let (records_and_offsets, watermark) = client + .fetch_records(offset, fetch_range, max_wait_ms) + .await?; + + Ok(FetchResult { + records_and_offsets, + watermark, + used_offset: offset, + fetch_bytes: bytes as i32, + batch_size: len, + }) + })); + } + None => { + return Poll::Ready(None); + } + } + } + + let data = futures::ready!(this.fetch_fut.poll_unpin(cx)); + + match data { + Ok(FetchResult { + mut records_and_offsets, + watermark, + used_offset, + fetch_bytes, + batch_size, + }) => { + // Sort records by offset in case they aren't in order + records_and_offsets.sort_unstable_by_key(|x| x.offset); + *this.last_high_watermark = watermark; + if !records_and_offsets.is_empty() { + *this.avg_record_size = fetch_bytes as usize / records_and_offsets.len(); + debug!("set avg_record_size: {}", *this.avg_record_size); + } + + debug!( + "Fetch result: {:?}, used_offset: {used_offset}, max_batch_size: {fetch_bytes}, expected batch_num: {batch_size}, actual batch_num: {}", + records_and_offsets + .iter() + .map(|record| record.offset) + .collect::>(), + records_and_offsets + .len() + ); + this.buffer.extend(records_and_offsets); + continue; + } + Err(e) => { + *this.terminated = true; + + return Poll::Ready(Some(Err(e))); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::collections::VecDeque; + use std::ops::Range; + use std::sync::Arc; + + use chrono::{TimeZone, Utc}; + use futures::future::Fuse; + use futures::TryStreamExt; + use rskafka::record::{Record, RecordAndOffset}; + + use super::FetchClient; + use crate::kafka::consumer::{Consumer, RecordsBuffer}; + use crate::kafka::index::{MultipleRegionWalIndexIterator, RegionWalRange, RegionWalVecIndex}; + + #[derive(Debug)] + struct MockFetchClient { + record: Record, + } + + #[async_trait::async_trait] + impl FetchClient for MockFetchClient { + async fn fetch_records( + &self, + offset: i64, + bytes: Range, + _max_wait_ms: i32, + ) -> rskafka::client::error::Result<(Vec, i64)> { + let record_size = self.record.approximate_size(); + let num = (bytes.end.unsigned_abs() as usize / record_size).max(1); + + let records = (0..num) + .map(|idx| RecordAndOffset { + record: self.record.clone(), + offset: offset + idx as i64, + }) + .collect::>(); + let max_offset = offset + records.len() as i64; + Ok((records, max_offset)) + } + } + + fn test_record() -> 
Record { + Record { + key: Some(vec![0; 4]), + value: Some(vec![0; 6]), + headers: Default::default(), + timestamp: Utc.timestamp_millis_opt(1337).unwrap(), + } + } + + #[tokio::test] + async fn test_consumer_with_index() { + common_telemetry::init_default_ut_logging(); + let record = test_record(); + let record_size = record.approximate_size(); + let mock_client = MockFetchClient { + record: record.clone(), + }; + let index = RegionWalVecIndex::new([1, 3, 4, 8, 10, 12], record_size * 3); + let consumer = Consumer { + last_high_watermark: -1, + client: Arc::new(mock_client), + max_batch_size: usize::MAX, + max_wait_ms: 500, + avg_record_size: record_size, + terminated: false, + buffer: RecordsBuffer { + buffer: VecDeque::new(), + index: Box::new(index), + }, + fetch_fut: Fuse::terminated(), + }; + + let records = consumer.try_collect::>().await.unwrap(); + assert_eq!( + records + .into_iter() + .map(|(x, _)| x.offset) + .collect::>(), + vec![1, 3, 4, 8, 10, 12] + ) + } + + #[tokio::test] + async fn test_consumer_without_index() { + common_telemetry::init_default_ut_logging(); + let record = test_record(); + let mock_client = MockFetchClient { + record: record.clone(), + }; + let index = RegionWalRange::new(0..30, 1024); + let consumer = Consumer { + last_high_watermark: -1, + client: Arc::new(mock_client), + max_batch_size: usize::MAX, + max_wait_ms: 500, + avg_record_size: record.approximate_size(), + terminated: false, + buffer: RecordsBuffer { + buffer: VecDeque::new(), + index: Box::new(index), + }, + fetch_fut: Fuse::terminated(), + }; + + let records = consumer.try_collect::>().await.unwrap(); + assert_eq!( + records + .into_iter() + .map(|(x, _)| x.offset) + .collect::>(), + (0..30).collect::>() + ) + } + + #[tokio::test] + async fn test_consumer_with_multiple_index() { + common_telemetry::init_default_ut_logging(); + let record = test_record(); + let mock_client = MockFetchClient { + record: record.clone(), + }; + + let iter0 = Box::new(RegionWalRange::new(0..0, 1024)) as _; + let iter1 = Box::new(RegionWalVecIndex::new( + [0, 1, 2, 7, 8, 11], + record.approximate_size() * 4, + )) as _; + let iter2 = Box::new(RegionWalRange::new(12..12, 1024)) as _; + let iter3 = Box::new(RegionWalRange::new(1024..1028, 1024)) as _; + let iter = MultipleRegionWalIndexIterator::new([iter0, iter1, iter2, iter3]); + + let consumer = Consumer { + last_high_watermark: -1, + client: Arc::new(mock_client), + max_batch_size: usize::MAX, + max_wait_ms: 500, + avg_record_size: record.approximate_size(), + terminated: false, + buffer: RecordsBuffer { + buffer: VecDeque::new(), + index: Box::new(iter), + }, + fetch_fut: Fuse::terminated(), + }; + + let records = consumer.try_collect::>().await.unwrap(); + assert_eq!( + records + .into_iter() + .map(|(x, _)| x.offset) + .collect::>(), + [0, 1, 2, 7, 8, 11, 1024, 1025, 1026, 1027] + ) + } +} diff --git a/src/log-store/src/kafka/index.rs b/src/log-store/src/kafka/index.rs new file mode 100644 index 000000000000..b0b4048516c4 --- /dev/null +++ b/src/log-store/src/kafka/index.rs @@ -0,0 +1,20 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod iterator; + +pub(crate) use iterator::{ + MultipleRegionWalIndexIterator, NextBatchHint, RegionWalIndexIterator, RegionWalRange, + RegionWalVecIndex, +}; diff --git a/src/log-store/src/kafka/index/iterator.rs b/src/log-store/src/kafka/index/iterator.rs new file mode 100644 index 000000000000..8a33cf1d9a3d --- /dev/null +++ b/src/log-store/src/kafka/index/iterator.rs @@ -0,0 +1,360 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cmp::{max, min}; +use std::collections::VecDeque; +use std::iter::Peekable; +use std::marker::PhantomData; +use std::ops::{Add, Mul, Range, Sub}; + +use chrono::format::Item; +use itertools::Itertools; +use store_api::logstore::EntryId; + +use crate::kafka::util::range::{ConvertIndexToRange, MergeRange}; + +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct NextBatchHint { + pub(crate) bytes: usize, + pub(crate) len: usize, +} + +/// An iterator over WAL (Write-Ahead Log) entries index for a region. +pub trait RegionWalIndexIterator: Send + Sync { + /// Returns next batch hint. + fn next_batch_hint(&self, avg_size: usize) -> Option; + + // Peeks the next EntryId without advancing the iterator. + fn peek(&self) -> Option; + + // Advances the iterator and returns the next EntryId. + fn next(&mut self) -> Option; +} + +/// Represents a range [next_entry_id, end_entry_id) of WAL entries for a region. 
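+///
+/// `peek` returns the next entry id without advancing the range, and `next_batch_hint`
+/// caps the estimated batch size at `max_batch_size` bytes.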
+pub struct RegionWalRange { + current_entry_id: EntryId, + end_entry_id: EntryId, + max_batch_size: usize, +} + +impl RegionWalRange { + pub fn new(range: Range, max_batch_size: usize) -> Self { + Self { + current_entry_id: range.start, + end_entry_id: range.end, + max_batch_size, + } + } + + fn next_batch_size(&self) -> Option { + if self.current_entry_id < self.end_entry_id { + Some( + self.end_entry_id + .checked_sub(self.current_entry_id) + .unwrap_or_default(), + ) + } else { + None + } + } +} + +impl RegionWalIndexIterator for RegionWalRange { + fn next_batch_hint(&self, avg_size: usize) -> Option { + if let Some(size) = self.next_batch_size() { + let bytes = min(size as usize * avg_size, self.max_batch_size); + let len = bytes / avg_size; + + return Some(NextBatchHint { bytes, len }); + } + + None + } + + fn peek(&self) -> Option { + if self.current_entry_id < self.end_entry_id { + Some(self.current_entry_id) + } else { + None + } + } + + fn next(&mut self) -> Option { + if self.current_entry_id < self.end_entry_id { + let next = self.current_entry_id; + self.current_entry_id += 1; + Some(next) + } else { + None + } + } +} + +/// Represents an index of Write-Ahead Log entries for a region, +/// stored as a vector of [EntryId]s. +pub struct RegionWalVecIndex { + index: VecDeque, + min_batch_window_size: usize, +} + +impl RegionWalVecIndex { + pub fn new>(index: I, min_batch_window_size: usize) -> Self { + Self { + index: index.into_iter().collect::>(), + min_batch_window_size, + } + } +} + +impl RegionWalIndexIterator for RegionWalVecIndex { + fn next_batch_hint(&self, avg_size: usize) -> Option { + let merger = MergeRange::new( + ConvertIndexToRange::new(self.index.iter().peekable(), avg_size), + self.min_batch_window_size, + ); + + merger.merge().map(|(range, size)| NextBatchHint { + bytes: range.end - range.start - 1, + len: size, + }) + } + + fn peek(&self) -> Option { + self.index.front().cloned() + } + + fn next(&mut self) -> Option { + self.index.pop_front() + } +} + +/// Represents an iterator over multiple region WAL indexes. +/// +/// Allowing iteration through multiple WAL indexes. 
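+///
+/// Child iterators are drained in order: exhausted iterators are dropped from the front
+/// before `next` advances to the following one.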
+pub struct MultipleRegionWalIndexIterator { + iterator: VecDeque>, +} + +impl MultipleRegionWalIndexIterator { + pub fn new>>(iterator: I) -> Self { + Self { + iterator: iterator.into_iter().collect::>(), + } + } +} + +impl RegionWalIndexIterator for MultipleRegionWalIndexIterator { + fn next_batch_hint(&self, avg_size: usize) -> Option { + for iter in &self.iterator { + if let Some(batch) = iter.next_batch_hint(avg_size) { + return Some(batch); + } + } + + None + } + + fn peek(&self) -> Option { + for iter in &self.iterator { + let peek = iter.peek(); + if peek.is_some() { + return peek; + } + } + + None + } + + fn next(&mut self) -> Option { + while !self.iterator.is_empty() { + let remove = self.iterator.front().and_then(|iter| iter.peek()).is_none(); + if remove { + self.iterator.pop_front(); + } else { + break; + } + } + + self.iterator.front_mut().and_then(|iter| iter.next()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_region_wal_range() { + let mut range = RegionWalRange::new(0..1024, 1024); + assert_eq!( + range.next_batch_hint(10), + Some(NextBatchHint { + bytes: 1024, + len: 102 + }) + ); + + let mut range = RegionWalRange::new(0..1, 1024); + + assert_eq!(range.next_batch_size(), Some(1)); + assert_eq!(range.peek(), Some(0)); + + // Advance 1 step + assert_eq!(range.next(), Some(0)); + assert_eq!(range.next_batch_size(), None); + + // Advance 1 step + assert_eq!(range.next(), None); + assert_eq!(range.next_batch_size(), None); + // No effect + assert_eq!(range.next(), None); + assert_eq!(range.next_batch_size(), None); + + let mut range = RegionWalRange::new(0..0, 1024); + assert_eq!(range.next_batch_size(), None); + // No effect + assert_eq!(range.next(), None); + assert_eq!(range.next_batch_size(), None); + } + + #[test] + fn test_region_wal_vec_index() { + let mut index = RegionWalVecIndex::new([0, 1, 2, 7, 8, 11], 30); + assert_eq!( + index.next_batch_hint(10), + Some(NextBatchHint { bytes: 30, len: 3 }) + ); + assert_eq!(index.peek(), Some(0)); + // Advance 1 step + assert_eq!(index.next(), Some(0)); + assert_eq!( + index.next_batch_hint(10), + Some(NextBatchHint { bytes: 20, len: 2 }) + ); + // Advance 1 step + assert_eq!(index.next(), Some(1)); + assert_eq!( + index.next_batch_hint(10), + Some(NextBatchHint { bytes: 10, len: 1 }) + ); + // Advance 1 step + assert_eq!(index.next(), Some(2)); + assert_eq!( + index.next_batch_hint(10), + Some(NextBatchHint { bytes: 20, len: 2 }) + ); + // Advance 1 step + assert_eq!(index.next(), Some(7)); + assert_eq!( + index.next_batch_hint(10), + Some(NextBatchHint { bytes: 40, len: 2 }) + ); + // Advance 1 step + assert_eq!(index.next(), Some(8)); + assert_eq!( + index.next_batch_hint(10), + Some(NextBatchHint { bytes: 10, len: 1 }) + ); + // Advance 1 step + assert_eq!(index.next(), Some(11)); + assert_eq!(index.next_batch_hint(10), None); + + // No effect + assert_eq!(index.next(), None); + assert_eq!(index.next_batch_hint(10), None); + + let mut index = RegionWalVecIndex::new([], 1024); + assert_eq!(index.next_batch_hint(10), None); + assert_eq!(index.peek(), None); + // No effect + assert_eq!(index.peek(), None); + assert_eq!(index.next(), None); + assert_eq!(index.next_batch_hint(10), None); + } + + #[test] + fn test_multiple_region_wal_iterator() { + let iter0 = Box::new(RegionWalRange::new(0..0, 1024)) as _; + let iter1 = Box::new(RegionWalVecIndex::new([0, 1, 2, 7, 8, 11], 40)) as _; + let iter2 = Box::new(RegionWalRange::new(1024..1024, 1024)) as _; + let mut iter = 
MultipleRegionWalIndexIterator::new([iter0, iter1, iter2]); + + // The next batch is 0, 1, 2 + assert_eq!( + iter.next_batch_hint(10), + Some(NextBatchHint { bytes: 30, len: 3 }) + ); + assert_eq!(iter.peek(), Some(0)); + // Advance 1 step + assert_eq!(iter.next(), Some(0)); + + // The next batch is 1, 2 + assert_eq!( + iter.next_batch_hint(10), + Some(NextBatchHint { bytes: 20, len: 2 }) + ); + assert_eq!(iter.peek(), Some(1)); + // Advance 1 step + assert_eq!(iter.next(), Some(1)); + + // The next batch is 2 + assert_eq!( + iter.next_batch_hint(10), + Some(NextBatchHint { bytes: 10, len: 1 }) + ); + assert_eq!(iter.peek(), Some(2)); + + // Advance 1 step + assert_eq!(iter.next(), Some(2)); + // The next batch is 7, 8, 11 + assert_eq!( + iter.next_batch_hint(10), + Some(NextBatchHint { bytes: 50, len: 3 }) + ); + assert_eq!(iter.peek(), Some(7)); + + // Advance 1 step + assert_eq!(iter.next(), Some(7)); + // The next batch is 8, 11 + assert_eq!( + iter.next_batch_hint(10), + Some(NextBatchHint { bytes: 40, len: 2 }) + ); + assert_eq!(iter.peek(), Some(8)); + + // Advance 1 step + assert_eq!(iter.next(), Some(8)); + // The next batch is 11 + assert_eq!( + iter.next_batch_hint(10), + Some(NextBatchHint { bytes: 10, len: 1 }) + ); + assert_eq!(iter.peek(), Some(11)); + // Advance 1 step + assert_eq!(iter.next(), Some(11)); + + assert_eq!(iter.next_batch_hint(10), None,); + assert_eq!(iter.peek(), None); + assert!(!iter.iterator.is_empty()); + assert_eq!(iter.next(), None); + assert!(iter.iterator.is_empty()); + + // No effect + assert_eq!(iter.next(), None); + assert_eq!(iter.next_batch_hint(10), None,); + assert_eq!(iter.peek(), None); + assert_eq!(iter.next(), None); + } +} diff --git a/src/log-store/src/kafka/util.rs b/src/log-store/src/kafka/util.rs index 52d575cbce40..e871feca4296 100644 --- a/src/log-store/src/kafka/util.rs +++ b/src/log-store/src/kafka/util.rs @@ -12,4 +12,5 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub mod record; +pub(crate) mod range; +pub(crate) mod record; diff --git a/src/log-store/src/kafka/util/range.rs b/src/log-store/src/kafka/util/range.rs new file mode 100644 index 000000000000..282d0aba1851 --- /dev/null +++ b/src/log-store/src/kafka/util/range.rs @@ -0,0 +1,146 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cmp::{max, min}; +use std::iter::Peekable; +use std::ops::Range; + +use store_api::logstore::EntryId; + +/// Convert a sequence of [`EntryId`]s into size ranges. 
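+///
+/// For example, with `avg_size = 1024`, the indexes `[1, 4]` map to the byte ranges
+/// `0..1025` and `3072..4097` (offsets are measured from the first index).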
+pub(crate) struct ConvertIndexToRange<'a, I: Iterator> { + base: Option, + iter: Peekable, + avg_size: usize, +} + +impl<'a, I: Iterator> ConvertIndexToRange<'a, I> { + pub fn new(mut iter: Peekable, avg_size: usize) -> Self { + let base = iter.peek().cloned().cloned(); + + Self { + base, + iter, + avg_size, + } + } +} + +impl<'a, I: Iterator> Iterator for ConvertIndexToRange<'a, I> { + type Item = Range; + + fn next(&mut self) -> Option { + let (base, val) = (&self.base?, self.iter.next()?); + let start = (*val - *base) as usize * self.avg_size; + let end = start + self.avg_size + 1; + Some(start..end) + } +} + +/// Merge all ranges smaller than the `window_size`. +/// +/// e.g., +/// +/// Case 1 +/// - input: range: [(0..3), (5..6), (5..8)], window_size: 6 +/// - output: range: (0..6) +/// +/// Case 2 +/// - input: range: [(0..3)], window_size: 6 +/// - output: range: (0..3) +pub(crate) struct MergeRange>> { + iter: I, + window_size: usize, +} + +impl>> MergeRange { + pub fn new(iter: I, window_size: usize) -> Self { + Self { iter, window_size } + } +} + +/// Merges ranges. +fn merge(this: &mut Range, other: &Range) { + this.start = min(this.start, other.start); + this.end = max(this.end, other.end); +} + +impl>> MergeRange { + /// Calculates the size of the next merged range. + pub(crate) fn merge(mut self) -> Option<(Range, usize)> { + let mut merged_range = self.iter.next(); + let this = merged_range.as_mut()?; + let mut merged = 1; + for next in self.iter { + let window = next.start - this.start; + if window > self.window_size { + break; + } else { + merge(this, &next); + merged += 1; + } + } + merged_range.map(|range| (range, merged)) + } +} + +#[cfg(test)] +#[allow(clippy::single_range_in_vec_init)] +mod tests { + use super::*; + + #[test] + fn test_convert_index_to_range() { + let avg_size = 1024; + let index = [1u64, 4, 10, 15]; + let mut converter = ConvertIndexToRange::new(index.iter().peekable(), avg_size); + + assert_eq!(converter.next(), Some(0..avg_size + 1)); + assert_eq!(converter.next(), Some(3 * avg_size..4 * avg_size + 1)); + assert_eq!(converter.next(), Some(9 * avg_size..10 * avg_size + 1)); + assert_eq!(converter.next(), Some(14 * avg_size..15 * avg_size + 1)); + assert_eq!(converter.next(), None); + + let index = []; + let mut converter = ConvertIndexToRange::new(index.iter().peekable(), avg_size); + assert_eq!(converter.next(), None); + } + + #[test] + fn test_merge_range() { + let size_range = [(10usize..13), (12..14), (16..18), (19..29)]; + let merger = MergeRange::new(size_range.into_iter(), 9); + assert_eq!(merger.merge(), Some((10..29, 4))); + + let size_range = [(10usize..13), (12..14), (16..18)]; + let merger = MergeRange::new(size_range.into_iter(), 5); + assert_eq!(merger.merge(), Some((10..14, 2))); + + let size_range = [(10usize..13), (15..17), (16..18)]; + let merger = MergeRange::new(size_range.into_iter(), 5); + assert_eq!(merger.merge(), Some((10..17, 2))); + + let size_range = [(10usize..13)]; + let merger = MergeRange::new(size_range.into_iter(), 4); + assert_eq!(merger.merge(), Some((10..13, 1))); + + let size_range = [(10usize..13)]; + let merger = MergeRange::new(size_range.into_iter(), 2); + assert_eq!(merger.merge(), Some((10..13, 1))); + + let size_range = []; + let merger = MergeRange::new(size_range.into_iter(), 2); + assert_eq!(merger.merge(), None); + } +} diff --git a/src/meta-srv/src/selector.rs b/src/meta-srv/src/selector.rs index d69f0ca5ead2..0795bccd9b82 100644 --- a/src/meta-srv/src/selector.rs +++ b/src/meta-srv/src/selector.rs 
@@ -57,12 +57,21 @@ impl Default for SelectorOptions { } } +/// [`SelectorType`] refers to the load balancer used when creating tables. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] #[serde(try_from = "String")] pub enum SelectorType { - #[default] + /// The current load balancing is based on the number of regions on each datanode node; + /// the more regions, the higher the load (it may be changed to Capacity Units(CU) + /// calculation in the future). LoadBased, + /// This one randomly selects from all available (in lease) nodes. Its characteristic + /// is simplicity and fast. LeaseBased, + /// This one selects the node in a round-robin way. + /// In most cases, it's recommended and is the default option. If you're unsure which + /// to choose, using it is usually correct. + #[default] RoundRobin, } @@ -97,7 +106,7 @@ mod tests { #[test] fn test_default_selector_type() { - assert_eq!(SelectorType::LoadBased, SelectorType::default()); + assert_eq!(SelectorType::RoundRobin, SelectorType::default()); } #[test] diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index 71c35b6119cc..c5f7a2b4a32c 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -307,8 +307,7 @@ mod test { env.init_metric_region().await; let region_id = to_metadata_region_id(env.default_physical_region_id()); - let mut builder = Fs::default(); - builder.root(&env.data_home()); + let builder = Fs::default().root(&env.data_home()); let object_store = ObjectStore::new(builder).unwrap().finish(); let region_dir = "test_metric_region"; diff --git a/src/mito2/src/access_layer.rs b/src/mito2/src/access_layer.rs index a72527fb3351..c8f3de8785d5 100644 --- a/src/mito2/src/access_layer.rs +++ b/src/mito2/src/access_layer.rs @@ -212,8 +212,7 @@ pub(crate) async fn new_fs_cache_store(root: &str) -> Result { let atomic_write_dir = join_dir(root, ".tmp/"); clean_dir(&atomic_write_dir).await?; - let mut builder = Fs::default(); - builder.root(root).atomic_write_dir(&atomic_write_dir); + let builder = Fs::default().root(root).atomic_write_dir(&atomic_write_dir); let store = ObjectStore::new(builder).context(OpenDalSnafu)?.finish(); Ok(with_instrument_layers(store, false)) diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs index 008a71759318..6e902490c0dc 100644 --- a/src/mito2/src/cache/file_cache.rs +++ b/src/mito2/src/cache/file_cache.rs @@ -199,7 +199,9 @@ impl FileCache { .metakey(Metakey::ContentLength) .await .context(OpenDalSnafu)?; - let (mut total_size, mut total_keys) = (0, 0); + // Use i64 for total_size to reduce the risk of overflow. + // It is possible that the total size of the cache is larger than i32::MAX. + let (mut total_size, mut total_keys) = (0i64, 0); while let Some(entry) = lister.try_next().await.context(OpenDalSnafu)? { let meta = entry.metadata(); if !meta.is_file() { @@ -212,13 +214,11 @@ impl FileCache { self.memory_index .insert(key, IndexValue { file_size }) .await; - total_size += file_size; + total_size += i64::from(file_size); total_keys += 1; } // The metrics is a signed int gauge so we can updates it finally. 
- CACHE_BYTES - .with_label_values(&[FILE_TYPE]) - .add(total_size.into()); + CACHE_BYTES.with_label_values(&[FILE_TYPE]).add(total_size); info!( "Recovered file cache, num_keys: {}, num_bytes: {}, cost: {:?}", @@ -382,8 +382,7 @@ mod tests { use super::*; fn new_fs_store(path: &str) -> ObjectStore { - let mut builder = Fs::default(); - builder.root(path); + let builder = Fs::default().root(path); ObjectStore::new(builder).unwrap().finish() } diff --git a/src/mito2/src/cache/test_util.rs b/src/mito2/src/cache/test_util.rs index 306bb50467e8..9c3b08177fa4 100644 --- a/src/mito2/src/cache/test_util.rs +++ b/src/mito2/src/cache/test_util.rs @@ -46,7 +46,6 @@ fn parquet_file_data() -> Vec { } pub(crate) fn new_fs_store(path: &str) -> ObjectStore { - let mut builder = Fs::default(); - builder.root(path); - ObjectStore::new(builder).unwrap().finish() + let builder = Fs::default(); + ObjectStore::new(builder.root(path)).unwrap().finish() } diff --git a/src/mito2/src/compaction/picker.rs b/src/mito2/src/compaction/picker.rs index 3ae1012fd86d..30c8d2844638 100644 --- a/src/mito2/src/compaction/picker.rs +++ b/src/mito2/src/compaction/picker.rs @@ -133,7 +133,9 @@ pub fn new_picker( match compaction_options { CompactionOptions::Twcs(twcs_opts) => Arc::new(TwcsPicker::new( twcs_opts.max_active_window_runs, + twcs_opts.max_active_window_files, twcs_opts.max_inactive_window_runs, + twcs_opts.max_inactive_window_files, twcs_opts.time_window_seconds(), )) as Arc<_>, } diff --git a/src/mito2/src/compaction/test_util.rs b/src/mito2/src/compaction/test_util.rs index 902228623787..1df462004f8d 100644 --- a/src/mito2/src/compaction/test_util.rs +++ b/src/mito2/src/compaction/test_util.rs @@ -43,3 +43,29 @@ pub fn new_file_handle( file_purger, ) } + +pub(crate) fn new_file_handles(file_specs: &[(i64, i64, u64)]) -> Vec { + let file_purger = new_noop_file_purger(); + file_specs + .iter() + .map(|(start, end, size)| { + FileHandle::new( + FileMeta { + region_id: 0.into(), + file_id: FileId::random(), + time_range: ( + Timestamp::new_millisecond(*start), + Timestamp::new_millisecond(*end), + ), + level: 0, + file_size: *size, + available_indexes: Default::default(), + index_file_size: 0, + num_rows: 0, + num_row_groups: 0, + }, + file_purger.clone(), + ) + }) + .collect() +} diff --git a/src/mito2/src/compaction/twcs.rs b/src/mito2/src/compaction/twcs.rs index 81ad5268501f..4bbad692f05f 100644 --- a/src/mito2/src/compaction/twcs.rs +++ b/src/mito2/src/compaction/twcs.rs @@ -14,7 +14,7 @@ use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap}; -use std::fmt::{Debug, Formatter}; +use std::fmt::Debug; use common_telemetry::{debug, info}; use common_time::timestamp::TimeUnit; @@ -24,7 +24,7 @@ use common_time::Timestamp; use crate::compaction::buckets::infer_time_bucket; use crate::compaction::compactor::CompactionRegion; use crate::compaction::picker::{Picker, PickerOutput}; -use crate::compaction::run::{find_sorted_runs, reduce_runs}; +use crate::compaction::run::{find_sorted_runs, reduce_runs, Item}; use crate::compaction::{get_expired_ssts, CompactionOutput}; use crate::sst::file::{overlaps, FileHandle, FileId, Level}; use crate::sst::version::LevelMeta; @@ -33,31 +33,29 @@ const LEVEL_COMPACTED: Level = 1; /// `TwcsPicker` picks files of which the max timestamp are in the same time window as compaction /// candidates. 
+#[derive(Debug)] pub struct TwcsPicker { max_active_window_runs: usize, + max_active_window_files: usize, max_inactive_window_runs: usize, + max_inactive_window_files: usize, time_window_seconds: Option, } -impl Debug for TwcsPicker { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TwcsPicker") - .field("max_active_window_runs", &self.max_active_window_runs) - .field("max_inactive_window_runs", &self.max_inactive_window_runs) - .finish() - } -} - impl TwcsPicker { pub fn new( max_active_window_runs: usize, + max_active_window_files: usize, max_inactive_window_runs: usize, + max_inactive_window_files: usize, time_window_seconds: Option, ) -> Self { Self { max_inactive_window_runs, max_active_window_runs, time_window_seconds, + max_active_window_files, + max_inactive_window_files, } } @@ -73,12 +71,15 @@ impl TwcsPicker { for (window, files) in time_windows { let sorted_runs = find_sorted_runs(&mut files.files); - let max_runs = if let Some(active_window) = active_window + let (max_runs, max_files) = if let Some(active_window) = active_window && *window == active_window { - self.max_active_window_runs + (self.max_active_window_runs, self.max_active_window_files) } else { - self.max_inactive_window_runs + ( + self.max_inactive_window_runs, + self.max_inactive_window_files, + ) }; // we only remove deletion markers once no file in current window overlaps with any other window. @@ -87,16 +88,33 @@ impl TwcsPicker { if found_runs > max_runs { let files_to_compact = reduce_runs(sorted_runs, max_runs); - info!("Building compaction output, active window: {:?}, current window: {}, max runs: {}, found runs: {}, output size: {}", active_window, *window,max_runs, found_runs, files_to_compact.len()); + info!("Building compaction output, active window: {:?}, current window: {}, max runs: {}, found runs: {}, output size: {}, remove deletion markers: {}", active_window, *window,max_runs, found_runs, files_to_compact.len(), filter_deleted); for inputs in files_to_compact { output.push(CompactionOutput { output_file_id: FileId::random(), output_level: LEVEL_COMPACTED, // always compact to l1 inputs, filter_deleted, - output_time_range: None, // we do not enforce output time range in twcs compactions.}); + output_time_range: None, // we do not enforce output time range in twcs compactions. }); } + } else if files.files.len() > max_files { + debug!( + "Enforcing max file num in window: {}, active: {:?}, max: {}, current: {}", + *window, + active_window, + max_files, + files.files.len() + ); + // Files in window exceeds file num limit + let to_merge = enforce_file_num(&files.files, max_files); + output.push(CompactionOutput { + output_file_id: FileId::random(), + output_level: LEVEL_COMPACTED, // always compact to l1 + inputs: to_merge, + filter_deleted, + output_time_range: None, + }); } else { debug!("Skip building compaction output, active window: {:?}, current window: {}, max runs: {}, found runs: {}, ", active_window, *window, max_runs, found_runs); } @@ -105,6 +123,31 @@ impl TwcsPicker { } } +/// Merges consecutive files so that file num does not exceed `max_file_num`, and chooses +/// the solution with minimum overhead according to files sizes to be merged. +/// `enforce_file_num` only merges consecutive files so that it won't create overlapping outputs. +/// `runs` must be sorted according to time ranges. 
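A worked example of the minimum-overhead rule described above, mirroring the first case in `test_enforce_file_num` below: with three sorted files of sizes [2, 1, 1] and `max_file_num = 2`, exactly `3 - 2 + 1 = 2` consecutive files must be merged; the candidate windows cost 2 + 1 = 3 and 1 + 1 = 2, so the last two files are picked.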
+fn enforce_file_num<T: Item>(files: &[T], max_file_num: usize) -> Vec<T> {
+    debug_assert!(files.len() > max_file_num);
+    let to_merge = files.len() - max_file_num + 1;
+    let mut min_penalty = usize::MAX;
+    let mut min_idx = 0;
+
+    for idx in 0..=(files.len() - to_merge) {
+        let current_penalty: usize = files
+            .iter()
+            .skip(idx)
+            .take(to_merge)
+            .map(|f| f.size())
+            .sum();
+        if current_penalty < min_penalty {
+            min_penalty = current_penalty;
+            min_idx = idx;
+        }
+    }
+    files.iter().skip(min_idx).take(to_merge).cloned().collect()
+}
+
 impl Picker for TwcsPicker {
     fn pick(&self, compaction_region: &CompactionRegion) -> Option<PickerOutput> {
         let region_id = compaction_region.region_id;
@@ -264,7 +307,7 @@ mod tests {
     use std::collections::HashSet;
 
     use super::*;
-    use crate::compaction::test_util::new_file_handle;
+    use crate::compaction::test_util::{new_file_handle, new_file_handles};
     use crate::sst::file::Level;
 
     #[test]
@@ -482,7 +525,8 @@ mod tests {
         let mut windows = assign_to_windows(self.input_files.iter(), self.window_size);
         let active_window =
             find_latest_window_in_seconds(self.input_files.iter(), self.window_size);
-        let output = TwcsPicker::new(4, 1, None).build_output(&mut windows, active_window);
+        let output = TwcsPicker::new(4, usize::MAX, 1, usize::MAX, None)
+            .build_output(&mut windows, active_window);
 
         let output = output
             .iter()
@@ -514,6 +558,43 @@ mod tests {
         output_level: Level,
     }
 
+    fn check_enforce_file_num(
+        input_files: &[(i64, i64, u64)],
+        max_file_num: usize,
+        files_to_merge: &[(i64, i64)],
+    ) {
+        let mut files = new_file_handles(input_files);
+        // ensure sorted
+        find_sorted_runs(&mut files);
+        let mut to_merge = enforce_file_num(&files, max_file_num);
+        to_merge.sort_unstable_by_key(|f| f.time_range().0);
+        assert_eq!(
+            files_to_merge.to_vec(),
+            to_merge
+                .iter()
+                .map(|f| {
+                    let (start, end) = f.time_range();
+                    (start.value(), end.value())
+                })
+                .collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_enforce_file_num() {
+        check_enforce_file_num(
+            &[(0, 300, 2), (100, 200, 1), (200, 400, 1)],
+            2,
+            &[(100, 200), (200, 400)],
+        );
+
+        check_enforce_file_num(
+            &[(0, 300, 200), (100, 200, 100), (200, 400, 100)],
+            1,
+            &[(0, 300), (100, 200), (200, 400)],
+        );
+    }
+
     #[test]
     fn test_build_twcs_output() {
         let file_ids = (0..4).map(|_| FileId::random()).collect::<Vec<_>>();
diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs
index 7fd87d774f6e..5f0917688f35 100644
--- a/src/mito2/src/config.rs
+++ b/src/mito2/src/config.rs
@@ -43,6 +43,8 @@ const SST_META_CACHE_SIZE_FACTOR: u64 = 32;
 const INDEX_CONTENT_CACHE_SIZE_FACTOR: u64 = 32;
 /// Use `1/MEM_CACHE_SIZE_FACTOR` of OS memory size as mem cache size in default mode
 const MEM_CACHE_SIZE_FACTOR: u64 = 16;
+/// Use `1/PAGE_CACHE_SIZE_FACTOR` of OS memory size as page cache size in default mode
+const PAGE_CACHE_SIZE_FACTOR: u64 = 8;
 /// Use `1/INDEX_CREATE_MEM_THRESHOLD_FACTOR` of OS memory size as mem threshold for creating index
 const INDEX_CREATE_MEM_THRESHOLD_FACTOR: u64 = 16;
 
@@ -236,12 +238,13 @@ impl MitoConfig {
         );
 
         // shouldn't be greater than 512MB in default mode.
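A worked sizing example for the factors above: a host with 32 GiB of memory gets a page cache of 32 GiB / 8 = 4 GiB, while the vector and selector-result caches stay at min(32 GiB / 16, 512 MB) = 512 MB.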
let mem_cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(512)); + let page_cache_size = sys_memory / PAGE_CACHE_SIZE_FACTOR; self.global_write_buffer_size = global_write_buffer_size; self.global_write_buffer_reject_size = global_write_buffer_reject_size; self.sst_meta_cache_size = sst_meta_cache_size; self.vector_cache_size = mem_cache_size; - self.page_cache_size = mem_cache_size; + self.page_cache_size = page_cache_size; self.selector_result_cache_size = mem_cache_size; } diff --git a/src/mito2/src/engine/compaction_test.rs b/src/mito2/src/engine/compaction_test.rs index 9de4a0ddf572..e19f95088c46 100644 --- a/src/mito2/src/engine/compaction_test.rs +++ b/src/mito2/src/engine/compaction_test.rs @@ -220,7 +220,9 @@ async fn test_compaction_region_with_overlapping_delete_all() { let request = CreateRequestBuilder::new() .insert_option("compaction.type", "twcs") .insert_option("compaction.twcs.max_active_window_runs", "2") + .insert_option("compaction.twcs.max_active_window_files", "2") .insert_option("compaction.twcs.max_inactive_window_runs", "2") + .insert_option("compaction.twcs.max_inactive_window_files", "2") .insert_option("compaction.twcs.time_window", "1h") .build(); diff --git a/src/mito2/src/manifest/storage.rs b/src/mito2/src/manifest/storage.rs index d470f2050b89..060a84f49de7 100644 --- a/src/mito2/src/manifest/storage.rs +++ b/src/mito2/src/manifest/storage.rs @@ -642,8 +642,7 @@ mod tests { fn new_test_manifest_store() -> ManifestObjectStore { common_telemetry::init_default_ut_logging(); let tmp_dir = create_temp_dir("test_manifest_log_store"); - let mut builder = Fs::default(); - let _ = builder.root(&tmp_dir.path().to_string_lossy()); + let builder = Fs::default().root(&tmp_dir.path().to_string_lossy()); let object_store = ObjectStore::new(builder).unwrap().finish(); ManifestObjectStore::new( "/", diff --git a/src/mito2/src/region/options.rs b/src/mito2/src/region/options.rs index 7a28cee977d6..71882fbfc130 100644 --- a/src/mito2/src/region/options.rs +++ b/src/mito2/src/region/options.rs @@ -186,9 +186,15 @@ pub struct TwcsOptions { /// Max num of sorted runs that can be kept in active writing time window. #[serde_as(as = "DisplayFromStr")] pub max_active_window_runs: usize, - /// Max num of files that can be kept in inactive time window. + /// Max num of files in the active window. + #[serde_as(as = "DisplayFromStr")] + pub max_active_window_files: usize, + /// Max num of sorted runs that can be kept in inactive time windows. #[serde_as(as = "DisplayFromStr")] pub max_inactive_window_runs: usize, + /// Max num of files in inactive time windows. + #[serde_as(as = "DisplayFromStr")] + pub max_inactive_window_files: usize, /// Compaction time window defined when creating tables. 
#[serde(with = "humantime_serde")] pub time_window: Option, @@ -217,7 +223,9 @@ impl Default for TwcsOptions { fn default() -> Self { Self { max_active_window_runs: 4, + max_active_window_files: 4, max_inactive_window_runs: 1, + max_inactive_window_files: 1, time_window: None, remote_compaction: false, } @@ -576,7 +584,9 @@ mod tests { let map = make_map(&[ ("ttl", "7d"), ("compaction.twcs.max_active_window_runs", "8"), + ("compaction.twcs.max_active_window_files", "11"), ("compaction.twcs.max_inactive_window_runs", "2"), + ("compaction.twcs.max_inactive_window_files", "3"), ("compaction.twcs.time_window", "2h"), ("compaction.type", "twcs"), ("compaction.twcs.remote_compaction", "false"), @@ -599,7 +609,9 @@ mod tests { ttl: Some(Duration::from_secs(3600 * 24 * 7)), compaction: CompactionOptions::Twcs(TwcsOptions { max_active_window_runs: 8, + max_active_window_files: 11, max_inactive_window_runs: 2, + max_inactive_window_files: 3, time_window: Some(Duration::from_secs(3600 * 2)), remote_compaction: false, }), @@ -628,7 +640,9 @@ mod tests { ttl: Some(Duration::from_secs(3600 * 24 * 7)), compaction: CompactionOptions::Twcs(TwcsOptions { max_active_window_runs: 8, + max_active_window_files: usize::MAX, max_inactive_window_runs: 2, + max_inactive_window_files: usize::MAX, time_window: Some(Duration::from_secs(3600 * 2)), remote_compaction: false, }), @@ -663,7 +677,9 @@ mod tests { "compaction": { "compaction.type": "twcs", "compaction.twcs.max_active_window_runs": "8", + "compaction.twcs.max_active_window_files": "11", "compaction.twcs.max_inactive_window_runs": "2", + "compaction.twcs.max_inactive_window_files": "7", "compaction.twcs.time_window": "2h" }, "storage": "S3", @@ -689,7 +705,9 @@ mod tests { ttl: Some(Duration::from_secs(3600 * 24 * 7)), compaction: CompactionOptions::Twcs(TwcsOptions { max_active_window_runs: 8, + max_active_window_files: 11, max_inactive_window_runs: 2, + max_inactive_window_files: 7, time_window: Some(Duration::from_secs(3600 * 2)), remote_compaction: false, }), diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs index 0753b1a3eb76..9e6c6c89e8eb 100644 --- a/src/mito2/src/sst/file_purger.rs +++ b/src/mito2/src/sst/file_purger.rs @@ -114,8 +114,7 @@ mod tests { let dir = create_temp_dir("file-purge"); let dir_path = dir.path().display().to_string(); - let mut builder = Fs::default(); - builder.root(&dir_path); + let builder = Fs::default().root(&dir_path); let sst_file_id = FileId::random(); let sst_dir = "table1"; let path = location::sst_file_path(sst_dir, sst_file_id); @@ -171,8 +170,7 @@ mod tests { let dir = create_temp_dir("file-purge"); let dir_path = dir.path().display().to_string(); - let mut builder = Fs::default(); - builder.root(&dir_path); + let builder = Fs::default().root(&dir_path); let sst_file_id = FileId::random(); let sst_dir = "table1"; diff --git a/src/mito2/src/sst/parquet/helper.rs b/src/mito2/src/sst/parquet/helper.rs index b3cc8f8279d3..e80f751af982 100644 --- a/src/mito2/src/sst/parquet/helper.rs +++ b/src/mito2/src/sst/parquet/helper.rs @@ -16,7 +16,7 @@ use std::ops::Range; use std::sync::Arc; use bytes::Bytes; -use object_store::{ErrorKind, ObjectStore}; +use object_store::ObjectStore; use parquet::basic::ColumnOrder; use parquet::file::metadata::{FileMetaData, ParquetMetaData, RowGroupMetaData}; use parquet::format; @@ -88,84 +88,26 @@ fn parse_column_orders( } } -/// Fetches data from object store. -/// If the object store supports blocking, use sequence blocking read. 
-/// Otherwise, use concurrent read. -pub async fn fetch_byte_ranges( - file_path: &str, - object_store: ObjectStore, - ranges: &[Range], -) -> object_store::Result> { - if object_store.info().full_capability().blocking { - fetch_ranges_seq(file_path, object_store, ranges).await - } else { - fetch_ranges_concurrent(file_path, object_store, ranges).await - } -} - -/// Fetches data from object store sequentially -async fn fetch_ranges_seq( - file_path: &str, - object_store: ObjectStore, - ranges: &[Range], -) -> object_store::Result> { - let block_object_store = object_store.blocking(); - let file_path = file_path.to_string(); - let ranges = ranges.to_vec(); - - let f = move || -> object_store::Result> { - ranges - .into_iter() - .map(|range| { - let data = block_object_store - .read_with(&file_path) - .range(range.start..range.end) - .call()?; - Ok::<_, object_store::Error>(data.to_bytes()) - }) - .collect::>>() - }; - - maybe_spawn_blocking(f).await -} +const FETCH_PARALLELISM: usize = 8; +const MERGE_GAP: usize = 512 * 1024; -/// Fetches data from object store concurrently. -async fn fetch_ranges_concurrent( +/// Asynchronously fetches byte ranges from an object store. +/// +/// * `FETCH_PARALLELISM` - The number of concurrent fetch operations. +/// * `MERGE_GAP` - The maximum gap size (in bytes) to merge small byte ranges for optimized fetching. +pub async fn fetch_byte_ranges( file_path: &str, object_store: ObjectStore, ranges: &[Range], ) -> object_store::Result> { - // TODO(QuenKar): may merge small ranges to a bigger range to optimize. - let mut handles = Vec::with_capacity(ranges.len()); - for range in ranges { - let future_read = object_store.read_with(file_path); - handles.push(async move { - let data = future_read.range(range.start..range.end).await?; - Ok::<_, object_store::Error>(data.to_bytes()) - }); - } - let results = futures::future::try_join_all(handles).await?; - Ok(results) -} - -// Port from https://github.com/apache/arrow-rs/blob/802ed428f87051fdca31180430ddb0ecb2f60e8b/object_store/src/util.rs#L74-L83 -/// Takes a function and spawns it to a tokio blocking pool if available -async fn maybe_spawn_blocking(f: F) -> object_store::Result -where - F: FnOnce() -> object_store::Result + Send + 'static, - T: Send + 'static, -{ - match tokio::runtime::Handle::try_current() { - Ok(runtime) => runtime - .spawn_blocking(f) - .await - .map_err(new_task_join_error)?, - Err(_) => f(), - } -} - -// https://github.com/apache/opendal/blob/v0.46.0/core/src/raw/tokio_util.rs#L21-L24 -/// Parse tokio error into opendal::Error. -fn new_task_join_error(e: tokio::task::JoinError) -> object_store::Error { - object_store::Error::new(ErrorKind::Unexpected, "tokio task join failed").set_source(e) + Ok(object_store + .reader_with(file_path) + .concurrent(FETCH_PARALLELISM) + .gap(MERGE_GAP) + .await? + .fetch(ranges.to_vec()) + .await? 
+ .into_iter() + .map(|buf| buf.to_bytes()) + .collect::>()) } diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs index 08c43dd31c84..dcc461ab0991 100644 --- a/src/mito2/src/test_util.rs +++ b/src/mito2/src/test_util.rs @@ -365,8 +365,7 @@ impl TestEnv { .display() .to_string(); let mut builder = Fs::default(); - builder.root(&data_path); - let object_store = ObjectStore::new(builder).unwrap().finish(); + let object_store = ObjectStore::new(builder.root(&data_path)).unwrap().finish(); object_store_manager.add(storage_name, object_store); } let object_store_manager = Arc::new(object_store_manager); @@ -553,8 +552,7 @@ impl TestEnv { fn create_object_store_manager(&self) -> ObjectStoreManager { let data_home = self.data_home.path(); let data_path = data_home.join("data").as_path().display().to_string(); - let mut builder = Fs::default(); - builder.root(&data_path); + let builder = Fs::default().root(&data_path); let object_store = ObjectStore::new(builder).unwrap().finish(); ObjectStoreManager::new("default", object_store) } @@ -570,9 +568,10 @@ impl TestEnv { let data_home = self.data_home.path(); let manifest_dir = data_home.join("manifest").as_path().display().to_string(); - let mut builder = Fs::default(); - builder.root(&manifest_dir); - let object_store = ObjectStore::new(builder).unwrap().finish(); + let builder = Fs::default(); + let object_store = ObjectStore::new(builder.root(&manifest_dir)) + .unwrap() + .finish(); // The "manifest_dir" here should be the relative path from the `object_store`'s root. // Otherwise the OpenDal's list operation would fail with "StripPrefixError". This is diff --git a/src/mito2/src/test_util/scheduler_util.rs b/src/mito2/src/test_util/scheduler_util.rs index a6ffe0b2bf97..c1b85279deda 100644 --- a/src/mito2/src/test_util/scheduler_util.rs +++ b/src/mito2/src/test_util/scheduler_util.rs @@ -52,8 +52,7 @@ impl SchedulerEnv { pub(crate) async fn new() -> SchedulerEnv { let path = create_temp_dir(""); let path_str = path.path().display().to_string(); - let mut builder = Fs::default(); - builder.root(&path_str); + let builder = Fs::default().root(&path_str); let index_aux_path = path.path().join("index_aux"); let puffin_mgr = PuffinManagerFactory::new(&index_aux_path, 4096, None) diff --git a/src/object-store/Cargo.toml b/src/object-store/Cargo.toml index 00bb5a93acfd..ca2a3a7ab32f 100644 --- a/src/object-store/Cargo.toml +++ b/src/object-store/Cargo.toml @@ -17,7 +17,7 @@ futures.workspace = true lazy_static.workspace = true md5 = "0.7" moka = { workspace = true, features = ["future"] } -opendal = { version = "0.47", features = [ +opendal = { version = "0.48", features = [ "layers-tracing", "services-azblob", "services-fs", diff --git a/src/object-store/src/layers/lru_cache.rs b/src/object-store/src/layers/lru_cache.rs index ded6afe58bb6..3fea6945e745 100644 --- a/src/object-store/src/layers/lru_cache.rs +++ b/src/object-store/src/layers/lru_cache.rs @@ -25,12 +25,19 @@ use common_telemetry::info; use read_cache::ReadCache; /// An opendal layer with local LRU file cache supporting. -#[derive(Clone)] pub struct LruCacheLayer { // The read cache read_cache: ReadCache, } +impl Clone for LruCacheLayer { + fn clone(&self) -> Self { + Self { + read_cache: self.read_cache.clone(), + } + } +} + impl LruCacheLayer { /// Create a `[LruCacheLayer]` with local file cache and capacity in bytes. 
pub async fn new(file_cache: Arc, capacity: usize) -> Result { diff --git a/src/object-store/src/layers/prometheus.rs b/src/object-store/src/layers/prometheus.rs index 5a2d0b603261..29897db98711 100644 --- a/src/object-store/src/layers/prometheus.rs +++ b/src/object-store/src/layers/prometheus.rs @@ -552,11 +552,12 @@ impl oio::BlockingRead for PrometheusMetricWrapper { } impl oio::Write for PrometheusMetricWrapper { - async fn write(&mut self, bs: Buffer) -> Result { + async fn write(&mut self, bs: Buffer) -> Result<()> { + let bytes = bs.len(); match self.inner.write(bs).await { - Ok(n) => { - self.bytes += n as u64; - Ok(n) + Ok(_) => { + self.bytes += bytes as u64; + Ok(()) } Err(err) => { increment_errors_total(self.op, err.kind()); @@ -581,12 +582,12 @@ impl oio::Write for PrometheusMetricWrapper { } impl oio::BlockingWrite for PrometheusMetricWrapper { - fn write(&mut self, bs: Buffer) -> Result { + fn write(&mut self, bs: Buffer) -> Result<()> { + let bytes = bs.len(); self.inner .write(bs) - .map(|n| { - self.bytes += n as u64; - n + .map(|_| { + self.bytes += bytes as u64; }) .map_err(|err| { increment_errors_total(self.op, err.kind()); diff --git a/src/object-store/src/manager.rs b/src/object-store/src/manager.rs index fb6d73321967..6513923b52cc 100644 --- a/src/object-store/src/manager.rs +++ b/src/object-store/src/manager.rs @@ -61,8 +61,7 @@ mod tests { fn new_object_store(dir: &TempDir) -> ObjectStore { let store_dir = dir.path().to_str().unwrap(); - let mut builder = Builder::default(); - let _ = builder.root(store_dir); + let builder = Builder::default().root(store_dir); ObjectStore::new(builder).unwrap().finish() } diff --git a/src/object-store/tests/object_store_test.rs b/src/object-store/tests/object_store_test.rs index b5cedf6e651a..868cce33eefa 100644 --- a/src/object-store/tests/object_store_test.rs +++ b/src/object-store/tests/object_store_test.rs @@ -95,8 +95,7 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> { async fn test_fs_backend() -> Result<()> { let data_dir = create_temp_dir("test_fs_backend"); let tmp_dir = create_temp_dir("test_fs_backend"); - let mut builder = Fs::default(); - let _ = builder + let builder = Fs::default() .root(&data_dir.path().to_string_lossy()) .atomic_write_dir(&tmp_dir.path().to_string_lossy()); @@ -117,8 +116,7 @@ async fn test_s3_backend() -> Result<()> { let root = uuid::Uuid::new_v4().to_string(); - let mut builder = S3::default(); - let _ = builder + let builder = S3::default() .root(&root) .access_key_id(&env::var("GT_S3_ACCESS_KEY_ID")?) .secret_access_key(&env::var("GT_S3_ACCESS_KEY")?) @@ -146,8 +144,7 @@ async fn test_oss_backend() -> Result<()> { let root = uuid::Uuid::new_v4().to_string(); - let mut builder = Oss::default(); - let _ = builder + let builder = Oss::default() .root(&root) .access_key_id(&env::var("GT_OSS_ACCESS_KEY_ID")?) .access_key_secret(&env::var("GT_OSS_ACCESS_KEY")?) @@ -174,8 +171,7 @@ async fn test_azblob_backend() -> Result<()> { let root = uuid::Uuid::new_v4().to_string(); - let mut builder = Azblob::default(); - let _ = builder + let builder = Azblob::default() .root(&root) .account_name(&env::var("GT_AZBLOB_ACCOUNT_NAME")?) .account_key(&env::var("GT_AZBLOB_ACCOUNT_KEY")?) 
@@ -199,8 +195,7 @@ async fn test_gcs_backend() -> Result<()> { if !container.is_empty() { info!("Running azblob test."); - let mut builder = Gcs::default(); - builder + let builder = Gcs::default() .root(&uuid::Uuid::new_v4().to_string()) .bucket(&env::var("GT_GCS_BUCKET").unwrap()) .scope(&env::var("GT_GCS_SCOPE").unwrap()) @@ -224,8 +219,7 @@ async fn test_file_backend_with_lru_cache() -> Result<()> { let data_dir = create_temp_dir("test_file_backend_with_lru_cache"); let tmp_dir = create_temp_dir("test_file_backend_with_lru_cache"); - let mut builder = Fs::default(); - let _ = builder + let builder = Fs::default() .root(&data_dir.path().to_string_lossy()) .atomic_write_dir(&tmp_dir.path().to_string_lossy()); @@ -233,8 +227,7 @@ async fn test_file_backend_with_lru_cache() -> Result<()> { let cache_dir = create_temp_dir("test_file_backend_with_lru_cache"); let cache_layer = { - let mut builder = Fs::default(); - let _ = builder + let builder = Fs::default() .root(&cache_dir.path().to_string_lossy()) .atomic_write_dir(&cache_dir.path().to_string_lossy()); let file_cache = Arc::new(builder.build().unwrap()); @@ -307,8 +300,7 @@ async fn test_object_store_cache_policy() -> Result<()> { // create file cache layer let cache_dir = create_temp_dir("test_object_store_cache_policy_cache"); let atomic_temp_dir = create_temp_dir("test_object_store_cache_policy_cache_tmp"); - let mut builder = Fs::default(); - let _ = builder + let builder = Fs::default() .root(&cache_dir.path().to_string_lossy()) .atomic_write_dir(&atomic_temp_dir.path().to_string_lossy()); let file_cache = Arc::new(builder.build().unwrap()); diff --git a/src/operator/src/statement/copy_database.rs b/src/operator/src/statement/copy_database.rs index 64808fa86a04..a4b1b9267b2c 100644 --- a/src/operator/src/statement/copy_database.rs +++ b/src/operator/src/statement/copy_database.rs @@ -244,8 +244,7 @@ mod tests { async fn test_list_files_and_parse_table_name() { let dir = common_test_util::temp_dir::create_temp_dir("test_list_files_to_copy"); let store_dir = normalize_dir(dir.path().to_str().unwrap()); - let mut builder = Fs::default(); - let _ = builder.root(&store_dir); + let builder = Fs::default().root(&store_dir); let object_store = ObjectStore::new(builder).unwrap().finish(); object_store.write("a.parquet", "").await.unwrap(); object_store.write("b.parquet", "").await.unwrap(); diff --git a/src/pipeline/tests/date.rs b/src/pipeline/tests/date.rs new file mode 100644 index 000000000000..775f0688c146 --- /dev/null +++ b/src/pipeline/tests/date.rs @@ -0,0 +1,138 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod common; + +use api::v1::ColumnSchema; +use greptime_proto::v1::value::ValueData; +use greptime_proto::v1::{ColumnDataType, SemanticType}; +use lazy_static::lazy_static; + +const TEST_INPUT: &str = r#" +{ + "input_str": "2024-06-27T06:13:36.991Z" +}"#; + +const TEST_VALUE: Option = + Some(ValueData::TimestampNanosecondValue(1719468816991000000)); + +lazy_static! 
{ + static ref EXPECTED_SCHEMA: Vec = vec![ + common::make_column_schema( + "ts".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; +} + +#[test] +fn test_parse_date() { + let pipeline_yaml = r#" +processors: + - date: + fields: + - input_str + formats: + - "%Y-%m-%dT%H:%M:%S%.3fZ" + +transform: + - fields: + - input_str, ts + type: time +"#; + + let output = common::parse_and_exec(TEST_INPUT, pipeline_yaml); + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!(output.rows[0].values[0].value_data, TEST_VALUE); +} + +#[test] +fn test_multi_formats() { + let pipeline_yaml = r#" +processors: + - date: + fields: + - input_str + formats: + - "%Y-%m-%dT%H:%M:%S" + - "%Y-%m-%dT%H:%M:%S%.3fZ" + +transform: + - fields: + - input_str, ts + type: time +"#; + + let output = common::parse_and_exec(TEST_INPUT, pipeline_yaml); + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!(output.rows[0].values[0].value_data, TEST_VALUE); +} + +#[test] +fn test_ignore_missing() { + let empty_input = r#"{}"#; + + let pipeline_yaml = r#" +processors: + - date: + fields: + - input_str + formats: + - "%Y-%m-%dT%H:%M:%S" + - "%Y-%m-%dT%H:%M:%S%.3fZ" + ignore_missing: true + +transform: + - fields: + - input_str, ts + type: time +"#; + + let output = common::parse_and_exec(empty_input, pipeline_yaml); + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!(output.rows[0].values[0].value_data, None); +} + +#[test] +fn test_timezone() { + let pipeline_yaml = r#" +processors: + - date: + fields: + - input_str + formats: + - "%Y-%m-%dT%H:%M:%S" + - "%Y-%m-%dT%H:%M:%S%.3fZ" + ignore_missing: true + timezone: 'Asia/Shanghai' + +transform: + - fields: + - input_str, ts + type: time +"#; + + let output = common::parse_and_exec(TEST_INPUT, pipeline_yaml); + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!( + output.rows[0].values[0].value_data, + Some(ValueData::TimestampNanosecondValue(1719440016991000000)) + ); +} diff --git a/src/pipeline/tests/dissect.rs b/src/pipeline/tests/dissect.rs index bc9ca263ca40..10f9e2799616 100644 --- a/src/pipeline/tests/dissect.rs +++ b/src/pipeline/tests/dissect.rs @@ -17,6 +17,10 @@ mod common; use greptime_proto::v1::value::ValueData::StringValue; use greptime_proto::v1::{ColumnDataType, SemanticType}; +fn make_string_column_schema(name: String) -> greptime_proto::v1::ColumnSchema { + common::make_column_schema(name, ColumnDataType::String, SemanticType::Field) +} + #[test] fn test_dissect_pattern() { let input_value_str = r#" @@ -43,8 +47,8 @@ transform: let output = common::parse_and_exec(input_value_str, pipeline_yaml); let expected_schema = vec![ - common::make_column_schema("a".to_string(), ColumnDataType::String, SemanticType::Field), - common::make_column_schema("b".to_string(), ColumnDataType::String, SemanticType::Field), + make_string_column_schema("a".to_string()), + make_string_column_schema("b".to_string()), common::make_column_schema( "greptime_timestamp".to_string(), ColumnDataType::TimestampNanosecond, @@ -91,8 +95,8 @@ transform: let output = common::parse_and_exec(input_value_str, pipeline_yaml); let expected_schema = vec![ - common::make_column_schema("a".to_string(), ColumnDataType::String, SemanticType::Field), - common::make_column_schema("b".to_string(), ColumnDataType::String, SemanticType::Field), + make_string_column_schema("a".to_string()), + 
make_string_column_schema("b".to_string()), common::make_column_schema( "greptime_timestamp".to_string(), ColumnDataType::TimestampNanosecond, @@ -111,3 +115,141 @@ transform: Some(StringValue("456".to_string())) ); } + +#[test] +fn test_ignore_missing() { + let empty_str = r#"{}"#; + + let pipeline_yaml = r#" +processors: + - dissect: + field: str + patterns: + - "%{a} %{b}" + ignore_missing: true + +transform: + - fields: + - a + - b + type: string +"#; + + let output = common::parse_and_exec(empty_str, pipeline_yaml); + + let expected_schema = vec![ + make_string_column_schema("a".to_string()), + make_string_column_schema("b".to_string()), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + assert_eq!(output.schema, expected_schema); + + assert_eq!(output.rows[0].values[0].value_data, None); + assert_eq!(output.rows[0].values[1].value_data, None); +} + +#[test] +fn test_modifier() { + let empty_str = r#" +{ + "str": "key1 key2 key3 key4 key5 key6 key7 key8" +}"#; + + let pipeline_yaml = r#" +processors: + - dissect: + field: str + patterns: + - "%{key1} %{key2} %{+key3} %{+key3/2} %{key5->} %{?key6} %{*key_7} %{&key_7}" + +transform: + - fields: + - key1 + - key2 + - key3 + - key5 + - key7 + type: string +"#; + + let output = common::parse_and_exec(empty_str, pipeline_yaml); + + let expected_schema = vec![ + make_string_column_schema("key1".to_string()), + make_string_column_schema("key2".to_string()), + make_string_column_schema("key3".to_string()), + make_string_column_schema("key5".to_string()), + make_string_column_schema("key7".to_string()), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + assert_eq!(output.schema, expected_schema); + assert_eq!( + output.rows[0].values[0].value_data, + Some(StringValue("key1".to_string())) + ); + assert_eq!( + output.rows[0].values[1].value_data, + Some(StringValue("key2".to_string())) + ); + assert_eq!( + output.rows[0].values[2].value_data, + Some(StringValue("key3 key4".to_string())) + ); + assert_eq!( + output.rows[0].values[3].value_data, + Some(StringValue("key5".to_string())) + ); + assert_eq!( + output.rows[0].values[4].value_data, + Some(StringValue("key8".to_string())) + ); +} + +#[test] +fn test_append_separator() { + let empty_str = r#" +{ + "str": "key1 key2" +}"#; + + let pipeline_yaml = r#" +processors: + - dissect: + field: str + patterns: + - "%{+key1} %{+key1}" + append_separator: "_" + +transform: + - fields: + - key1 + type: string +"#; + + let output = common::parse_and_exec(empty_str, pipeline_yaml); + + let expected_schema = vec![ + make_string_column_schema("key1".to_string()), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + assert_eq!(output.schema, expected_schema); + assert_eq!( + output.rows[0].values[0].value_data, + Some(StringValue("key1_key2".to_string())) + ); +} diff --git a/src/pipeline/tests/epoch.rs b/src/pipeline/tests/epoch.rs new file mode 100644 index 000000000000..35a2ab635c00 --- /dev/null +++ b/src/pipeline/tests/epoch.rs @@ -0,0 +1,255 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod common; + +use api::v1::ColumnSchema; +use greptime_proto::v1::value::ValueData; +use greptime_proto::v1::{ColumnDataType, SemanticType}; + +#[test] +fn test_parse_epoch() { + let test_input = r#" + { + "input_s": "1722580862", + "input_sec": "1722580862", + "input_second": "1722580862", + "input_ms": "1722580887794", + "input_millisecond": "1722580887794", + "input_milli": "1722580887794", + "input_default": "1722580887794", + "input_us": "1722580905423969", + "input_microsecond": "1722580905423969", + "input_micro": "1722580905423969", + "input_ns": "1722580929863842048", + "input_nanosecond": "1722580929863842048", + "input_nano": "1722580929863842048" + }"#; + + let pipeline_yaml = r#" +processors: + - epoch: + field: input_s + resolution: s + - epoch: + field: input_sec + resolution: sec + - epoch: + field: input_second + resolution: second + - epoch: + field: input_ms + resolution: ms + - epoch: + field: input_millisecond + resolution: millisecond + - epoch: + field: input_milli + resolution: milli + - epoch: + field: input_default + - epoch: + field: input_us + resolution: us + - epoch: + field: input_microsecond + resolution: microsecond + - epoch: + field: input_micro + resolution: micro + - epoch: + field: input_ns + resolution: ns + - epoch: + field: input_nanosecond + resolution: nanosecond + - epoch: + field: input_nano + resolution: nano + +transform: + - field: input_s + type: epoch, s + - field: input_sec + type: epoch, sec + - field: input_second + type: epoch, second + + - field: input_ms + type: epoch, ms + - field: input_millisecond + type: epoch, millisecond + - field: input_milli + type: epoch, milli + - field: input_default + type: epoch, milli + + - field: input_us + type: epoch, us + - field: input_microsecond + type: epoch, microsecond + - field: input_micro + type: epoch, micro + + - field: input_ns + type: epoch, ns + - field: input_nanosecond + type: epoch, nanosecond + - field: input_nano + type: epoch, nano +"#; + fn make_time_field(name: &str, datatype: ColumnDataType) -> ColumnSchema { + common::make_column_schema(name.to_string(), datatype, SemanticType::Field) + } + + let expected_schema = vec![ + make_time_field("input_s", ColumnDataType::TimestampSecond), + make_time_field("input_sec", ColumnDataType::TimestampSecond), + make_time_field("input_second", ColumnDataType::TimestampSecond), + make_time_field("input_ms", ColumnDataType::TimestampMillisecond), + make_time_field("input_millisecond", ColumnDataType::TimestampMillisecond), + make_time_field("input_milli", ColumnDataType::TimestampMillisecond), + make_time_field("input_default", ColumnDataType::TimestampMillisecond), + make_time_field("input_us", ColumnDataType::TimestampMicrosecond), + make_time_field("input_microsecond", ColumnDataType::TimestampMicrosecond), + make_time_field("input_micro", ColumnDataType::TimestampMicrosecond), + make_time_field("input_ns", ColumnDataType::TimestampNanosecond), + make_time_field("input_nanosecond", ColumnDataType::TimestampNanosecond), + make_time_field("input_nano", ColumnDataType::TimestampNanosecond), + common::make_column_schema( + 
"greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, expected_schema); + + for i in 0..2 { + assert_eq!( + output.rows[0].values[i].value_data, + Some(ValueData::TimestampSecondValue(1722580862)) + ); + } + for i in 3..6 { + assert_eq!( + output.rows[0].values[i].value_data, + Some(ValueData::TimestampMillisecondValue(1722580887794)) + ); + } + for i in 7..9 { + assert_eq!( + output.rows[0].values[i].value_data, + Some(ValueData::TimestampMicrosecondValue(1722580905423969)) + ); + } + for i in 10..12 { + assert_eq!( + output.rows[0].values[i].value_data, + Some(ValueData::TimestampNanosecondValue(1722580929863842048)) + ); + } +} + +#[test] +fn test_ignore_missing() { + let empty_input = r#"{}"#; + + let pipeline_yaml = r#" +processors: + - epoch: + field: input_s + resolution: s + ignore_missing: true + +transform: + - fields: + - input_s, ts + type: epoch, s +"#; + + let expected_schema = vec![ + common::make_column_schema( + "ts".to_string(), + ColumnDataType::TimestampSecond, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + let output = common::parse_and_exec(empty_input, pipeline_yaml); + assert_eq!(output.schema, expected_schema); + assert_eq!(output.rows[0].values[0].value_data, None); +} + +#[test] +fn test_default_wrong_resolution() { + let test_input = r#" + { + "input_s": "1722580862", + "input_nano": "1722583122284583936" + }"#; + + let pipeline_yaml = r#" +processors: + - epoch: + fields: + - input_s + - input_nano + +transform: + - fields: + - input_s + type: epoch, s + - fields: + - input_nano + type: epoch, nano +"#; + + let expected_schema = vec![ + common::make_column_schema( + "input_s".to_string(), + ColumnDataType::TimestampSecond, + SemanticType::Field, + ), + common::make_column_schema( + "input_nano".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, expected_schema); + // this is actually wrong + // TODO(shuiyisong): add check for type when converting epoch + assert_eq!( + output.rows[0].values[0].value_data, + Some(ValueData::TimestampMillisecondValue(1722580862)) + ); + assert_eq!( + output.rows[0].values[1].value_data, + Some(ValueData::TimestampMillisecondValue(1722583122284583936)) + ); +} diff --git a/src/pipeline/tests/gsub.rs b/src/pipeline/tests/gsub.rs index 2724c9910403..55f8ced922dd 100644 --- a/src/pipeline/tests/gsub.rs +++ b/src/pipeline/tests/gsub.rs @@ -61,3 +61,37 @@ transform: Some(TimestampMillisecondValue(1573840000000)) ); } + +#[test] +fn test_ignore_missing() { + let empty_string = r#"{}"#; + + let pipeline_yaml = r#" +processors: + - gsub: + field: reqTimeSec + pattern: "\\." 
+ replacement: "" + ignore_missing: true + - epoch: + field: reqTimeSec + resolution: millisecond + ignore_missing: true + +transform: + - field: reqTimeSec + type: epoch, millisecond + index: timestamp +"#; + + let output = common::parse_and_exec(empty_string, pipeline_yaml); + + let expected_schema = vec![common::make_column_schema( + "reqTimeSec".to_string(), + ColumnDataType::TimestampMillisecond, + SemanticType::Timestamp, + )]; + + assert_eq!(output.schema, expected_schema); + assert_eq!(output.rows[0].values[0].value_data, None); +} diff --git a/src/pipeline/tests/join.rs b/src/pipeline/tests/join.rs index 9ffa35909c76..3625160361a6 100644 --- a/src/pipeline/tests/join.rs +++ b/src/pipeline/tests/join.rs @@ -117,3 +117,41 @@ fn test_float() { Some(StringValue("1.1-1.2-1.3".to_string())) ); } + +#[test] +fn test_mix_type() { + let input_value_str = r#" + [ + { + "join_test": [1, true, "a", 1.1] + } + ] +"#; + let output = common::parse_and_exec(input_value_str, PIPELINE_YAML); + + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!( + output.rows[0].values[0].value_data, + Some(StringValue("1-true-a-1.1".to_string())) + ); +} + +#[test] +fn test_ignore_missing() { + let empty_string = r#"{}"#; + let pipeline_yaml = r#" +processors: + - join: + field: join_test + separator: "-" + ignore_missing: true + +transform: + - field: join_test + type: string +"#; + let output = common::parse_and_exec(empty_string, pipeline_yaml); + + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!(output.rows[0].values[0].value_data, None); +} diff --git a/src/pipeline/tests/letter.rs b/src/pipeline/tests/letter.rs new file mode 100644 index 000000000000..d6d9a2cccbf1 --- /dev/null +++ b/src/pipeline/tests/letter.rs @@ -0,0 +1,188 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod common; + +use api::v1::ColumnSchema; +use greptime_proto::v1::value::ValueData; +use greptime_proto::v1::{ColumnDataType, SemanticType}; +use lazy_static::lazy_static; + +lazy_static! 
{ + static ref EXPECTED_SCHEMA: Vec = vec![ + common::make_column_schema( + "input_str".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; +} + +#[test] +fn test_upper() { + let test_input = r#" +{ + "input_str": "aaa" +}"#; + + let pipeline_yaml = r#" +processors: + - letter: + fields: + - input_str + method: upper + +transform: + - fields: + - input_str + type: string +"#; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!( + output.rows[0].values[0].value_data, + Some(ValueData::StringValue("AAA".to_string())) + ); +} + +#[test] +fn test_lower() { + let test_input = r#" +{ + "input_str": "AAA" +}"#; + + let pipeline_yaml = r#" +processors: + - letter: + fields: + - input_str + method: lower + +transform: + - fields: + - input_str + type: string +"#; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, *EXPECTED_SCHEMA); + assert_eq!( + output.rows[0].values[0].value_data, + Some(ValueData::StringValue("aaa".to_string())) + ); +} + +#[test] +fn test_capital() { + let test_input = r#" +{ + "upper": "AAA", + "lower": "aaa" +}"#; + + let pipeline_yaml = r#" +processors: + - letter: + fields: + - upper + - lower + method: capital + +transform: + - fields: + - upper + - lower + type: string +"#; + + let expected_schema = vec![ + common::make_column_schema( + "upper".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "lower".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, expected_schema); + assert_eq!( + output.rows[0].values[0].value_data, + Some(ValueData::StringValue("AAA".to_string())) + ); + assert_eq!( + output.rows[0].values[1].value_data, + Some(ValueData::StringValue("Aaa".to_string())) + ); +} + +#[test] +fn test_ignore_missing() { + let test_input = r#"{}"#; + + let pipeline_yaml = r#" +processors: + - letter: + fields: + - upper + - lower + method: capital + ignore_missing: true + +transform: + - fields: + - upper + - lower + type: string +"#; + + let expected_schema = vec![ + common::make_column_schema( + "upper".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "lower".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, expected_schema); + assert_eq!(output.rows[0].values[0].value_data, None); + assert_eq!(output.rows[0].values[1].value_data, None); +} diff --git a/src/pipeline/tests/regex.rs b/src/pipeline/tests/regex.rs index 5519c613951f..5be60c987525 100644 --- a/src/pipeline/tests/regex.rs +++ b/src/pipeline/tests/regex.rs @@ -14,8 +14,25 @@ mod common; +use api::v1::ColumnSchema; use greptime_proto::v1::value::ValueData::StringValue; use greptime_proto::v1::{ColumnDataType, SemanticType}; +use lazy_static::lazy_static; + +lazy_static! 
{ + static ref EXPECTED_SCHEMA: Vec = vec![ + common::make_column_schema( + "str_id".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; +} #[test] fn test_regex_pattern() { @@ -41,20 +58,7 @@ transform: let output = common::parse_and_exec(input_value_str, pipeline_yaml); - let expected_schema = vec![ - common::make_column_schema( - "str_id".to_string(), - ColumnDataType::String, - SemanticType::Field, - ), - common::make_column_schema( - "greptime_timestamp".to_string(), - ColumnDataType::TimestampNanosecond, - SemanticType::Timestamp, - ), - ]; - - assert_eq!(output.schema, expected_schema); + assert_eq!(output.schema, *EXPECTED_SCHEMA); assert_eq!( output.rows[0].values[0].value_data, @@ -87,23 +91,34 @@ transform: let output = common::parse_and_exec(input_value_str, pipeline_yaml); - let expected_schema = vec![ - common::make_column_schema( - "str_id".to_string(), - ColumnDataType::String, - SemanticType::Field, - ), - common::make_column_schema( - "greptime_timestamp".to_string(), - ColumnDataType::TimestampNanosecond, - SemanticType::Timestamp, - ), - ]; - - assert_eq!(output.schema, expected_schema); + assert_eq!(output.schema, *EXPECTED_SCHEMA); assert_eq!( output.rows[0].values[0].value_data, Some(StringValue("123".to_string())) ); } + +#[test] +fn test_ignore_missing() { + let input_value_str = r#"{}"#; + + let pipeline_yaml = r#" +processors: + - regex: + fields: + - str + pattern: "(?\\d+)" + ignore_missing: true + +transform: + - field: str_id + type: string +"#; + + let output = common::parse_and_exec(input_value_str, pipeline_yaml); + + assert_eq!(output.schema, *EXPECTED_SCHEMA); + + assert_eq!(output.rows[0].values[0].value_data, None); +} diff --git a/src/pipeline/tests/urlencoding.rs b/src/pipeline/tests/urlencoding.rs new file mode 100644 index 000000000000..dd0c4ffe9f2f --- /dev/null +++ b/src/pipeline/tests/urlencoding.rs @@ -0,0 +1,112 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +mod common; + +use greptime_proto::v1::value::ValueData; +use greptime_proto::v1::{ColumnDataType, SemanticType}; + +#[test] +fn test() { + let test_input = r#" +{ + "encoding": "2024-06-27T06:13:36.991Z", + "decoding": "2024-06-27T06%3A13%3A36.991Z" +}"#; + + let pipeline_yaml = r#" +processors: + - urlencoding: + field: encoding + method: encode + + - urlencoding: + field: decoding + method: decode + +transform: + - fields: + - encoding + - decoding + type: string +"#; + + let expected_schema = vec![ + common::make_column_schema( + "encoding".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "decoding".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, expected_schema); + assert_eq!( + output.rows[0].values[0].value_data, + Some(ValueData::StringValue( + "2024-06-27T06%3A13%3A36.991Z".to_string() + )) + ); + assert_eq!( + output.rows[0].values[1].value_data, + Some(ValueData::StringValue( + "2024-06-27T06:13:36.991Z".to_string() + )) + ); +} + +#[test] +fn test_ignore_missing() { + let test_input = r#"{}"#; + + let pipeline_yaml = r#" +processors: + - urlencoding: + field: encoding + method: encode + ignore_missing: true + +transform: + - fields: + - encoding + type: string +"#; + + let expected_schema = vec![ + common::make_column_schema( + "encoding".to_string(), + ColumnDataType::String, + SemanticType::Field, + ), + common::make_column_schema( + "greptime_timestamp".to_string(), + ColumnDataType::TimestampNanosecond, + SemanticType::Timestamp, + ), + ]; + + let output = common::parse_and_exec(test_input, pipeline_yaml); + assert_eq!(output.schema, expected_schema); + assert_eq!(output.rows[0].values[0].value_data, None); +} diff --git a/src/script/src/python/pyo3/copr_impl.rs b/src/script/src/python/pyo3/copr_impl.rs index 4ee56021e1f0..2025c9d3a27d 100644 --- a/src/script/src/python/pyo3/copr_impl.rs +++ b/src/script/src/python/pyo3/copr_impl.rs @@ -22,7 +22,7 @@ use datatypes::vectors::{Helper, VectorRef}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyModule, PyString, PyTuple}; use pyo3::{pymethods, IntoPy, PyAny, PyCell, PyObject, PyResult, Python, ToPyObject}; -use snafu::{ensure, Location, ResultExt}; +use snafu::{ensure, ResultExt}; use crate::engine::EvalContext; use crate::python::error::{self, NewRecordBatchSnafu, OtherSnafu, Result}; diff --git a/src/servers/dashboard/VERSION b/src/servers/dashboard/VERSION index 4bc4a916105d..8ea9cc1eb196 100644 --- a/src/servers/dashboard/VERSION +++ b/src/servers/dashboard/VERSION @@ -1 +1 @@ -v0.5.3 +v0.5.4 diff --git a/src/servers/src/mysql/handler.rs b/src/servers/src/mysql/handler.rs index d7b5173bd9c3..8e7b4630ebb4 100644 --- a/src/servers/src/mysql/handler.rs +++ b/src/servers/src/mysql/handler.rs @@ -221,6 +221,10 @@ impl MysqlInstanceShim { impl AsyncMysqlShim for MysqlInstanceShim { type Error = error::Error; + fn version(&self) -> String { + std::env::var("GREPTIMEDB_MYSQL_SERVER_VERSION").unwrap_or_else(|_| "8.4.2".to_string()) + } + fn salt(&self) -> [u8; 20] { self.salt } diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index 5621a1bed72b..fd50cca54bbd 100644 --- a/src/sql/src/parsers/create_parser.rs +++ 
b/src/sql/src/parsers/create_parser.rs @@ -950,7 +950,7 @@ fn ensure_exprs_are_binary(exprs: &[Expr], columns: &[&Column]) -> Result<()> { ensure_one_expr(right, columns)?; } else { return error::InvalidSqlSnafu { - msg: format!("Partition rule expr {:?} is not a binary expr!", expr), + msg: format!("Partition rule expr {:?} is not a binary expr", expr), } .fail(); } @@ -974,7 +974,7 @@ fn ensure_one_expr(expr: &Expr, columns: &[&Column]) -> Result<()> { columns.iter().any(|c| &c.name().value == column_name), error::InvalidSqlSnafu { msg: format!( - "Column {:?} in rule expr is not referenced in PARTITION ON!", + "Column {:?} in rule expr is not referenced in PARTITION ON", column_name ), } @@ -987,7 +987,7 @@ fn ensure_one_expr(expr: &Expr, columns: &[&Column]) -> Result<()> { Ok(()) } _ => error::InvalidSqlSnafu { - msg: format!("Partition rule expr {:?} is not a binary expr!", expr), + msg: format!("Partition rule expr {:?} is not a binary expr", expr), } .fail(), } @@ -1002,13 +1002,14 @@ fn ensure_partition_columns_defined<'a>( .column_list .iter() .map(|x| { + let x = ParserContext::canonicalize_identifier(x.clone()); // Normally the columns in "create table" won't be too many, // a linear search to find the target every time is fine. columns .iter() - .find(|c| c.name() == x) + .find(|c| *c.name().value == x.value) .context(error::InvalidSqlSnafu { - msg: format!("Partition column {:?} not defined!", x.value), + msg: format!("Partition column {:?} not defined", x.value), }) }) .collect::>>() @@ -1320,7 +1321,7 @@ ENGINE=mito"; assert!(result .unwrap_err() .to_string() - .contains("Partition column \"x\" not defined!")); + .contains("Partition column \"x\" not defined")); } #[test] @@ -1447,6 +1448,30 @@ ENGINE=mito"; } } + #[test] + fn test_parse_create_table_with_quoted_partitions() { + let sql = r" +CREATE TABLE monitor ( + `host_id` INT, + idc STRING, + ts TIMESTAMP, + cpu DOUBLE DEFAULT 0, + memory DOUBLE, + TIME INDEX (ts), + PRIMARY KEY (host), +) +PARTITION ON COLUMNS(IdC, host_id) ( + idc <= 'hz' AND host_id < 1000, + idc > 'hz' AND idc <= 'sh' AND host_id < 2000, + idc > 'sh' AND host_id < 3000, + idc > 'sh' AND host_id >= 3000, +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(result.len(), 1); + } + #[test] fn test_parse_create_table_with_timestamp_index() { let sql1 = r" @@ -1728,7 +1753,7 @@ ENGINE=mito"; ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); assert_eq!( result.unwrap_err().output_msg(), - "Invalid SQL, error: Column \"b\" in rule expr is not referenced in PARTITION ON!" + "Invalid SQL, error: Column \"b\" in rule expr is not referenced in PARTITION ON" ); } @@ -1744,7 +1769,7 @@ ENGINE=mito"; ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); assert_eq!( result.unwrap_err().output_msg(), - "Invalid SQL, error: Partition rule expr Identifier(Ident { value: \"b\", quote_style: None }) is not a binary expr!" 
+ "Invalid SQL, error: Partition rule expr Identifier(Ident { value: \"b\", quote_style: None }) is not a binary expr" ); } diff --git a/src/sql/src/parsers/error.rs b/src/sql/src/parsers/error.rs index 8feb80b988e3..c591701ab39b 100644 --- a/src/sql/src/parsers/error.rs +++ b/src/sql/src/parsers/error.rs @@ -31,4 +31,12 @@ pub enum TQLError { #[snafu(display("Failed to evaluate TQL expression: {}", msg))] Evaluation { msg: String }, + + #[snafu(display("Failed to convert TQL expression to logical expression"))] + ConvertToLogicalExpression { + #[snafu(source)] + error: Box, + #[snafu(implicit)] + location: Location, + }, } diff --git a/src/sql/src/parsers/tql_parser.rs b/src/sql/src/parsers/tql_parser.rs index 985f66720bed..078cf0f7268d 100644 --- a/src/sql/src/parsers/tql_parser.rs +++ b/src/sql/src/parsers/tql_parser.rs @@ -31,6 +31,7 @@ const VERBOSE: &str = "VERBOSE"; use sqlparser::parser::Parser; +use super::error::ConvertToLogicalExpressionSnafu; use crate::dialect::GreptimeDbDialect; use crate::parsers::error::{EvaluationSnafu, ParserSnafu, TQLError}; @@ -182,7 +183,9 @@ impl<'a> ParserContext<'a> { fn parse_tokens(tokens: Vec) -> std::result::Result { let parser_expr = Self::parse_to_expr(tokens)?; - let lit = utils::parser_expr_to_scalar_value(parser_expr).unwrap(); + let lit = utils::parser_expr_to_scalar_value(parser_expr) + .map_err(Box::new) + .context(ConvertToLogicalExpressionSnafu)?; let second = match lit { ScalarValue::TimestampNanosecond(ts_nanos, _) @@ -270,6 +273,11 @@ mod tests { } _ => unreachable!(), } + + let sql = "TQL EVAL (now(), now()-'5m', '30s') http_requests_total"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); + assert!(result.is_err()); } #[test] diff --git a/src/store-api/src/mito_engine_options.rs b/src/store-api/src/mito_engine_options.rs index 9252c970b37f..98ceb6758552 100644 --- a/src/store-api/src/mito_engine_options.rs +++ b/src/store-api/src/mito_engine_options.rs @@ -28,7 +28,9 @@ pub fn is_mito_engine_option_key(key: &str) -> bool { "ttl", "compaction.type", "compaction.twcs.max_active_window_runs", + "compaction.twcs.max_active_window_files", "compaction.twcs.max_inactive_window_runs", + "compaction.twcs.max_inactive_window_files", "compaction.twcs.time_window", "compaction.twcs.remote_compaction", "storage", diff --git a/src/table/src/table/metrics.rs b/src/table/src/table/metrics.rs index e24f0ff90a6b..5fbdf641f507 100644 --- a/src/table/src/table/metrics.rs +++ b/src/table/src/table/metrics.rs @@ -12,23 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::time::Duration; + use datafusion::physical_plan::metrics::{ - Count, ExecutionPlanMetricsSet, Gauge, MetricBuilder, Timestamp, + Count, ExecutionPlanMetricsSet, Gauge, MetricBuilder, ScopedTimerGuard, Time, Timestamp, }; -/// This metrics struct is used to record and hold memory usage +/// This metrics struct is used to record and hold metrics like memory usage /// of result batch in [`crate::table::scan::StreamWithMetricWrapper`] -/// during query execution, indicating size of the dataset. +/// during query execution. 
 #[derive(Debug, Clone)]
-pub struct MemoryUsageMetrics {
+pub struct StreamMetrics {
+    /// Timestamp when the stream finished
     end_time: Timestamp,
-    // used memory in bytes
+    /// Used memory in bytes
     mem_used: Gauge,
-    // number of rows in output
+    /// Number of rows in output
     output_rows: Count,
+    /// Elapsed time used to `poll` the stream
+    poll_elapsed: Time,
+    /// Elapsed time spent `.await`ing the stream
+    await_elapsed: Time,
 }
 
-impl MemoryUsageMetrics {
+impl StreamMetrics {
     /// Create a new MemoryUsageMetrics structure, and set `start_time` to now
     pub fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self {
         let start_time = MetricBuilder::new(metrics).start_timestamp(partition);
@@ -38,6 +45,8 @@ impl MemoryUsageMetrics {
             end_time: MetricBuilder::new(metrics).end_timestamp(partition),
             mem_used: MetricBuilder::new(metrics).mem_used(partition),
             output_rows: MetricBuilder::new(metrics).output_rows(partition),
+            poll_elapsed: MetricBuilder::new(metrics).subset_time("elapsed_poll", partition),
+            await_elapsed: MetricBuilder::new(metrics).subset_time("elapsed_await", partition),
         }
     }
 
@@ -55,9 +64,18 @@ impl MemoryUsageMetrics {
             self.end_time.record()
         }
     }
+
+    /// Return a timer guard that records the time elapsed in poll
+    pub fn poll_timer(&self) -> ScopedTimerGuard {
+        self.poll_elapsed.timer()
+    }
+
+    pub fn record_await_duration(&self, duration: Duration) {
+        self.await_elapsed.add_duration(duration);
+    }
 }
 
-impl Drop for MemoryUsageMetrics {
+impl Drop for StreamMetrics {
     fn drop(&mut self) {
         self.try_done()
     }
diff --git a/src/table/src/table/scan.rs b/src/table/src/table/scan.rs
index 70e76586b648..02c1147875aa 100644
--- a/src/table/src/table/scan.rs
+++ b/src/table/src/table/scan.rs
@@ -16,6 +16,7 @@ use std::any::Any;
 use std::pin::Pin;
 use std::sync::{Arc, Mutex};
 use std::task::{Context, Poll};
+use std::time::Instant;
 
 use common_error::ext::BoxedError;
 use common_recordbatch::{DfRecordBatch, DfSendableRecordBatchStream, SendableRecordBatchStream};
@@ -34,7 +35,7 @@ use datatypes::arrow::datatypes::SchemaRef as ArrowSchemaRef;
 use futures::{Stream, StreamExt};
 use store_api::region_engine::{PartitionRange, RegionScannerRef};
 
-use crate::table::metrics::MemoryUsageMetrics;
+use crate::table::metrics::StreamMetrics;
 
 /// A plan to read multiple partitions from a region of a table.
 #[derive(Debug)]
@@ -139,11 +140,12 @@ impl ExecutionPlan for RegionScanExec {
             .unwrap()
             .scan_partition(partition)
             .map_err(|e| DataFusionError::External(Box::new(e)))?;
-        let mem_usage_metrics = MemoryUsageMetrics::new(&self.metric, partition);
+        let stream_metrics = StreamMetrics::new(&self.metric, partition);
         Ok(Box::pin(StreamWithMetricWrapper {
             stream,
-            metric: mem_usage_metrics,
+            metric: stream_metrics,
             span,
+            await_timer: None,
         }))
     }
 
@@ -164,8 +166,9 @@ impl DisplayAs for RegionScanExec {
 
 pub struct StreamWithMetricWrapper {
     stream: SendableRecordBatchStream,
-    metric: MemoryUsageMetrics,
+    metric: StreamMetrics,
     span: Span,
+    await_timer: Option<Instant>,
 }
 
 impl Stream for StreamWithMetricWrapper {
@@ -174,22 +177,32 @@ impl Stream for StreamWithMetricWrapper {
     fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
         let this = self.get_mut();
         let _enter = this.span.enter();
-        match this.stream.poll_next_unpin(cx) {
-            Poll::Ready(Some(result)) => match result {
-                Ok(record_batch) => {
-                    let batch_mem_size = record_batch
-                        .columns()
-                        .iter()
-                        .map(|vec_ref| vec_ref.memory_size())
-                        .sum::<usize>();
-                    // we don't record elapsed time here
-                    // since it's calling storage api involving I/O ops
-                    this.metric.record_mem_usage(batch_mem_size);
-                    this.metric.record_output(record_batch.num_rows());
-                    Poll::Ready(Some(Ok(record_batch.into_df_record_batch())))
+        let poll_timer = this.metric.poll_timer();
+        this.await_timer.get_or_insert(Instant::now());
+        let poll_result = this.stream.poll_next_unpin(cx);
+        drop(poll_timer);
+        match poll_result {
+            Poll::Ready(Some(result)) => {
+                if let Some(instant) = this.await_timer.take() {
+                    let elapsed = instant.elapsed();
+                    this.metric.record_await_duration(elapsed);
                 }
-                Err(e) => Poll::Ready(Some(Err(DataFusionError::External(Box::new(e))))),
-            },
+                match result {
+                    Ok(record_batch) => {
+                        let batch_mem_size = record_batch
+                            .columns()
+                            .iter()
+                            .map(|vec_ref| vec_ref.memory_size())
+                            .sum::<usize>();
+                        // we don't record elapsed time here
+                        // since it's calling storage api involving I/O ops
+                        this.metric.record_mem_usage(batch_mem_size);
+                        this.metric.record_output(record_batch.num_rows());
+                        Poll::Ready(Some(Ok(record_batch.into_df_record_batch())))
+                    }
+                    Err(e) => Poll::Ready(Some(Err(DataFusionError::External(Box::new(e))))),
+                }
+            }
             Poll::Ready(None) => Poll::Ready(None),
             Poll::Pending => Poll::Pending,
         }
     }
diff --git a/tests-integration/src/standalone.rs b/tests-integration/src/standalone.rs
index 9de4e594980d..9f7188568fa4 100644
--- a/tests-integration/src/standalone.rs
+++ b/tests-integration/src/standalone.rs
@@ -156,6 +156,7 @@ impl GreptimeDbStandaloneBuilder {
             plugins.clone(),
             table_metadata_manager.clone(),
             catalog_manager.clone(),
+            flow_metadata_manager.clone(),
         );
         let flownode = Arc::new(flow_builder.build().await.unwrap());
 
diff --git a/tests-integration/src/test_util.rs b/tests-integration/src/test_util.rs
index a1b132dad0c2..cefd8bed6b94 100644
--- a/tests-integration/src/test_util.rs
+++ b/tests-integration/src/test_util.rs
@@ -164,8 +164,7 @@ pub fn get_test_store_config(store_type: &StorageType) -> (ObjectStoreConfig, Te
                 ..Default::default()
             };
 
-            let mut builder = Gcs::default();
-            builder
+            let builder = Gcs::default()
                 .root(&gcs_config.root)
                 .bucket(&gcs_config.bucket)
                 .scope(&gcs_config.scope)
@@ -186,8 +185,7 @@ pub fn get_test_store_config(store_type: &StorageType) -> (ObjectStoreConfig, Te
                 ..Default::default()
             };
 
-            let mut builder = Azblob::default();
-            let _ = builder
+            let mut builder = Azblob::default()
                 .root(&azblob_config.root)
                 .endpoint(&azblob_config.endpoint)
                 .account_name(azblob_config.account_name.expose_secret())
@@ -195,8 +193,8 @@ pub fn get_test_store_config(store_type: &StorageType) -> (ObjectStoreConfig, Te
                 .container(&azblob_config.container);
 
             if let Ok(sas_token) = env::var("GT_AZBLOB_SAS_TOKEN") {
-                let _ = builder.sas_token(&sas_token);
-            }
+                builder = builder.sas_token(&sas_token);
+            };
 
             let config = ObjectStoreConfig::Azblob(azblob_config);
 
@@ -214,8 +212,7 @@ pub fn get_test_store_config(store_type: &StorageType) -> (ObjectStoreConfig, Te
                 ..Default::default()
            };
 
-            let mut builder = Oss::default();
-            let _ = builder
+            let builder = Oss::default()
                 .root(&oss_config.root)
                 .endpoint(&oss_config.endpoint)
                 .access_key_id(oss_config.access_key_id.expose_secret())
@@ -235,19 +232,18 @@ pub fn get_test_store_config(store_type: &StorageType) -> (ObjectStoreConfig, Te
                 s3_config.cache.cache_path = Some("/tmp/greptimedb_cache".to_string());
             }
 
-            let mut builder = S3::default();
-            let _ = builder
+            let mut builder = S3::default()
                 .root(&s3_config.root)
                 .access_key_id(s3_config.access_key_id.expose_secret())
                 .secret_access_key(s3_config.secret_access_key.expose_secret())
                 .bucket(&s3_config.bucket);
 
             if s3_config.endpoint.is_some() {
-                let _ = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
-            }
+                builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
+            };
             if s3_config.region.is_some() {
-                let _ = builder.region(s3_config.region.as_ref().unwrap());
-            }
+                builder = builder.region(s3_config.region.as_ref().unwrap());
+            };
 
             let config = ObjectStoreConfig::S3(s3_config);
 
diff --git a/tests-integration/tests/sql.rs b/tests-integration/tests/sql.rs
index f133d2581281..a41968496b05 100644
--- a/tests-integration/tests/sql.rs
+++ b/tests-integration/tests/sql.rs
@@ -469,7 +469,7 @@ pub async fn test_postgres_bytea(store_type: StorageType) {
         .await
         .unwrap();
     let get_row = |mess: Vec<SimpleQueryMessage>| -> String {
-        match &mess[0] {
+        match &mess[1] {
             SimpleQueryMessage::Row(row) => row.get(0).unwrap().to_string(),
             _ => unreachable!(),
         }
@@ -595,9 +595,9 @@ pub async fn test_postgres_datestyle(store_type: StorageType) {
         .expect("INSERT INTO dt_test ERROR");
 
     let get_row = |mess: Vec<SimpleQueryMessage>| -> String {
-        match &mess[0] {
+        match &mess[1] {
             SimpleQueryMessage::Row(row) => row.get(0).unwrap().to_string(),
-            _ => unreachable!(),
+            _ => unreachable!("Unexpected messages: {:?}", mess),
         }
     };
 
@@ -759,7 +759,7 @@ pub async fn test_postgres_timezone(store_type: StorageType) {
     });
 
     let get_row = |mess: Vec<SimpleQueryMessage>| -> String {
-        match &mess[0] {
+        match &mess[1] {
             SimpleQueryMessage::Row(row) => row.get(0).unwrap().to_string(),
             _ => unreachable!(),
         }
diff --git a/tests/cases/standalone/common/flow/flow_basic.result b/tests/cases/standalone/common/flow/flow_basic.result
index e5983ea8d424..3c49535b0fd3 100644
--- a/tests/cases/standalone/common/flow/flow_basic.result
+++ b/tests/cases/standalone/common/flow/flow_basic.result
@@ -24,6 +24,7 @@ select flush_flow('test_numbers_basic')<=1;
 | true                                               |
 +----------------------------------------------------+
 
+-- SQLNESS ARG restart=true
 INSERT INTO numbers_input_basic
 VALUES
     (20, "2021-07-01 00:00:00.200"),
diff --git a/tests/cases/standalone/common/flow/flow_basic.sql b/tests/cases/standalone/common/flow/flow_basic.sql
index dab3d78f836b..1f282cca2e03 100644
--- a/tests/cases/standalone/common/flow/flow_basic.sql
+++ b/tests/cases/standalone/common/flow/flow_basic.sql
@@ -14,6 +14,7 @@ SELECT sum(number) FROM numbers_input_basic GROUP BY tumble(ts, '1 second', '202
 -- because
flush_flow result is at most 1 select flush_flow('test_numbers_basic')<=1; +-- SQLNESS ARG restart=true INSERT INTO numbers_input_basic VALUES (20, "2021-07-01 00:00:00.200"), diff --git a/tests/cases/standalone/common/order/nulls_first.result b/tests/cases/standalone/common/order/nulls_first.result index 9b0c28da795c..8aae8ccadb5e 100644 --- a/tests/cases/standalone/common/order/nulls_first.result +++ b/tests/cases/standalone/common/order/nulls_first.result @@ -36,16 +36,8 @@ SELECT * FROM test ORDER BY i NULLS LAST, j NULLS FIRST; | | 1 | 1970-01-01T00:00:00.002 | +---+---+-------------------------+ -SELECT i, j, row_number() OVER (PARTITION BY i ORDER BY j NULLS FIRST) FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; - -+---+---+------------------------------------------------------------------------------------------------------------------------+ -| i | j | ROW_NUMBER() PARTITION BY [test.i] ORDER BY [test.j ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | -+---+---+------------------------------------------------------------------------------------------------------------------------+ -| | 1 | 1 | -| 1 | | 1 | -| 1 | 1 | 2 | -+---+---+------------------------------------------------------------------------------------------------------------------------+ - +-- Temporary disable. Waiting for next upgrade of DataFusion. +-- SELECT i, j, row_number() OVER (PARTITION BY i ORDER BY j NULLS FIRST) FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; SELECT i, j, row_number() OVER (PARTITION BY i ORDER BY j NULLS LAST) FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; +---+---+-----------------------------------------------------------------------------------------------------------------------+ diff --git a/tests/cases/standalone/common/order/nulls_first.sql b/tests/cases/standalone/common/order/nulls_first.sql index e66b0f12fb4f..c22a2cfc381d 100644 --- a/tests/cases/standalone/common/order/nulls_first.sql +++ b/tests/cases/standalone/common/order/nulls_first.sql @@ -8,7 +8,8 @@ SELECT * FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; SELECT * FROM test ORDER BY i NULLS LAST, j NULLS FIRST; -SELECT i, j, row_number() OVER (PARTITION BY i ORDER BY j NULLS FIRST) FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; +-- Temporary disable. Waiting for next upgrade of DataFusion. 
+-- SELECT i, j, row_number() OVER (PARTITION BY i ORDER BY j NULLS FIRST) FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; SELECT i, j, row_number() OVER (PARTITION BY i ORDER BY j NULLS LAST) FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; diff --git a/v0.9.1.md b/v0.9.1.md new file mode 100644 index 000000000000..63f967ebcace --- /dev/null +++ b/v0.9.1.md @@ -0,0 +1,58 @@ +# TSBS benchmark - v0.9.1 + +## Environment + +### Local + +| | | +| ------ | ---------------------------------- | +| CPU | AMD Ryzen 7 7735HS (8 core 3.2GHz) | +| Memory | 32GB | +| Disk | SOLIDIGM SSDPFKNU010TZ | +| OS | Ubuntu 22.04.2 LTS | + +### Amazon EC2 + +| | | +| ------- | ----------------------- | +| Machine | c5d.2xlarge | +| CPU | 8 core | +| Memory | 16GB | +| Disk | 100GB (GP3) | +| OS | Ubuntu Server 24.04 LTS | + +## Write performance + +| Environment | Ingest rate (rows/s) | +| --------------- | -------------------- | +| Local | 387697.68 | +| EC2 c5d.2xlarge | 234620.19 | + +## Query performance + +| Query type | Local (ms) | EC2 c5d.2xlarge (ms) | +| --------------------- | ---------- | -------------------- | +| cpu-max-all-1 | 21.14 | 14.75 | +| cpu-max-all-8 | 36.79 | 30.69 | +| double-groupby-1 | 529.02 | 987.85 | +| double-groupby-5 | 1064.53 | 1455.95 | +| double-groupby-all | 1625.33 | 2143.96 | +| groupby-orderby-limit | 529.19 | 1353.49 | +| high-cpu-1 | 12.09 | 8.24 | +| high-cpu-all | 3619.47 | 5312.82 | +| lastpoint | 224.91 | 576.06 | +| single-groupby-1-1-1 | 10.82 | 6.01 | +| single-groupby-1-1-12 | 11.16 | 7.42 | +| single-groupby-1-8-1 | 13.50 | 10.20 | +| single-groupby-5-1-1 | 11.99 | 6.70 | +| single-groupby-5-1-12 | 13.17 | 8.72 | +| single-groupby-5-8-1 | 16.01 | 12.07 | + +`single-groupby-1-1-1` query throughput + +| Environment | Client concurrency | mean time (ms) | qps (queries/sec) | +| --------------- | ------------------ | -------------- | ----------------- | +| Local | 50 | 33.04 | 1511.74 | +| Local | 100 | 67.70 | 1476.14 | +| EC2 c5d.2xlarge | 50 | 61.93 | 806.97 | +| EC2 c5d.2xlarge | 100 | 126.31 | 791.40 |