From e360b4dff71672253ebc253e08d9fedccfc8e0f3 Mon Sep 17 00:00:00 2001 From: Jiacai Liu Date: Mon, 28 Oct 2024 16:58:59 +0800 Subject: [PATCH 1/4] chore: update community (#1582) ## Rationale Sync with website ## Detailed Changes - Add URL check ci ## Test Plan CI --- .github/workflows/links.yml | 45 +++++ CONTRIBUTING.md | 4 +- README-CN.md | 6 +- README.md | 14 +- .../20220702-prometheus-read-extension.md | 176 +++++++++--------- horaemeta/CONTRIBUTING.md | 8 +- 6 files changed, 147 insertions(+), 106 deletions(-) create mode 100644 .github/workflows/links.yml diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml new file mode 100644 index 0000000000..f77fe20b42 --- /dev/null +++ b/.github/workflows/links.yml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Check markdown links + +on: + merge_group: + workflow_dispatch: + schedule: + - cron: '2 0 * * *' + push: + branches: + - main + - dev + pull_request: + +jobs: + url-check: + name: url-check + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + - name: Install deps + run: | + pip3 install urlchecker + - name: Check markdown links + run: | + urlchecker check --file-types '*.md' \ + --exclude-urls 'http://127.0.0.1:5440/sql,https://github.com/apache/horaedb/issues/new' \ + . diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2f9bd85713..8492c80a2d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,7 +24,7 @@ For features, we don't plan to support we will close the feature request ticket ## Contributing Changes -HoraeDB is written mostly in idiomatic Rust—please see the [Style Guide](https://horaedb.apache.org/dev/style_guide.html) for more details. +HoraeDB is written mostly in idiomatic Rust—please see the [Style Guide](https://horaedb.apache.org/docs/dev/style_guide/) for more details. All code must adhere to the `rustfmt` format, and pass all of the `clippy` checks we run in CI (there are more details further down this README). ### Making a PR @@ -38,7 +38,7 @@ PR title. For PRs that you consider ready for review, verify the following locally before you submit it: -* you have a coherent set of logical commits, with messages conforming to the [Conventional Commits](https://horaedb.apache.org/docs/dev/conventional_commit/) specification; +* you have a coherent set of logical commits, with messages conforming to the [Conventional Commits](https://horaedb.apache.org/docs/dev/style_guide/) specification; * all the tests and/or benchmarks pass, including documentation tests; * the code is correctly formatted and all `clippy` checks pass; and * you haven't left any "code cruft" (commented out code blocks etc). 
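In practice, the PR checklist above maps to a handful of commands that can be run locally before pushing. A minimal sketch, assuming the `rustfmt` and `clippy` components plus the `cargo-sort` and `urlchecker` tools are installed; the exact flags CI uses may differ (see the Makefile and the new links.yml workflow in this patch series):

```bash
# Formatting, lint and dependency-ordering checks (mirrors CI; flags may differ).
cargo fmt -- --check
cargo clippy --all-targets --all-features -- -D warnings
cargo sort --workspace --check

# Markdown link check, matching the links.yml workflow above.
pip3 install urlchecker
urlchecker check --file-types '*.md' \
    --exclude-urls 'http://127.0.0.1:5440/sql,https://github.com/apache/horaedb/issues/new' \
    .
```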
diff --git a/README-CN.md b/README-CN.md index 81d1bad725..1cf9bba2e7 100644 --- a/README-CN.md +++ b/README-CN.md @@ -79,11 +79,7 @@ Drop TABLE `demo` 与来自世界各地的用户和开发人员一起在 Apache HoraeDB (incubating) 社区中茁壮成长。 -- [订阅邮箱参与讨论](mailto:dev-subscribe@horaedb.apache.org) ([订阅](mailto:dev-subscribe@horaedb.apache.org?subject=(send%20this%20email%20to%20subscribe)) / [取消订阅](mailto:dev-unsubscribe@horaedb.apache.org?subject=(send%20this%20email%20to%20unsubscribe)) / [查看邮件历史记录](https://lists.apache.org/list.html?dev@horaedb.apache.org)) -- 发送 [请求](mailto:dev@horaedb.apache.org?subject=(Request%to%20join%20HoraeDB%20slack)) 至 `dev@horaedb.apache.org` 加入HoraeDB Slack -- 通过[这里的链接](http://horaedb.apache.org/community/),加入我们的社区。 - -[如何参与贡献](CONTRIBUTING.md) +- https://horaedb.apache.org/community/ ## 致谢 diff --git a/README.md b/README.md index 26ef706bc9..423838c192 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ ![HoraeDB](docs/logo/horaedb-banner-white-small.jpg) ![License](https://img.shields.io/badge/license-Apache--2.0-green.svg) -[![CI](https://github.com/apache/incubator-horaedb/actions/workflows/ci.yml/badge.svg)](https://github.com/apache/incubator-horaedb/actions/workflows/ci.yml) -[![OpenIssue](https://img.shields.io/github/issues/apache/incubator-horaedb)](https://github.com/apache/incubator-horaedb/issues) +[![CI](https://github.com/apache/horaedb/actions/workflows/ci.yml/badge.svg)](https://github.com/apache/horaedb/actions/workflows/ci.yml) +[![OpenIssue](https://img.shields.io/github/issues/apache/horaedb)](https://github.com/apache/horaedb/issues) [![HoraeDB Docker](https://img.shields.io/docker/v/apache/horaedb-server?logo=docker&label=horaedb-server)](https://hub.docker.com/r/apache/horaedb-server) [![HoraeMeta Docker](https://img.shields.io/docker/v/apache/horaemeta-server?logo=docker&label=horaemeta-server)](https://hub.docker.com/r/apache/horaemeta-server) @@ -43,7 +43,7 @@ docker compose -f docker/docker-compose.yaml up ### Run from source code -See details [here](https://horaedb.apache.org/dev/compile_run.html). +Please read the [development guide](https://horaedb.apache.org/docs/dev/compile_run/) guide for instructions on how to build. ### Create Table and Write/Read data Create Table. @@ -95,11 +95,11 @@ Drop TABLE `demo` Thrive together in Apache HoraeDB (incubating) community with users and developers from all around the world. -- Discuss at [dev mailing list](mailto:dev-subscribe@horaedb.apache.org) ([subscribe](mailto:dev-subscribe@horaedb.apache.org?subject=(send%20this%20email%20to%20subscribe)) / [unsubscribe](mailto:dev-unsubscribe@horaedb.apache.org?subject=(send%20this%20email%20to%20unsubscribe)) / [archives](https://lists.apache.org/list.html?dev@horaedb.apache.org)) -- Send [request](mailto:dev@horaedb.apache.org?subject=(Request%to%20join%20HoraeDB%20slack)) to `dev@horaedb.apache.org` to join HoraeDB slack channel -- Or you can join our community [here](http://horaedb.apache.org/community/) +- Ask questions on [GitHub Discussion](https://github.com/apache/horaedb/discussions). +- Chat with developers/users on [Discord](https://discord.gg/h5r4kVMRYN) or [DingTalk](https://horaedb.apache.org/images/dingtalk.jpg). +- Mailing lists are a form of communication used by the Apache community. See guide [here](http://horaedb.apache.org/community/) to subscribe to our list. -Read our [Contributing Guide](CONTRIBUTING.md) and make your first contribution! +> Read our [Contributing Guide](CONTRIBUTING.md) and make your first contribution! 
## Acknowledgment diff --git a/docs/rfcs/20220702-prometheus-read-extension.md b/docs/rfcs/20220702-prometheus-read-extension.md index b35c1dc2d1..e1044f95d4 100644 --- a/docs/rfcs/20220702-prometheus-read-extension.md +++ b/docs/rfcs/20220702-prometheus-read-extension.md @@ -1,18 +1,18 @@ -Prometheus read extension for HoraeDB +Prometheus read extension for HoraeDB --------------------------- - Feature Name: prometheus-read-extension -- Tracking Issue: https://github.com/apache/incubator-horaedb/issues/90 +- Tracking Issue: https://github.com/apache/horaedb/issues/90 # Summary Drop-in and full-featured Prometheus read extension for HoraeDB # Motivation -Prometheus and PromQL are wide used in monitoring scenarios. It would be great if HoraeDB can be queried using PromQL. +Prometheus and PromQL are wide used in monitoring scenarios. It would be great if HoraeDB can be queried using PromQL. HoraeDB has the ability to store and compute a large amount of data. But PromQL contains some specific operators. Though HoraeDB supports a subset, it is hard and infeasible to implement all of them. -There are some brilliant distributed solutions like `Thanos` and `Cortex`. But the computation ability is limited in aspects of distributed execution or extensible (`Thanos` supports split query on time range (https://thanos.io/tip/components/query-frontend.md/#splitting). Combining `Prometheus` with `HoraeDB` can gain both high performance computation and the ability to query in other forms like SQL. +There are some brilliant distributed solutions like `Thanos` and `Cortex`. But the computation ability is limited in aspects of distributed execution or extensible (`Thanos` supports split query on time range (https://thanos.io/tip/components/query-frontend.md/#splitting). Combining `Prometheus` with `HoraeDB` can gain both high performance computation and the ability to query in other forms like SQL. This proposal aims to provide a way that: @@ -110,7 +110,7 @@ Query Frontend has to feed PromQL and SQL to servers separately because this int 1. `Query Frontend` accepts a PromQL. 2. `Query Frontend` splits the original PromQL into two sub queries and assigns a `TaskID`. -3. `Query Frontend` sends sub PromQL to `Prometheus` and sub SQL to HoraeDB. +3. `Query Frontend` sends sub PromQL to `Prometheus` and sub SQL to HoraeDB. 4. `Prometheus` processes the sub PromQL. It will query the data source (HoraeDB) for data. 5. `HoraeDB` receives a request from `Prometheus`, and a sub-SQL with the same `TaskID` from `Query Frontend`. 6. `HoraeDB` processes and returns result to `Prometheus`. @@ -137,48 +137,48 @@ Query_Frontend -> Client : response PromQL request --> ```plaintext - ,.-^^-._ - ,-. |-.____.-| - `-' | | - /|\ | | - | ,--------------. ,----------. | | - / \ |Query_Frontend| |Prometheus| '-.____.-' - Client `------+-------' `----+-----' HoraeDB - | PromQL request | | | - | -----------------------> | | - | | | | - | | sub PromQL request with TaskID| | - | | ------------------------------> | - | | | | - | | sub SQL request with TaskID | - | | ---------------------------------------------------------------> - | | | | - | | | remote storage read with TaskID| - | | | -------------------------------> - | | | | - | | | |----. + ,.-^^-._ + ,-. |-.____.-| + `-' | | + /|\ | | + | ,--------------. ,----------. 
| | + / \ |Query_Frontend| |Prometheus| '-.____.-' + Client `------+-------' `----+-----' HoraeDB + | PromQL request | | | + | -----------------------> | | + | | | | + | | sub PromQL request with TaskID| | + | | ------------------------------> | + | | | | + | | sub SQL request with TaskID | + | | ---------------------------------------------------------------> + | | | | + | | | remote storage read with TaskID| + | | | -------------------------------> + | | | | + | | | |----. | | | | | pull data and compute - | | | |<---' - | | | | - | | | response remote read request | - | | | <------------------------------- - | | | | - | | |----. | - | | | | compute | - | | |<---' | - | | | | - | | response PromQL request | | - | | <------------------------------ | - | | | | - | response PromQL request| | | - | <----------------------- | | - Client ,------+-------. ,----+-----. HoraeDB - ,-. |Query_Frontend| |Prometheus| ,.-^^-._ - `-' `--------------' `----------' |-.____.-| - /|\ | | - | | | - / \ | | - '-.____.-' + | | | |<---' + | | | | + | | | response remote read request | + | | | <------------------------------- + | | | | + | | |----. | + | | | | compute | + | | |<---' | + | | | | + | | response PromQL request | | + | | <------------------------------ | + | | | | + | response PromQL request| | | + | <----------------------- | | + Client ,------+-------. ,----+-----. HoraeDB + ,-. |Query_Frontend| |Prometheus| ,.-^^-._ + `-' `--------------' `----------' |-.____.-| + /|\ | | + | | | + / \ | | + '-.____.-' ``` ### Separated HoraeDB cluster @@ -216,50 +216,50 @@ Query_Frontend -> Client : response PromQL request --> ```plaintext - ,.-^^-._ + ,.-^^-._ ,-. |-.____.-| `-' | | /|\ | | | ,--------------. ,----------. | | / \ |Query_Frontend| |Prometheus| '-.____.-' - Client `------+-------' `----+-----' HoraeDB - | PromQL request | | | - | -----------------------> | | - | | | | - | | sub PromQL request | | - | | ----------------------------> | - | | | | - | |----. | | - | | | store the sub SQL | | - | |<---' | | - | | | | - | | remote storage read | | - | | <---------------------------- | - | | | | - | | query sub SQL using HoraeDB Client | - | | ------------------------------------------------>| - | | | | - | | sub SQL query result | - | | <------------------------------------------------| - | | | | - | |----. | | - | | | transform data format | | - | |<---' | | - | | | | - | | response remote read request| | - | | ----------------------------> | - | | | | - | | |----. | - | | | | compute | - | | |<---' | - | | | | - | | response sub PromQL request | | - | | <---------------------------- | - | | | | - | response PromQL request| | | - | <----------------------- | | - Client ,------+-------. ,----+-----. HoraeDB - ,-. |Query_Frontend| |Prometheus| ,.-^^-._ + Client `------+-------' `----+-----' HoraeDB + | PromQL request | | | + | -----------------------> | | + | | | | + | | sub PromQL request | | + | | ----------------------------> | + | | | | + | |----. | | + | | | store the sub SQL | | + | |<---' | | + | | | | + | | remote storage read | | + | | <---------------------------- | + | | | | + | | query sub SQL using HoraeDB Client | + | | ------------------------------------------------>| + | | | | + | | sub SQL query result | + | | <------------------------------------------------| + | | | | + | |----. | | + | | | transform data format | | + | |<---' | | + | | | | + | | response remote read request| | + | | ----------------------------> | + | | | | + | | |----. 
| + | | | | compute | + | | |<---' | + | | | | + | | response sub PromQL request | | + | | <---------------------------- | + | | | | + | response PromQL request| | | + | <----------------------- | | + Client ,------+-------. ,----+-----. HoraeDB + ,-. |Query_Frontend| |Prometheus| ,.-^^-._ `-' `--------------' `----------' |-.____.-| /|\ | | | | | @@ -268,13 +268,13 @@ Query_Frontend -> Client : response PromQL request ``` ## Comparison -Both ways can achieve our initial requirements and are able to implement distributed execution in the future. +Both ways can achieve our initial requirements and are able to implement distributed execution in the future. - Embedded `HoraeDB` - Pros. - `HoraeDB` feeds data to `Prometheus` directly, reducing some computation and transmission. - Cons. - - Need to customize a `Prometheus` specific interface in `HoraeDB`. + - Need to customize a `Prometheus` specific interface in `HoraeDB`. - The deployment may requires all three components bound together for simplicity. - Separated `HoraeDB` cluster - Pros. @@ -282,7 +282,7 @@ Both ways can achieve our initial requirements and are able to implement distrib - The deployment only requires one `Query Frontend` along with `Prometheus` which is more lightweight and less invasive. - States of `HoraeDB` and `Query Frontend` are simple and clear. - Cons. - - One more data transforming and forwarding in `Query Frontend` (pass results from `HoraeDB` to `Prometheus`). + - One more data transforming and forwarding in `Query Frontend` (pass results from `HoraeDB` to `Prometheus`). # Drawbacks Detailed in the "Comparison" section above. diff --git a/horaemeta/CONTRIBUTING.md b/horaemeta/CONTRIBUTING.md index 97aa18b210..e361cf0b59 100644 --- a/horaemeta/CONTRIBUTING.md +++ b/horaemeta/CONTRIBUTING.md @@ -5,14 +5,14 @@ To make the process easier and more valuable for everyone involved we have a few ## Submitting Issues and Feature Requests -Before you file an [issue](https://github.com/apache/incubator-horaedb-meta/issues/new), please search existing issues in case the same or similar issues have already been filed. +Before you file an [issue](https://github.com/apache/horaedb/issues/new), please search existing issues in case the same or similar issues have already been filed. If you find an existing open ticket covering your issue then please avoid adding "👍" or "me too" comments; Github notifications can cause a lot of noise for the project maintainers who triage the back-log. However, if you have a new piece of information for an existing ticket and you think it may help the investigation or resolution, then please do add it as a comment! You can signal to the team that you're experiencing an existing issue with one of Github's emoji reactions (these are a good way to add "weight" to an issue from a prioritisation perspective). ### Submitting an Issue -The [New Issue]((https://github.com/apache/incubator-horaedb-meta/issues/new)) page has templates for both bug reports and feature requests. +The [New Issue]((https://github.com/apache/horaedb/issues/new)) page has templates for both bug reports and feature requests. Please fill one of them out! The issue templates provide details on what information we will find useful to help us fix an issue. In short though, the more information you can provide us about your environment and what behaviour you're seeing, the easier we can fix the issue. @@ -30,14 +30,14 @@ Please see the [Style Guide](docs/style_guide.md) for more details. 
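The fork-and-branch workflow described in the next paragraph looks roughly like the sketch below; the branch name and commit message are placeholders, and the message follows the Conventional Commits style required for PRs:

```bash
# Work on a branch of your fork (names below are placeholders).
git checkout -b my-feature-branch
# ... edit, test, and run the local checks ...
git commit -m "feat: describe the change here"   # Conventional Commits: <type>: <summary>
git push origin my-feature-branch                # then open a Pull Request on GitHub
```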
To open a PR you will need to have a Github account. Fork the `horaemeta` repo and work on a branch on your fork. -When you have completed your changes, or you want some incremental feedback make a Pull Request to HoraeDB [here](https://github.com/apache/incubator-horaedb-meta/compare). +When you have completed your changes, or you want some incremental feedback make a Pull Request to HoraeDB [here](https://github.com/apache/horaedb/compare). If you want to discuss some work in progress then please prefix `[WIP]` to the PR title. For PRs that you consider ready for review, verify the following locally before you submit it: -* you have a coherent set of logical commits, with messages conforming to the [Conventional Commits](https://github.com/apache/incubator-horaedb-docs/blob/main/docs/src/en/dev/conventional_commit.md) specification; +* you have a coherent set of logical commits, with messages conforming to the [Conventional Commits](https://horaedb.apache.org/docs/dev/conventional_commit/) specification; * all the tests and/or benchmarks pass, including documentation tests; * the code is correctly formatted and all linter checks pass; and * you haven't left any "code cruft" (commented out code blocks etc). From 60b52177fe102ac0a0cbf4805348eb70add2f23c Mon Sep 17 00:00:00 2001 From: Jiacai Liu Date: Mon, 28 Oct 2024 17:38:28 +0800 Subject: [PATCH 2/4] feat: impl write procedure scaffold (#1580) ## Rationale Implement write procedure for TimeMergeStorage ## Detailed Changes - Add basic write implementation. ## Test Plan CI --- .github/workflows/metric-engine-ci.yml | 4 + horaedb/Cargo.lock | 390 ++++++++++++++++++------- horaedb/Cargo.toml | 6 +- horaedb/Makefile | 6 +- horaedb/metric_engine/Cargo.toml | 5 + horaedb/metric_engine/src/error.rs | 1 + horaedb/metric_engine/src/lib.rs | 2 +- horaedb/metric_engine/src/manifest.rs | 116 +++++++- horaedb/metric_engine/src/sst.rs | 76 ++++- horaedb/metric_engine/src/storage.rs | 118 ++++++-- horaedb/metric_engine/src/types.rs | 3 +- horaedb/pb_types/Cargo.toml | 37 +++ horaedb/pb_types/build.rs | 23 ++ horaedb/pb_types/protos/sst.proto | 50 ++++ horaedb/pb_types/src/lib.rs | 22 ++ horaedb/rust-toolchain.toml | 20 ++ 16 files changed, 749 insertions(+), 130 deletions(-) create mode 100644 horaedb/pb_types/Cargo.toml create mode 100644 horaedb/pb_types/build.rs create mode 100644 horaedb/pb_types/protos/sst.proto create mode 100644 horaedb/pb_types/src/lib.rs create mode 100644 horaedb/rust-toolchain.toml diff --git a/.github/workflows/metric-engine-ci.yml b/.github/workflows/metric-engine-ci.yml index db3bf5bddc..be80ebe8fb 100644 --- a/.github/workflows/metric-engine-ci.yml +++ b/.github/workflows/metric-engine-ci.yml @@ -53,6 +53,10 @@ jobs: - name: Release Disk Quota run: | sudo make ensure-disk-quota + - name: Setup Build Environment + run: | + sudo apt update + sudo apt install --yes protobuf-compiler - name: Install check binaries run: | rustup component add clippy diff --git a/horaedb/Cargo.lock b/horaedb/Cargo.lock index df1d9e0d23..632b74b743 100644 --- a/horaedb/Cargo.lock +++ b/horaedb/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "addr2line" @@ -122,17 +122,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45aef0d9cf9a039bf6cd1acc451b137aca819977b0928dece52bd92811b640ba" dependencies = [ "arrow-arith 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-cast 53.1.0", "arrow-csv 53.0.0", - "arrow-data 53.0.0", - "arrow-ipc 53.0.0", + "arrow-data 53.1.0", + "arrow-ipc 53.1.0", "arrow-json 53.0.0", "arrow-ord 53.0.0", "arrow-row 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-schema 53.1.0", + "arrow-select 53.1.0", "arrow-string 53.0.0", ] @@ -157,10 +157,10 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03675e42d1560790f3524800e41403b40d0da1c793fe9528929fde06d8c7649a" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", "chrono", "half", "num", @@ -185,14 +185,14 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd2bf348cf9f02a5975c5962c7fa6dee107a2009a7b41ac5fb1a027e12dc033f" +checksum = "7f16835e8599dbbb1659fd869d865254c4cf32c6c2bb60b6942ac9fc36bfa5da" dependencies = [ "ahash", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-buffer 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", "chrono", "half", "hashbrown", @@ -212,9 +212,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3092e37715f168976012ce52273c3989b5793b0db5f06cbaa246be25e5f0924d" +checksum = "1a1f34f0faae77da6b142db61deba2cb6d60167592b178be317b341440acba80" dependencies = [ "bytes", "half", @@ -237,28 +237,28 @@ dependencies = [ "chrono", "comfy-table", "half", - "lexical-core", + "lexical-core 0.8.5", "num", "ryu", ] [[package]] name = "arrow-cast" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ce1018bb710d502f9db06af026ed3561552e493e989a79d0d0f5d9cf267a785" +checksum = "450e4abb5775bca0740bec0bcf1b1a5ae07eff43bd625661c4436d8e8e4540c4" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", + "arrow-select 53.1.0", "atoi", "base64", "chrono", "comfy-table", "half", - "lexical-core", + "lexical-core 1.0.2", "num", "ryu", ] @@ -278,7 +278,7 @@ dependencies = [ "csv", "csv-core", "lazy_static", - "lexical-core", + "lexical-core 0.8.5", "regex", ] @@ -288,16 +288,16 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd178575f45624d045e4ebee714e246a05d9652e41363ee3f57ec18cca97f740" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-cast 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", + "lexical-core 0.8.5", "regex", ] @@ -315,12 +315,12 @@ dependencies = [ [[package]] name = "arrow-data" 
-version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4ac0c4ee79150afe067dc4857154b3ee9c1cd52b5f40d59a77306d0ed18d65" +checksum = "2b1e618bbf714c7a9e8d97203c806734f012ff71ae3adc8ad1b075689f540634" dependencies = [ - "arrow-buffer 53.0.0", - "arrow-schema 53.0.0", + "arrow-buffer 53.1.0", + "arrow-schema 53.1.0", "half", "num", ] @@ -342,15 +342,15 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb307482348a1267f91b0912e962cd53440e5de0f7fb24c5f7b10da70b38c94a" +checksum = "f98e983549259a2b97049af7edfb8f28b8911682040e99a94e4ceb1196bd65c2" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-cast 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", "flatbuffers", ] @@ -368,7 +368,7 @@ dependencies = [ "chrono", "half", "indexmap", - "lexical-core", + "lexical-core 0.8.5", "num", "serde", "serde_json", @@ -380,15 +380,15 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d24805ba326758effdd6f2cbdd482fcfab749544f21b134701add25b33f474e6" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-cast 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", "chrono", "half", "indexmap", - "lexical-core", + "lexical-core 0.8.5", "num", "serde", "serde_json", @@ -415,11 +415,11 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "644046c479d80ae8ed02a7f1e1399072ea344ca6a7b0e293ab2d5d9ed924aa3b" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", + "arrow-select 53.1.0", "half", "num", ] @@ -445,10 +445,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a29791f8eb13b340ce35525b723f5f0df17ecb955599e11f65c2a94ab34e2efb" dependencies = [ "ahash", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", "half", ] @@ -460,9 +460,9 @@ checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" [[package]] name = "arrow-schema" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85320a3a2facf2b2822b57aa9d6d9d55edb8aee0b6b5d3b8df158e503d10858" +checksum = "fbf0388a18fd7f7f3fe3de01852d30f54ed5182f9004db700fbe3ba843ed2794" [[package]] name = "arrow-select" @@ -480,15 +480,15 @@ dependencies = [ [[package]] name = "arrow-select" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cc7e6b582e23855fd1625ce46e51647aa440c20ea2e71b1d748e0839dd73cba" +checksum = "b83e5723d307a38bf00ecd2972cd078d1339c7fd3eb044f609958a9a24463f3a" dependencies = [ "ahash", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", "num", ] @@ -515,11 
+515,11 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0775b6567c66e56ded19b87a954b6b1beffbdd784ef95a3a2b03f59570c1d230" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-data 53.1.0", + "arrow-schema 53.1.0", + "arrow-select 53.1.0", "memchr", "num", "regex", @@ -552,7 +552,7 @@ checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -905,7 +905,7 @@ dependencies = [ "num_cpus", "object_store 0.10.2", "parking_lot", - "parquet", + "parquet 52.2.0", "paste", "pin-project-lite", "rand", @@ -951,7 +951,7 @@ dependencies = [ "libc", "num_cpus", "object_store 0.10.2", - "parquet", + "parquet 52.2.0", "sqlparser", ] @@ -1334,7 +1334,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -1583,11 +1583,24 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "lexical-parse-float 0.8.5", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "lexical-write-float 0.8.5", + "lexical-write-integer 0.8.5", +] + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float 1.0.2", + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", + "lexical-write-float 1.0.2", + "lexical-write-integer 1.0.2", ] [[package]] @@ -1596,8 +1609,19 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "lexical-parse-integer", - "lexical-util", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", "static_assertions", ] @@ -1607,7 +1631,17 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -1620,14 +1654,34 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + [[package]] name = "lexical-write-float" version = "0.8.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "lexical-util", - "lexical-write-integer", + "lexical-util 0.8.5", + "lexical-write-integer 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util 1.0.3", + "lexical-write-integer 1.0.2", "static_assertions", ] @@ -1637,7 +1691,17 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -1695,6 +1759,10 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "macros" +version = "2.1.0" + [[package]] name = "md-5" version = "0.10.6" @@ -1718,11 +1786,16 @@ dependencies = [ "anyhow", "arrow 53.0.0", "async-trait", + "bytes", "datafusion", "futures", "itertools 0.3.25", "lazy_static", + "macros", "object_store 0.11.0", + "parquet 53.1.0", + "pb_types", + "prost", "thiserror", "tokio", ] @@ -1748,6 +1821,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "multimap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1973,6 +2052,42 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "parquet" +version = "53.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310c46a70a3ba90d98fec39fa2da6d9d731e544191da6fb56c9d199484d0dd3e" +dependencies = [ + "ahash", + "arrow-array 53.1.0", + "arrow-buffer 53.1.0", + "arrow-cast 53.1.0", + "arrow-data 53.1.0", + "arrow-ipc 53.1.0", + "arrow-schema 53.1.0", + "arrow-select 53.1.0", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown", + "lz4_flex", + "num", + "num-bigint", + "object_store 0.11.0", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", + "zstd-sys", +] + [[package]] name = "parse-zoneinfo" version = "0.3.1" @@ -1988,6 +2103,14 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pb_types" +version = "2.0.0" +dependencies = [ + "prost", + "prost-build", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -2069,6 +2192,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "910d41a655dac3b764f1ade94821093d3610248694320cd072303a8eedcf221d" +dependencies = [ + "proc-macro2", + "syn 2.0.82", +] + [[package]] name = "proc-macro2" version = "1.0.86" @@ -2078,6 +2211,59 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.13.0", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn 2.0.82", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + [[package]] name = "quote" version = "1.0.37" @@ -2239,7 +2425,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -2350,7 +2536,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -2387,7 +2573,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -2415,7 +2601,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -2437,9 +2623,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.77" +version = "2.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" dependencies = [ "proc-macro2", "quote", @@ -2476,7 +2662,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -2550,7 +2736,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -2585,7 +2771,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] @@ -2742,7 +2928,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", "wasm-bindgen-shared", ] @@ -2764,7 +2950,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2934,7 +3120,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn 2.0.82", ] [[package]] diff --git a/horaedb/Cargo.toml b/horaedb/Cargo.toml index b304860201..ee231cbea3 100644 --- a/horaedb/Cargo.toml +++ b/horaedb/Cargo.toml @@ -23,15 +23,19 @@ license = "Apache-2.0" [workspace] resolver = "2" -members = ["metric_engine", "server"] +members = ["metric_engine", "pb_types", "server"] [workspace.dependencies] anyhow = { version = "1.0" 
} metric_engine = { path = "metric_engine" } thiserror = "1" +bytes = "1" datafusion = "41" parquet = { version = "53" } object_store = { version = "0.11" } +macros = { path = "../src/components/macros" } +pb_types = { path = "pb_types" } +prost = { version = "0.13" } arrow = { version = "53", features = ["prettyprint"] } tokio = { version = "1", features = ["full"] } async-trait = "0.1" diff --git a/horaedb/Makefile b/horaedb/Makefile index 908a8cb872..72736b0af4 100644 --- a/horaedb/Makefile +++ b/horaedb/Makefile @@ -19,10 +19,14 @@ SHELL = /bin/bash clippy: cargo clippy --all-targets --all-features -- -D warnings \ - -A dead_code -A unused_variables # Remove these once we have a clean build + -A dead_code -A unused_variables -A clippy::unreachable # Remove these once we have a clean build sort: cargo sort --workspace --check fmt: cargo fmt -- --check + +fix: + cargo fmt + cargo sort --workspace diff --git a/horaedb/metric_engine/Cargo.toml b/horaedb/metric_engine/Cargo.toml index a29abb8c6d..d2ea85c8c9 100644 --- a/horaedb/metric_engine/Cargo.toml +++ b/horaedb/metric_engine/Cargo.toml @@ -34,10 +34,15 @@ workspace = true anyhow = { workspace = true } arrow = { workspace = true } async-trait = { workspace = true } +bytes = { workspace = true } datafusion = { workspace = true } futures = { workspace = true } itertools = { workspace = true } lazy_static = { workspace = true } +macros = { workspace = true } object_store = { workspace = true } +parquet = { workspace = true, features = ["object_store"] } +pb_types = { workspace = true } +prost = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } diff --git a/horaedb/metric_engine/src/error.rs b/horaedb/metric_engine/src/error.rs index 08e720f499..35b8e9aef5 100644 --- a/horaedb/metric_engine/src/error.rs +++ b/horaedb/metric_engine/src/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +pub use anyhow::Error as AnyhowError; use thiserror::Error; #[derive(Error, Debug)] diff --git a/horaedb/metric_engine/src/lib.rs b/horaedb/metric_engine/src/lib.rs index 1a0149dba2..8a05223ca3 100644 --- a/horaedb/metric_engine/src/lib.rs +++ b/horaedb/metric_engine/src/lib.rs @@ -23,4 +23,4 @@ mod sst; pub mod storage; pub mod types; -pub use error::{Error, Result}; +pub use error::{AnyhowError, Error, Result}; diff --git a/horaedb/metric_engine/src/manifest.rs b/horaedb/metric_engine/src/manifest.rs index 4839126765..aceac3daba 100644 --- a/horaedb/metric_engine/src/manifest.rs +++ b/horaedb/metric_engine/src/manifest.rs @@ -15,11 +15,119 @@ // specific language governing permissions and limitations // under the License. -pub struct Manifest {} +use anyhow::Context; +use bytes::Bytes; +use object_store::{path::Path, PutPayload}; +use prost::Message; +use tokio::sync::RwLock; + +use crate::{ + sst::{FileId, FileMeta, SstFile}, + types::ObjectStoreRef, + AnyhowError, Error, Result, +}; + +pub const PREFIX_PATH: &str = "manifest"; +pub const SNAPSHOT_FILENAME: &str = "snapshot"; + +pub struct Manifest { + path: String, + snapshot_path: Path, + store: ObjectStoreRef, + + payload: RwLock, +} + +pub struct Payload { + files: Vec, +} + +impl TryFrom for Payload { + type Error = Error; + + fn try_from(value: pb_types::Manifest) -> Result { + let files = value + .files + .into_iter() + .map(SstFile::try_from) + .collect::>>()?; + + Ok(Self { files }) + } +} impl Manifest { - pub fn new(id: u64) -> Self { - // Recover the manifest using the id from storage. 
- Self {} + pub async fn try_new(path: String, store: ObjectStoreRef) -> Result { + let snapshot_path = Path::from(format!("{path}/{SNAPSHOT_FILENAME}")); + let payload = match store.get(&snapshot_path).await { + Ok(v) => { + let bytes = v + .bytes() + .await + .context("failed to read manifest snapshot")?; + let pb_payload = pb_types::Manifest::decode(bytes) + .context("failed to decode manifest snapshot")?; + Payload::try_from(pb_payload)? + } + Err(err) => { + if err.to_string().contains("not found") { + Payload { files: vec![] } + } else { + let context = format!("Failed to get manifest snapshot, path:{snapshot_path}"); + return Err(AnyhowError::new(err).context(context).into()); + } + } + }; + + Ok(Self { + path, + snapshot_path, + store, + payload: RwLock::new(payload), + }) + } + + // TODO: Now this functions is poorly implemented, we concat new_sst to + // snapshot, and upload it back in a whole. + // In more efficient way, we can create a new diff file, and do compaction in + // background to merge them to `snapshot`. + pub async fn add_file(&self, id: FileId, meta: FileMeta) -> Result<()> { + let mut payload = self.payload.write().await; + let mut tmp_ssts = payload.files.clone(); + let new_sst = SstFile { id, meta }; + tmp_ssts.push(new_sst.clone()); + let pb_manifest = pb_types::Manifest { + files: tmp_ssts + .into_iter() + .map(|f| pb_types::SstFile { + id: f.id, + meta: Some(pb_types::SstMeta { + max_sequence: f.meta.max_sequence, + num_rows: f.meta.num_rows, + time_range: Some(pb_types::TimeRange { + start: f.meta.time_range.start, + end: f.meta.time_range.end, + }), + }), + }) + .collect::>(), + }; + + let mut buf = Vec::with_capacity(pb_manifest.encoded_len()); + pb_manifest + .encode(&mut buf) + .context("failed to encode manifest")?; + let put_payload = PutPayload::from_bytes(Bytes::from(buf)); + + // 1. Persist the snapshot + self.store + .put(&self.snapshot_path, put_payload) + .await + .context("Failed to update manifest")?; + + // 2. Update cached payload + payload.files.push(new_sst); + + Ok(()) } } diff --git a/horaedb/metric_engine/src/sst.rs b/horaedb/metric_engine/src/sst.rs index 37cc8f11d8..5eb96867ad 100644 --- a/horaedb/metric_engine/src/sst.rs +++ b/horaedb/metric_engine/src/sst.rs @@ -15,6 +15,78 @@ // specific language governing permissions and limitations // under the License. -pub struct SSTable { - pub id: u64, +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + LazyLock, + }, + time::SystemTime, +}; + +use macros::ensure; + +use crate::{types::TimeRange, Error}; + +pub const PREFIX_PATH: &str = "data"; + +pub type FileId = u64; + +#[derive(Clone, Debug)] +pub struct SstFile { + pub id: FileId, + pub meta: FileMeta, +} + +impl TryFrom for SstFile { + type Error = Error; + + fn try_from(value: pb_types::SstFile) -> Result { + ensure!(value.meta.is_some(), "file meta is missing"); + let meta = value.meta.unwrap(); + let meta = meta.try_into()?; + + Ok(Self { id: value.id, meta }) + } +} + +#[derive(Clone, Debug)] +pub struct FileMeta { + pub max_sequence: u64, + pub num_rows: u32, + pub time_range: TimeRange, +} + +impl TryFrom for FileMeta { + type Error = Error; + + fn try_from(value: pb_types::SstMeta) -> Result { + ensure!(value.time_range.is_some(), "time range is missing"); + let time_range = value.time_range.unwrap(); + + Ok(Self { + max_sequence: value.max_sequence, + num_rows: value.num_rows, + time_range: TimeRange { + start: time_range.start, + end: time_range.end, + }, + }) + } +} + +// Used for sst file id allocation. 
+// This number mustn't go backwards on restarts, otherwise file id +// collisions are possible. So don't change time on the server +// between server restarts. +static NEXT_ID: LazyLock = LazyLock::new(|| { + AtomicU64::new( + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_nanos() as u64, + ) +}); + +pub fn allocate_id() -> u64 { + NEXT_ID.fetch_add(1, Ordering::SeqCst) } diff --git a/horaedb/metric_engine/src/storage.rs b/horaedb/metric_engine/src/storage.rs index 1cae2bb9c0..4c5b2667e6 100644 --- a/horaedb/metric_engine/src/storage.rs +++ b/horaedb/metric_engine/src/storage.rs @@ -15,19 +15,30 @@ // specific language governing permissions and limitations // under the License. -use arrow::{array::RecordBatch, datatypes::Schema}; +use anyhow::Context; +use arrow::{ + array::{Int64Array, RecordBatch}, + datatypes::SchemaRef, +}; use async_trait::async_trait; use datafusion::logical_expr::Expr; +use macros::ensure; +use object_store::path::Path; +use parquet::{ + arrow::{async_writer::ParquetObjectWriter, AsyncArrowWriter}, + file::properties::WriterProperties, +}; use crate::{ manifest::Manifest, - sst::SSTable, - types::{ObjectStoreRef, SendableRecordBatchStream, TimeRange}, + sst::{allocate_id, FileId, FileMeta}, + types::{ObjectStoreRef, SendableRecordBatchStream, TimeRange, Timestamp}, Result, }; pub struct WriteRequest { batch: RecordBatch, + props: Option, } pub struct ScanRequest { @@ -42,7 +53,7 @@ pub struct CompactRequest {} /// Time-aware merge storage interface. #[async_trait] pub trait TimeMergeStorage { - fn schema(&self) -> Result<&Schema>; + fn schema(&self) -> &SchemaRef; async fn write(&self, req: WriteRequest) -> Result<()>; @@ -53,35 +64,106 @@ pub trait TimeMergeStorage { async fn compact(&self, req: CompactRequest) -> Result<()>; } -/// TMStorage implementation using cloud object storage. +/// `TimeMergeStorage` implementation using cloud object storage. pub struct CloudObjectStorage { - name: String, - id: u64, + path: String, store: ObjectStoreRef, - sstables: Vec, + arrow_schema: SchemaRef, + timestamp_index: usize, manifest: Manifest, } +/// It will organize the data in the following way: +/// ```plaintext +/// {root_path}/manifest/snapshot +/// {root_path}/manifest/timestamp1 +/// {root_path}/manifest/timestamp2 +/// {root_path}/manifest/... +/// {root_path}/data/timestamp_a.sst +/// {root_path}/data/timestamp_b.sst +/// {root_path}/data/... 
+/// ``` impl CloudObjectStorage { - pub fn new(name: String, id: u64, store: ObjectStoreRef) -> Self { - Self { - name, - id, + pub async fn try_new( + root_path: String, + store: ObjectStoreRef, + arrow_schema: SchemaRef, + timestamp_index: usize, + ) -> Result { + let manifest_prefix = crate::manifest::PREFIX_PATH; + let manifest = + Manifest::try_new(format!("{root_path}/{manifest_prefix}"), store.clone()).await?; + Ok(Self { + path: root_path, + timestamp_index, store, - sstables: Vec::new(), - manifest: Manifest::new(id), - } + arrow_schema, + manifest, + }) + } + + fn build_file_path(&self, id: FileId) -> String { + let root = &self.path; + let prefix = crate::sst::PREFIX_PATH; + format!("{root}/{prefix}/{id}") + } + + async fn write_batch(&self, req: WriteRequest) -> Result { + let file_id = allocate_id(); + let file_path = self.build_file_path(file_id); + let object_store_writer = + ParquetObjectWriter::new(self.store.clone(), Path::from(file_path)); + let mut writer = + AsyncArrowWriter::try_new(object_store_writer, self.schema().clone(), req.props) + .context("create arrow writer")?; + + // TODO: sort record batch according to primary key columns. + writer + .write(&req.batch) + .await + .context("write arrow batch")?; + writer.close().await.context("close arrow writer")?; + + Ok(file_id) } } #[async_trait] impl TimeMergeStorage for CloudObjectStorage { - fn schema(&self) -> Result<&Schema> { - todo!() + fn schema(&self) -> &SchemaRef { + &self.arrow_schema } async fn write(&self, req: WriteRequest) -> Result<()> { - todo!() + ensure!(req.batch.schema_ref().eq(self.schema()), "schema not match"); + + let num_rows = req.batch.num_rows(); + let time_column = req + .batch + .column(self.timestamp_index) + .as_any() + .downcast_ref::() + .context("timestamp column should be int64")?; + + let mut start = Timestamp::MAX; + let mut end = Timestamp::MIN; + for v in time_column.values() { + start = start.min(*v); + end = end.max(*v); + } + let time_range = TimeRange { + start, + end: end + 1, + }; + let file_id = self.write_batch(req).await?; + let file_meta = FileMeta { + max_sequence: file_id, // Since file_id in increasing order, we can use it as sequence. + num_rows: num_rows as u32, + time_range, + }; + self.manifest.add_file(file_id, file_meta).await?; + + Ok(()) } async fn scan(&self, req: ScanRequest) -> Result { diff --git a/horaedb/metric_engine/src/types.rs b/horaedb/metric_engine/src/types.rs index 08d42fcdcb..96a4b74ad4 100644 --- a/horaedb/metric_engine/src/types.rs +++ b/horaedb/metric_engine/src/types.rs @@ -23,7 +23,8 @@ use object_store::ObjectStore; use crate::error::Result; -pub type TimeRange = Range; +pub type Timestamp = i64; +pub type TimeRange = Range; pub type ObjectStoreRef = Arc; diff --git a/horaedb/pb_types/Cargo.toml b/horaedb/pb_types/Cargo.toml new file mode 100644 index 0000000000..e6929fa018 --- /dev/null +++ b/horaedb/pb_types/Cargo.toml @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "pb_types" + +[package.license] +workspace = true + +[package.version] +workspace = true + +[package.authors] +workspace = true + +[package.edition] +workspace = true + +[dependencies] +prost = { workspace = true } + +[build-dependencies] +prost-build = { version = "0.13" } diff --git a/horaedb/pb_types/build.rs b/horaedb/pb_types/build.rs new file mode 100644 index 0000000000..7eb68464b0 --- /dev/null +++ b/horaedb/pb_types/build.rs @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::io::Result; + +fn main() -> Result<()> { + prost_build::compile_protos(&["protos/sst.proto"], &["protos/"])?; + Ok(()) +} diff --git a/horaedb/pb_types/protos/sst.proto b/horaedb/pb_types/protos/sst.proto new file mode 100644 index 0000000000..ce3db30169 --- /dev/null +++ b/horaedb/pb_types/protos/sst.proto @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +syntax = "proto3"; + +package pb_types.sst; + +// Time range of [start, end) +message TimeRange { + // inclusive + int64 start = 1; + // exclusive + int64 end = 2; +} + +message SstMeta { + uint64 max_sequence = 1; + uint32 num_rows = 2; + TimeRange time_range = 3; +} + +message SstFile { + uint64 id = 1; + SstMeta meta = 2; +} + +message Manifest { + repeated SstFile files = 1; +} + +message MetaUpdate { + repeated SstFile to_adds = 1; + repeated uint64 to_removes = 2; +} diff --git a/horaedb/pb_types/src/lib.rs b/horaedb/pb_types/src/lib.rs new file mode 100644 index 0000000000..bfa215b02c --- /dev/null +++ b/horaedb/pb_types/src/lib.rs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod pb_types { + include!(concat!(env!("OUT_DIR"), "/pb_types.sst.rs")); +} + +pub use pb_types::*; diff --git a/horaedb/rust-toolchain.toml b/horaedb/rust-toolchain.toml new file mode 100644 index 0000000000..4c621ca810 --- /dev/null +++ b/horaedb/rust-toolchain.toml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[toolchain] +channel = "nightly-2024-10-15" +components = [ "rustfmt", "clippy" ] From e47d9ae7cbf240d50d009317562bc4247c92b8c4 Mon Sep 17 00:00:00 2001 From: Leslie Su <3530611790@qq.com> Date: Wed, 30 Oct 2024 08:48:52 +0800 Subject: [PATCH 3/4] feat: support horaedb submit compaction task to remote (#1563) ## Rationale The subtask to support compaction offloading. See #1545 ## Detailed Changes **Compaction node support remote compaction service** - Define `CompactionServiceImpl` to support compaction rpc service. - Introduce `NodeType` to distinguish compaction node and horaedb node. Enable the deployment of compaction node. - Impl `compaction_client` for horaedb node to access remote compaction node. **Horaedb node support compaction offload** - Introduce `compaction_mode` in analytic engine's `Config` to determine whether exec compaction offload or not. - Define `CompactionNodePicker` trait, supporting get remote compaction node info. 
- Impl `RemoteCompactionRunner`, supporting pick remote node and pass compaction task to the node. - Add docs (e.g. `example-cluster-n.toml`) to explain how to deploy a cluster supporting compaction offload. ## Test Plan --------- Co-authored-by: kamille --- Cargo.lock | 9 +- Cargo.toml | 2 +- src/analytic_engine/Cargo.toml | 6 + src/analytic_engine/src/compaction/mod.rs | 91 ++++++- .../src/compaction/runner/local_runner.rs | 1 + .../src/compaction/runner/mod.rs | 244 +++++++++++++++++- .../src/compaction/runner/node_picker.rs | 88 +++++++ .../src/compaction/runner/remote_client.rs | 148 +++++++++++ .../src/compaction/runner/remote_runner.rs | 116 +++++++++ src/analytic_engine/src/instance/engine.rs | 7 + .../src/instance/flush_compaction.rs | 20 ++ src/analytic_engine/src/instance/open.rs | 56 +++- src/analytic_engine/src/lib.rs | 21 +- src/analytic_engine/src/setup.rs | 6 + src/analytic_engine/src/sst/factory.rs | 65 ++++- src/analytic_engine/src/sst/file.rs | 98 ++++++- src/analytic_engine/src/sst/writer.rs | 95 ++++++- src/analytic_engine/src/table_options.rs | 54 ++++ src/analytic_engine/src/tests/util.rs | 1 + src/benchmarks/src/util.rs | 1 + src/cluster/src/cluster_impl.rs | 8 +- src/cluster/src/config.rs | 3 + src/cluster/src/lib.rs | 6 +- src/common_types/src/cluster.rs | 26 ++ src/common_types/src/lib.rs | 1 + src/horaedb/Cargo.toml | 53 ++-- src/horaedb/src/config.rs | 4 +- src/horaedb/src/setup.rs | 16 +- src/meta_client/src/lib.rs | 14 +- src/meta_client/src/meta_impl.rs | 14 +- src/meta_client/src/types.rs | 9 + src/router/src/cluster_based.rs | 6 +- .../src/grpc/compaction_service/error.rs | 96 +++++++ src/server/src/grpc/compaction_service/mod.rs | 113 ++++++++ src/server/src/grpc/mod.rs | 65 ++++- src/server/src/server.rs | 9 + src/table_engine/src/predicate.rs | 6 +- src/table_engine/src/table.rs | 6 + 38 files changed, 1505 insertions(+), 79 deletions(-) create mode 100644 src/analytic_engine/src/compaction/runner/node_picker.rs create mode 100644 src/analytic_engine/src/compaction/runner/remote_client.rs create mode 100644 src/analytic_engine/src/compaction/runner/remote_runner.rs create mode 100644 src/common_types/src/cluster.rs create mode 100644 src/server/src/grpc/compaction_service/error.rs create mode 100644 src/server/src/grpc/compaction_service/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 33ed54bcd5..1830a3fb05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,7 @@ dependencies = [ "atomic_enum", "base64 0.13.1", "bytes_ext", + "cluster", "codec", "common_types", "datafusion", @@ -107,6 +108,7 @@ dependencies = [ "lru 0.7.8", "macros", "message_queue", + "meta_client", "metric_ext", "object_store 2.1.0", "parquet", @@ -116,10 +118,12 @@ dependencies = [ "prost 0.11.8", "rand 0.8.5", "remote_engine_client", + "reqwest 0.12.4", "router", "runtime", "sampling_cache", "serde", + "serde_json", "size_ext", "skiplist", "smallvec", @@ -131,7 +135,9 @@ dependencies = [ "thiserror", "time_ext", "tokio", + "tonic 0.8.3", "trace_metric", + "url", "wal", "xorfilter-rs", ] @@ -3150,6 +3156,7 @@ dependencies = [ "catalog_impls", "clap", "cluster", + "common_types", "datafusion", "df_operator", "etcd-client", @@ -3223,7 +3230,7 @@ dependencies = [ [[package]] name = "horaedbproto" version = "2.0.0" -source = "git+https://github.com/apache/incubator-horaedb-proto.git?rev=a5874d9fedee32ab1292252c4eb6defc4f6e245a#a5874d9fedee32ab1292252c4eb6defc4f6e245a" +source = 
"git+https://github.com/apache/incubator-horaedb-proto.git?rev=fac8564e6e3d50e51daa2af6eb905e747f3191b0#fac8564e6e3d50e51daa2af6eb905e747f3191b0" dependencies = [ "prost 0.11.8", "protoc-bin-vendored", diff --git a/Cargo.toml b/Cargo.toml index d2d73fd0ab..b6ca6273dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,7 +103,7 @@ thiserror = "1" bytes_ext = { path = "src/components/bytes_ext" } catalog = { path = "src/catalog" } catalog_impls = { path = "src/catalog_impls" } -horaedbproto = { git = "https://github.com/apache/incubator-horaedb-proto.git", rev = "a5874d9fedee32ab1292252c4eb6defc4f6e245a" } +horaedbproto = { git = "https://github.com/apache/incubator-horaedb-proto.git", rev = "fac8564e6e3d50e51daa2af6eb905e747f3191b0" } codec = { path = "src/components/codec" } chrono = "0.4" clap = { version = "4.5.1", features = ["derive"] } diff --git a/src/analytic_engine/Cargo.toml b/src/analytic_engine/Cargo.toml index 09ff47af21..d6c642eb75 100644 --- a/src/analytic_engine/Cargo.toml +++ b/src/analytic_engine/Cargo.toml @@ -49,6 +49,7 @@ async-trait = { workspace = true } atomic_enum = { workspace = true } base64 = { workspace = true } bytes_ext = { workspace = true } +cluster = { workspace = true } codec = { workspace = true } common_types = { workspace = true } datafusion = { workspace = true } @@ -66,6 +67,7 @@ logger = { workspace = true } lru = { workspace = true } macros = { workspace = true } message_queue = { workspace = true } +meta_client = { workspace = true } metric_ext = { workspace = true } object_store = { workspace = true } parquet = { workspace = true } @@ -73,10 +75,12 @@ parquet_ext = { workspace = true } prometheus = { workspace = true } prost = { workspace = true } remote_engine_client = { workspace = true } +reqwest = { workspace = true } router = { workspace = true } runtime = { workspace = true } sampling_cache = { workspace = true } serde = { workspace = true } +serde_json = { workspace = true } size_ext = { workspace = true } skiplist = { path = "../components/skiplist" } smallvec = { workspace = true } @@ -87,7 +91,9 @@ tempfile = { workspace = true, optional = true } thiserror = { workspace = true } time_ext = { workspace = true } tokio = { workspace = true } +tonic = { workspace = true } trace_metric = { workspace = true } +url = "2.2" wal = { workspace = true } xorfilter-rs = { workspace = true } diff --git a/src/analytic_engine/src/compaction/mod.rs b/src/analytic_engine/src/compaction/mod.rs index 34048d6b35..8f63c93ece 100644 --- a/src/analytic_engine/src/compaction/mod.rs +++ b/src/analytic_engine/src/compaction/mod.rs @@ -20,15 +20,17 @@ use std::{collections::HashMap, fmt, str::FromStr, sync::Arc}; use common_types::COMPACTION_STRATEGY; +use generic_error::{BoxError, GenericError}; +use macros::define_result; use serde::{Deserialize, Serialize}; use size_ext::ReadableSize; -use snafu::{ensure, Backtrace, GenerateBacktrace, ResultExt, Snafu}; +use snafu::{ensure, Backtrace, GenerateBacktrace, OptionExt, ResultExt, Snafu}; use time_ext::TimeUnit; use tokio::sync::oneshot; use crate::{ compaction::picker::{CommonCompactionPicker, CompactionPickerRef}, - sst::file::{FileHandle, Level}, + sst::file::{FileHandle, FileMeta, FilePurgeQueue, Level}, table::data::TableDataRef, }; @@ -72,8 +74,22 @@ pub enum Error { }, #[snafu(display("Invalid compaction option value, err: {}", error))] InvalidOption { error: String, backtrace: Backtrace }, + + #[snafu(display("Empty file meta.\nBacktrace:\n{}", backtrace))] + EmptyFileMeta { backtrace: Backtrace }, + + 
#[snafu(display("Failed to convert file meta, err:{}", source))] + ConvertFileMeta { source: GenericError }, + + #[snafu(display("Empty purge queue.\nBacktrace:\n{}", backtrace))] + EmptyPurgeQueue { backtrace: Backtrace }, + + #[snafu(display("Failed to convert level, err:{}", source))] + ConvertLevel { source: GenericError }, } +define_result!(Error); + #[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Serialize)] pub enum CompactionStrategy { #[default] @@ -145,7 +161,7 @@ impl CompactionStrategy { pub(crate) fn parse_from( value: &str, options: &HashMap, - ) -> Result { + ) -> Result { match value.trim().to_lowercase().as_str() { DEFAULT_STRATEGY => Ok(CompactionStrategy::Default), STC_STRATEGY => Ok(CompactionStrategy::SizeTiered( @@ -182,7 +198,7 @@ impl CompactionStrategy { } impl SizeTieredCompactionOptions { - pub(crate) fn validate(&self) -> Result<(), Error> { + pub(crate) fn validate(&self) -> Result<()> { ensure!( self.bucket_high > self.bucket_low, InvalidOption { @@ -215,7 +231,7 @@ impl SizeTieredCompactionOptions { pub(crate) fn parse_from( options: &HashMap, - ) -> Result { + ) -> Result { let mut opts = SizeTieredCompactionOptions::default(); if let Some(v) = options.get(BUCKET_LOW_KEY) { opts.bucket_low = v.parse().context(ParseFloat { @@ -278,7 +294,7 @@ impl TimeWindowCompactionOptions { ); } - pub(crate) fn validate(&self) -> Result<(), Error> { + pub(crate) fn validate(&self) -> Result<()> { if !Self::valid_timestamp_unit(self.timestamp_resolution) { return InvalidOption { error: format!( @@ -294,7 +310,7 @@ impl TimeWindowCompactionOptions { pub(crate) fn parse_from( options: &HashMap, - ) -> Result { + ) -> Result { let mut opts = TimeWindowCompactionOptions { size_tiered: SizeTieredCompactionOptions::parse_from(options)?, ..Default::default() @@ -326,6 +342,67 @@ pub struct CompactionInputFiles { pub output_level: Level, } +impl TryFrom for CompactionInputFiles { + type Error = Error; + + fn try_from(value: horaedbproto::compaction_service::CompactionInputFiles) -> Result { + let level: Level = value.level.try_into().box_err().context(ConvertLevel)?; + let output_level: Level = value + .output_level + .try_into() + .box_err() + .context(ConvertLevel)?; + + let mut files: Vec = Vec::with_capacity(value.files.len()); + for file in value.files { + let meta: FileMeta = file + .meta + .context(EmptyFileMeta)? + .try_into() + .box_err() + .context(ConvertFileMeta)?; + + let purge_queue: FilePurgeQueue = file.purge_queue.context(EmptyPurgeQueue)?.into(); + + files.push({ + let handle = FileHandle::new(meta, purge_queue); + handle.set_being_compacted(file.being_compacted); + handle + }); + } + + Ok(CompactionInputFiles { + level, + files, + output_level, + }) + } +} + +impl From for horaedbproto::compaction_service::CompactionInputFiles { + fn from(value: CompactionInputFiles) -> Self { + let mut files = Vec::with_capacity(value.files.len()); + for file in value.files { + let handle = horaedbproto::compaction_service::FileHandle { + meta: Some(file.meta().into()), + purge_queue: Some(horaedbproto::compaction_service::FilePurgeQueue { + space_id: file.space_id(), + table_id: file.table_id().into(), + }), + being_compacted: file.being_compacted(), + metrics: Some(horaedbproto::compaction_service::SstMetrics {}), + }; + files.push(handle); + } + + Self { + level: value.level.as_u32(), + files, + output_level: value.output_level.as_u32(), + } + } +} + #[derive(Debug, Default, Clone)] pub struct ExpiredFiles { /// Level of the expired files. 
diff --git a/src/analytic_engine/src/compaction/runner/local_runner.rs b/src/analytic_engine/src/compaction/runner/local_runner.rs index fc34b2bfa6..e379d78544 100644 --- a/src/analytic_engine/src/compaction/runner/local_runner.rs +++ b/src/analytic_engine/src/compaction/runner/local_runner.rs @@ -45,6 +45,7 @@ use crate::{ const MAX_RECORD_BATCHES_IN_FLIGHT_WHEN_COMPACTION_READ: usize = 64; /// Executor carrying for actual compaction work +#[derive(Clone)] pub struct LocalCompactionRunner { runtime: Arc, scan_options: ScanOptions, diff --git a/src/analytic_engine/src/compaction/runner/mod.rs b/src/analytic_engine/src/compaction/runner/mod.rs index 12f333eac3..c8e34484cc 100644 --- a/src/analytic_engine/src/compaction/runner/mod.rs +++ b/src/analytic_engine/src/compaction/runner/mod.rs @@ -16,17 +16,23 @@ // under the License. pub mod local_runner; +pub mod node_picker; +mod remote_client; +pub mod remote_runner; use std::sync::Arc; use async_trait::async_trait; use common_types::{request_id::RequestId, schema::Schema, SequenceNumber}; +use generic_error::{BoxError, GenericError}; +use macros::define_result; use object_store::Path; +use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; use table_engine::table::TableId; use crate::{ compaction::CompactionInputFiles, - instance::flush_compaction::Result, + instance::flush_compaction, row_iter::IterOptions, space::SpaceId, sst::{ @@ -39,12 +45,87 @@ use crate::{ /// Compaction runner #[async_trait] pub trait CompactionRunner: Send + Sync + 'static { - async fn run(&self, task: CompactionRunnerTask) -> Result; + async fn run( + &self, + task: CompactionRunnerTask, + ) -> flush_compaction::Result; } pub type CompactionRunnerPtr = Box; pub type CompactionRunnerRef = Arc; +#[derive(Debug, Snafu)] +#[snafu(visibility = "pub")] +pub enum Error { + #[snafu(display("Empty table schema.\nBacktrace:\n{}", backtrace))] + EmptyTableSchema { backtrace: Backtrace }, + + #[snafu(display("Empty input context.\nBacktrace:\n{}", backtrace))] + EmptyInputContext { backtrace: Backtrace }, + + #[snafu(display("Empty ouput context.\nBacktrace:\n{}", backtrace))] + EmptyOuputContext { backtrace: Backtrace }, + + #[snafu(display("Empty compaction input files.\nBacktrace:\n{}", backtrace))] + EmptyCompactionInputFiles { backtrace: Backtrace }, + + #[snafu(display("Empty write options.\nBacktrace:\n{}", backtrace))] + EmptySstWriteOptions { backtrace: Backtrace }, + + #[snafu(display("Sst meta data is empty.\nBacktrace:\n{backtrace}"))] + EmptySstMeta { backtrace: Backtrace }, + + #[snafu(display("Empty sst info.\nBacktrace:\n{}", backtrace))] + EmptySstInfo { backtrace: Backtrace }, + + #[snafu(display("Empty compaction task exec result.\nBacktrace:\n{}", backtrace))] + EmptyExecResult { backtrace: Backtrace }, + + #[snafu(display("Failed to convert table schema, err:{}", source))] + ConvertTableSchema { source: GenericError }, + + #[snafu(display("Failed to convert input context, err:{}", source))] + ConvertInputContext { source: GenericError }, + + #[snafu(display("Failed to convert ouput context, err:{}", source))] + ConvertOuputContext { source: GenericError }, + + #[snafu(display("Failed to convert compaction input files, err:{}", source))] + ConvertCompactionInputFiles { source: GenericError }, + + #[snafu(display("Failed to convert write options, err:{}", source))] + ConvertSstWriteOptions { source: GenericError }, + + #[snafu(display("Failed to convert sst info, err:{}", source))] + ConvertSstInfo { source: GenericError }, + + #[snafu(display("Failed to 
convert sst meta, err:{}", source))] + ConvertSstMeta { source: GenericError }, + + #[snafu(display("Failed to connect the service endpoint:{}, err:{}", addr, source,))] + FailConnect { addr: String, source: GenericError }, + + #[snafu(display("Failed to execute compaction task, err:{}", source))] + FailExecuteCompactionTask { source: GenericError }, + + #[snafu(display("Missing header in rpc response.\nBacktrace:\n{}", backtrace))] + MissingHeader { backtrace: Backtrace }, + + #[snafu(display( + "Bad response, resp code:{}, msg:{}.\nBacktrace:\n{}", + code, + msg, + backtrace + ))] + BadResponse { + code: u32, + msg: String, + backtrace: Backtrace, + }, +} + +define_result!(Error); + /// Compaction runner task #[derive(Debug, Clone)] pub struct CompactionRunnerTask { @@ -113,12 +194,106 @@ impl CompactionRunnerTask { } } +impl TryFrom + for CompactionRunnerTask +{ + type Error = Error; + + fn try_from( + request: horaedbproto::compaction_service::ExecuteCompactionTaskRequest, + ) -> Result { + let task_key = request.task_key; + let request_id: RequestId = request.request_id.into(); + + let schema: Schema = request + .schema + .context(EmptyTableSchema)? + .try_into() + .box_err() + .context(ConvertTableSchema)?; + + let space_id: SpaceId = request.space_id; + let table_id: TableId = request.table_id.into(); + let sequence: SequenceNumber = request.sequence; + + let input_ctx: InputContext = request + .input_ctx + .context(EmptyInputContext)? + .try_into() + .box_err() + .context(ConvertInputContext)?; + + let output_ctx: OutputContext = request + .output_ctx + .context(EmptyOuputContext)? + .try_into() + .box_err() + .context(ConvertOuputContext)?; + + Ok(Self { + task_key, + request_id, + schema, + space_id, + table_id, + sequence, + input_ctx, + output_ctx, + }) + } +} + +impl From for horaedbproto::compaction_service::ExecuteCompactionTaskRequest { + fn from(task: CompactionRunnerTask) -> Self { + Self { + task_key: task.task_key, + request_id: task.request_id.into(), + schema: Some((&(task.schema)).into()), + space_id: task.space_id, + table_id: task.table_id.into(), + sequence: task.sequence, + input_ctx: Some(task.input_ctx.into()), + output_ctx: Some(task.output_ctx.into()), + } + } +} + pub struct CompactionRunnerResult { pub output_file_path: Path, pub sst_info: SstInfo, pub sst_meta: MetaData, } +impl TryFrom + for CompactionRunnerResult +{ + type Error = Error; + + fn try_from( + resp: horaedbproto::compaction_service::ExecuteCompactionTaskResponse, + ) -> Result { + let res = resp.result.context(EmptyExecResult)?; + let sst_info = res + .sst_info + .context(EmptySstInfo)? + .try_into() + .box_err() + .context(ConvertSstInfo)?; + let sst_meta = res + .sst_meta + .context(EmptySstMeta)? + .try_into() + .box_err() + .context(ConvertSstMeta)?; + + Ok(Self { + output_file_path: res.output_file_path.into(), + sst_info, + sst_meta, + }) + } +} + #[derive(Debug, Clone)] pub struct InputContext { /// Input sst files in this compaction @@ -128,6 +303,43 @@ pub struct InputContext { pub need_dedup: bool, } +impl TryFrom for InputContext { + type Error = Error; + + fn try_from(value: horaedbproto::compaction_service::InputContext) -> Result { + let num_rows_per_row_group: usize = value.num_rows_per_row_group as usize; + let merge_iter_options = IterOptions { + batch_size: value.merge_iter_options as usize, + }; + let need_dedup = value.need_dedup; + + let files: CompactionInputFiles = value + .files + .context(EmptyCompactionInputFiles)? 
+ .try_into() + .box_err() + .context(ConvertCompactionInputFiles)?; + + Ok(InputContext { + files, + num_rows_per_row_group, + merge_iter_options, + need_dedup, + }) + } +} + +impl From for horaedbproto::compaction_service::InputContext { + fn from(value: InputContext) -> Self { + Self { + files: Some(value.files.into()), + num_rows_per_row_group: value.num_rows_per_row_group as u64, + merge_iter_options: value.merge_iter_options.batch_size as u64, + need_dedup: value.need_dedup, + } + } +} + #[derive(Debug, Clone)] pub struct OutputContext { /// Output sst file path @@ -135,3 +347,31 @@ pub struct OutputContext { /// Output sst write context pub write_options: SstWriteOptions, } + +impl TryFrom for OutputContext { + type Error = Error; + + fn try_from(value: horaedbproto::compaction_service::OutputContext) -> Result { + let file_path: Path = value.file_path.into(); + let write_options: SstWriteOptions = value + .write_options + .context(EmptySstWriteOptions)? + .try_into() + .box_err() + .context(ConvertSstWriteOptions)?; + + Ok(OutputContext { + file_path, + write_options, + }) + } +} + +impl From for horaedbproto::compaction_service::OutputContext { + fn from(value: OutputContext) -> Self { + Self { + file_path: value.file_path.into(), + write_options: Some(value.write_options.into()), + } + } +} diff --git a/src/analytic_engine/src/compaction/runner/node_picker.rs b/src/analytic_engine/src/compaction/runner/node_picker.rs new file mode 100644 index 0000000000..bf21787c71 --- /dev/null +++ b/src/analytic_engine/src/compaction/runner/node_picker.rs @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Remote compaction node picker. + +use std::sync::Arc; + +use async_trait::async_trait; +use macros::define_result; +use meta_client::{types::FetchCompactionNodeRequest, MetaClientRef}; +use serde::{Deserialize, Serialize}; +use snafu::{ResultExt, Snafu}; + +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(tag = "node_picker", content = "endpoint")] +pub enum NodePicker { + // Local node picker that specifies the local endpoint. + // The endpoint in the form `addr:port`. + Local(String), + Remote, +} + +#[async_trait] +pub trait CompactionNodePicker: Send + Sync { + /// Get the addr of the remote compaction node. + async fn get_compaction_node(&self) -> Result; +} + +pub type RemoteCompactionNodePickerRef = Arc; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Meta client fetch compaciton node failed, err:{source}."))] + FetchCompactionNodeFailure { source: meta_client::Error }, +} + +define_result!(Error); + +/// RemoteCompactionNodePickerImpl is an implementation of +/// [`CompactionNodePicker`] based [`MetaClient`]. 
+pub struct RemoteCompactionNodePickerImpl { + pub meta_client: MetaClientRef, +} + +#[async_trait] +impl CompactionNodePicker for RemoteCompactionNodePickerImpl { + /// Get proper remote compaction node info for compaction offload with meta + /// client. + async fn get_compaction_node(&self) -> Result { + let req = FetchCompactionNodeRequest::default(); + let resp = self + .meta_client + .fetch_compaction_node(req) + .await + .context(FetchCompactionNodeFailure)?; + + let compaction_node_addr = resp.endpoint; + Ok(compaction_node_addr) + } +} + +/// LocalCompactionNodePickerImpl is an implementation of +/// [`CompactionNodePicker`] mainly used for testing. +pub struct LocalCompactionNodePickerImpl { + pub endpoint: String, +} + +#[async_trait] +impl CompactionNodePicker for LocalCompactionNodePickerImpl { + /// Return the local addr and port of grpc service. + async fn get_compaction_node(&self) -> Result { + Ok(self.endpoint.clone()) + } +} diff --git a/src/analytic_engine/src/compaction/runner/remote_client.rs b/src/analytic_engine/src/compaction/runner/remote_client.rs new file mode 100644 index 0000000000..cf1f69be4a --- /dev/null +++ b/src/analytic_engine/src/compaction/runner/remote_client.rs @@ -0,0 +1,148 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use async_trait::async_trait; +use generic_error::BoxError; +use horaedbproto::{ + common::ResponseHeader, compaction_service::compaction_service_client::CompactionServiceClient, +}; +use logger::info; +use serde::{Deserialize, Serialize}; +use snafu::{OptionExt, ResultExt}; +use time_ext::ReadableDuration; + +use crate::compaction::runner::{ + BadResponse, FailConnect, FailExecuteCompactionTask, MissingHeader, Result, +}; + +type CompactionServiceGrpcClient = CompactionServiceClient; + +#[derive(Debug, Deserialize, Clone, Serialize)] +#[serde(default)] +pub struct CompactionClientConfig { + pub compaction_server_addr: String, + pub timeout: ReadableDuration, +} + +impl Default for CompactionClientConfig { + fn default() -> Self { + Self { + compaction_server_addr: "127.0.0.1:7878".to_string(), + timeout: ReadableDuration::secs(5), + } + } +} + +/// CompactionClient is the abstraction of client used for HoraeDB to +/// communicate with CompactionServer cluster. +#[async_trait] +pub trait CompactionClient: Send + Sync { + async fn execute_compaction_task( + &self, + req: horaedbproto::compaction_service::ExecuteCompactionTaskRequest, + ) -> Result; +} + +pub type CompactionClientRef = Arc; + +/// Default compaction client impl, will interact with the remote compaction +/// node. 
+pub struct CompactionClientImpl { + client: CompactionServiceGrpcClient, +} + +impl CompactionClientImpl { + pub async fn connect(config: CompactionClientConfig) -> Result { + let client = { + let endpoint = + tonic::transport::Endpoint::from_shared(config.compaction_server_addr.to_string()) + .box_err() + .context(FailConnect { + addr: &config.compaction_server_addr, + })? + .timeout(config.timeout.0); + CompactionServiceGrpcClient::connect(endpoint) + .await + .box_err() + .context(FailConnect { + addr: &config.compaction_server_addr, + })? + }; + + Ok(Self { client }) + } + + #[inline] + fn client(&self) -> CompactionServiceGrpcClient { + self.client.clone() + } +} + +#[async_trait] +impl CompactionClient for CompactionClientImpl { + async fn execute_compaction_task( + &self, + pb_req: horaedbproto::compaction_service::ExecuteCompactionTaskRequest, + ) -> Result { + // TODO(leslie): Add request header for ExecuteCompactionTaskRequest. + + info!( + "Compaction client try to execute compaction task in remote compaction node, req:{:?}", + pb_req + ); + + let pb_resp = self + .client() + .execute_compaction_task(pb_req) + .await + .box_err() + .context(FailExecuteCompactionTask)? + .into_inner(); + + info!( + "Compaction client finish executing compaction task in remote compaction node, req:{:?}", + pb_resp + ); + + check_response_header(&pb_resp.header)?; + Ok(pb_resp) + } +} + +// TODO(leslie): Consider to refactor and reuse the similar function in +// meta_client. +fn check_response_header(header: &Option) -> Result<()> { + let header = header.as_ref().context(MissingHeader)?; + if header.code == 0 { + Ok(()) + } else { + BadResponse { + code: header.code, + msg: header.error.clone(), + } + .fail() + } +} + +pub async fn build_compaction_client( + config: CompactionClientConfig, +) -> Result { + let compaction_client = CompactionClientImpl::connect(config).await?; + Ok(Arc::new(compaction_client)) +} diff --git a/src/analytic_engine/src/compaction/runner/remote_runner.rs b/src/analytic_engine/src/compaction/runner/remote_runner.rs new file mode 100644 index 0000000000..59a70c2fc2 --- /dev/null +++ b/src/analytic_engine/src/compaction/runner/remote_runner.rs @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
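The `remote_client.rs` file above gives HoraeDB a thin tonic client for the new compaction gRPC service. The sketch below shows the call path that `RemoteCompactionRunner` (next file) drives, minus the fallback handling; it is crate-internal by necessity since `remote_client` is declared as a private module, and it assumes an already-built `task: CompactionRunnerTask` (constructing one needs a real table schema and SST context, which is out of scope here).

```rust
// Sketch only (crate-internal): offloading a single compaction task.
// `remote_client` is private, so code like this lives inside analytic_engine.
use crate::compaction::runner::{
    remote_client::{build_compaction_client, CompactionClientConfig},
    CompactionRunnerResult, CompactionRunnerTask, Result,
};

async fn offload_once(task: CompactionRunnerTask) -> Result<CompactionRunnerResult> {
    // The node picker normally supplies the address; RemoteCompactionRunner
    // prepends the scheme via make_formatted_endpoint before connecting.
    let config = CompactionClientConfig {
        compaction_server_addr: "http://127.0.0.1:7878".to_string(),
        ..Default::default()
    };
    let client = build_compaction_client(config).await?;

    // CompactionRunnerTask -> ExecuteCompactionTaskRequest via the From impl above.
    let resp = client.execute_compaction_task(task.into()).await?;

    // The client has already validated the response header (check_response_header);
    // convert the protobuf payload back into the engine-side result type.
    resp.try_into()
}
```

One caveat as of this patch: the meta-driven picker (`RemoteCompactionNodePickerImpl`) ultimately calls `MetaClientImpl::fetch_compaction_node`, which is still left as `todo!()` further down, so only a fixed compaction endpoint can be exercised end to end in this change.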
+ +use async_trait::async_trait; +use generic_error::BoxError; +use logger::info; +use snafu::ResultExt; + +use super::{local_runner::LocalCompactionRunner, node_picker::RemoteCompactionNodePickerRef}; +use crate::{ + compaction::runner::{ + remote_client::{build_compaction_client, CompactionClientConfig, CompactionClientRef}, + CompactionRunner, CompactionRunnerResult, CompactionRunnerTask, + }, + instance::flush_compaction::{ + self, BuildCompactionClientFailed, ConvertCompactionTaskResponse, + GetCompactionClientFailed, PickCompactionNodeFailed, Result, + }, +}; + +pub struct RemoteCompactionRunner { + pub node_picker: RemoteCompactionNodePickerRef, + + pub fallback_local_when_failed: bool, + /// Responsible for executing compaction task locally if fail to remote + /// compact when `fallback_local_when_failed` is true, used for better fault + /// tolerance. + pub local_compaction_runner: LocalCompactionRunner, +} + +impl RemoteCompactionRunner { + async fn get_compaction_client(&self) -> Result { + let mut config = CompactionClientConfig::default(); + let endpoint = self + .node_picker + .get_compaction_node() + .await + .context(PickCompactionNodeFailed)?; + config.compaction_server_addr = make_formatted_endpoint(&endpoint); + + let client = build_compaction_client(config) + .await + .context(BuildCompactionClientFailed)?; + Ok(client) + } + + async fn local_compact(&self, task: CompactionRunnerTask) -> Result { + self.local_compaction_runner.run(task).await + } +} + +#[async_trait] +impl CompactionRunner for RemoteCompactionRunner { + /// Run the compaction task either on a remote node or fall back to local + /// compaction. + async fn run(&self, task: CompactionRunnerTask) -> Result { + let client = self + .get_compaction_client() + .await + .box_err() + .context(GetCompactionClientFailed); + + let pb_resp = match client { + Ok(client) => match client.execute_compaction_task(task.clone().into()).await { + Ok(resp) => resp, + Err(e) => { + if !self.fallback_local_when_failed { + return Err(flush_compaction::Error::RemoteCompactFailed { source: e }); + } + + info!( + "The compaction task falls back to local because of error:{}", + e + ); + return self.local_compact(task).await; + } + }, + Err(e) => { + if !self.fallback_local_when_failed { + return Err(e); + } + + info!( + "The compaction task falls back to local because of error:{}", + e + ); + return self.local_compact(task).await; + } + }; + + let resp = pb_resp + .try_into() + .box_err() + .context(ConvertCompactionTaskResponse)?; + + Ok(resp) + } +} + +fn make_formatted_endpoint(endpoint: &str) -> String { + format!("http://{endpoint}") +} diff --git a/src/analytic_engine/src/instance/engine.rs b/src/analytic_engine/src/instance/engine.rs index 8c29ab1c2d..537b83314f 100644 --- a/src/analytic_engine/src/instance/engine.rs +++ b/src/analytic_engine/src/instance/engine.rs @@ -259,6 +259,12 @@ pub enum Error { sequence: SequenceNumber, source: wal::manager::Error, }, + + #[snafu(display( + "Failed to find meta client to construct remote compaction runner.\nBacktrace:\n{}", + backtrace + ))] + MetaClientNotExist { backtrace: Backtrace }, } define_result!(Error); @@ -293,6 +299,7 @@ impl From for table_engine::engine::Error { | Error::DoManifestSnapshot { .. } | Error::OpenManifest { .. } | Error::TableNotExist { .. } + | Error::MetaClientNotExist { .. } | Error::OpenTablesOfShard { .. } | Error::ReplayWalNoCause { .. } | Error::PurgeWal { .. 
} diff --git a/src/analytic_engine/src/instance/flush_compaction.rs b/src/analytic_engine/src/instance/flush_compaction.rs index da1647eb70..9deceff563 100644 --- a/src/analytic_engine/src/instance/flush_compaction.rs +++ b/src/analytic_engine/src/instance/flush_compaction.rs @@ -41,6 +41,7 @@ use tokio::{sync::oneshot, time::Instant}; use wal::manager::WalLocation; use crate::{ + compaction::runner::node_picker, instance::{ self, reorder_memtable::Reorder, serial_executor::TableFlushScheduler, SpaceStoreRef, }, @@ -158,6 +159,25 @@ pub enum Error { #[snafu(display("Failed to alloc file id, err:{}", source))] AllocFileId { source: data::Error }, + + #[snafu(display("Failed to convert compaction task response, err:{}", source))] + ConvertCompactionTaskResponse { source: GenericError }, + + #[snafu(display("Failed to pick remote compaction node, err:{}", source))] + PickCompactionNodeFailed { source: node_picker::Error }, + + #[snafu(display("Failed to build compaction client, err:{}", source))] + BuildCompactionClientFailed { + source: crate::compaction::runner::Error, + }, + + #[snafu(display("Failed to get compaction client, err:{}", source))] + GetCompactionClientFailed { source: GenericError }, + + #[snafu(display("Failed to execute compaction task remotely, err:{}", source))] + RemoteCompactFailed { + source: crate::compaction::runner::Error, + }, } define_result!(Error); diff --git a/src/analytic_engine/src/instance/open.rs b/src/analytic_engine/src/instance/open.rs index 220fa84c3a..97717c5ab0 100644 --- a/src/analytic_engine/src/instance/open.rs +++ b/src/analytic_engine/src/instance/open.rs @@ -24,20 +24,28 @@ use std::{ use common_types::table::ShardId; use logger::{error, info}; +use meta_client::MetaClientRef; use object_store::ObjectStoreRef; -use snafu::ResultExt; +use snafu::{OptionExt, ResultExt}; use table_engine::{engine::TableDef, table::TableId}; use wal::manager::WalManagerRef; use crate::{ compaction::{ - runner::{local_runner::LocalCompactionRunner, CompactionRunnerPtr, CompactionRunnerRef}, + runner::{ + local_runner::LocalCompactionRunner, + node_picker::{ + LocalCompactionNodePickerImpl, NodePicker, RemoteCompactionNodePickerImpl, + }, + remote_runner::RemoteCompactionRunner, + CompactionRunnerPtr, CompactionRunnerRef, + }, scheduler::SchedulerImpl, }, context::OpenContext, engine, instance::{ - engine::{OpenManifest, OpenTablesOfShard, ReadMetaUpdate, Result}, + engine::{MetaClientNotExist, OpenManifest, OpenTablesOfShard, ReadMetaUpdate, Result}, flush_compaction::Flusher, mem_collector::MemUsageCollector, wal_replayer::{ReplayMode, WalReplayer}, @@ -52,7 +60,7 @@ use crate::{ }, table::data::{TableCatalogInfo, TableDataRef}, table_meta_set_impl::TableMetaSetImpl, - RecoverMode, + CompactionMode, RecoverMode, }; pub(crate) struct InstanceContext { @@ -68,14 +76,48 @@ impl InstanceContext { wal_manager: WalManagerRef, store_picker: ObjectStorePickerRef, sst_factory: SstFactoryRef, + meta_client: Option, ) -> Result { - let compaction_runner = Box::new(LocalCompactionRunner::new( + info!( + "Construct compaction runner with compaction_mode:{:?}", + ctx.config.compaction_mode + ); + + let local_compaction_runner = LocalCompactionRunner::new( ctx.runtimes.compact_runtime.clone(), &ctx.config, sst_factory.clone(), store_picker.clone(), ctx.meta_cache.clone(), - )); + ); + + let compaction_runner: CompactionRunnerPtr = match &ctx.config.compaction_mode { + CompactionMode::Offload(NodePicker::Local(endpoint)) => { + Box::new(RemoteCompactionRunner { + node_picker: 
Arc::new(LocalCompactionNodePickerImpl { + endpoint: endpoint.clone(), + }), + // This field is set to false here for testing. + fallback_local_when_failed: false, + local_compaction_runner: local_compaction_runner.clone(), + }) + } + CompactionMode::Offload(NodePicker::Remote) => Box::new(RemoteCompactionRunner { + node_picker: Arc::new(RemoteCompactionNodePickerImpl { + meta_client: meta_client.context(MetaClientNotExist)?, + }), + fallback_local_when_failed: true, + local_compaction_runner: local_compaction_runner.clone(), + }), + + CompactionMode::Local => Box::new(LocalCompactionRunner::new( + ctx.runtimes.compact_runtime.clone(), + &ctx.config, + sst_factory.clone(), + store_picker.clone(), + ctx.meta_cache.clone(), + )), + }; let instance = Instance::open( ctx, @@ -89,7 +131,7 @@ impl InstanceContext { Ok(Self { instance, - local_compaction_runner: None, + local_compaction_runner: Some(Arc::new(local_compaction_runner)), }) } } diff --git a/src/analytic_engine/src/lib.rs b/src/analytic_engine/src/lib.rs index 4b80741f6c..687bcf637a 100644 --- a/src/analytic_engine/src/lib.rs +++ b/src/analytic_engine/src/lib.rs @@ -19,7 +19,7 @@ #![feature(option_get_or_insert_default)] -mod compaction; +pub mod compaction; mod context; mod engine; pub mod error; @@ -40,6 +40,7 @@ pub mod table_meta_set_impl; #[cfg(any(test, feature = "test"))] pub mod tests; +use compaction::runner::node_picker::NodePicker; use error::ErrorKind; use manifest::details::Options as ManifestOptions; use object_store::config::StorageOptions; @@ -54,6 +55,20 @@ pub use crate::{ table_options::TableOptions, }; +/// The compaction mode decides compaction offload or not. +/// +/// [CompactionMode::Offload] means offload the compaction task +/// to a local or remote node. +/// +/// [CompactionMode::Local] means local compaction, no offloading. +#[derive(Clone, Default, Debug, Deserialize, Serialize)] +#[serde(tag = "compaction_mode")] +pub enum CompactionMode { + #[default] + Local, + Offload(NodePicker), +} + /// Config of analytic engine #[derive(Debug, Clone, Deserialize, Serialize)] #[serde(default)] @@ -77,6 +92,9 @@ pub struct Config { pub compaction: SchedulerConfig, + /// Offload the compaction task or not. + pub compaction_mode: CompactionMode, + /// sst meta cache capacity pub sst_meta_cache_cap: Option, /// sst data cache capacity @@ -187,6 +205,7 @@ impl Default for Config { table_opts: TableOptions::default(), try_compat_old_layered_memtable_opts: false, compaction: SchedulerConfig::default(), + compaction_mode: CompactionMode::Local, sst_meta_cache_cap: Some(1000), sst_data_cache_cap: Some(1000), manifest: ManifestOptions::default(), diff --git a/src/analytic_engine/src/setup.rs b/src/analytic_engine/src/setup.rs index ee16772985..4075e250db 100644 --- a/src/analytic_engine/src/setup.rs +++ b/src/analytic_engine/src/setup.rs @@ -21,6 +21,7 @@ use std::{num::NonZeroUsize, path::Path, pin::Pin, sync::Arc}; use futures::Future; use macros::define_result; +use meta_client::MetaClientRef; use object_store::{ aliyun, config::{ObjectStoreOptions, StorageOptions}, @@ -96,6 +97,8 @@ pub struct EngineBuilder<'a> { pub config: &'a Config, pub engine_runtimes: Arc, pub opened_wals: OpenedWals, + // Meta client is needed when compaction offload with remote node picker. 
+ pub meta_client: Option, } impl<'a> EngineBuilder<'a> { @@ -116,6 +119,7 @@ impl<'a> EngineBuilder<'a> { self.opened_wals.data_wal, manifest_storages, Arc::new(opened_storages), + self.meta_client, ) .await?; @@ -134,6 +138,7 @@ async fn build_instance_context( wal_manager: WalManagerRef, manifest_storages: ManifestStorages, store_picker: ObjectStorePickerRef, + meta_client: Option, ) -> Result { let meta_cache: Option = config .sst_meta_cache_cap @@ -151,6 +156,7 @@ async fn build_instance_context( wal_manager, store_picker, Arc::new(FactoryImpl), + meta_client.clone(), ) .await .context(OpenInstance)?; diff --git a/src/analytic_engine/src/sst/factory.rs b/src/analytic_engine/src/sst/factory.rs index 2ddeb24668..1f17b8df1d 100644 --- a/src/analytic_engine/src/sst/factory.rs +++ b/src/analytic_engine/src/sst/factory.rs @@ -21,10 +21,11 @@ use std::{collections::HashMap, fmt::Debug, sync::Arc}; use async_trait::async_trait; use common_types::projected_schema::RowProjectorBuilder; +use generic_error::{BoxError, GenericError}; use macros::define_result; use object_store::{ObjectStoreRef, Path}; use runtime::Runtime; -use snafu::{ResultExt, Snafu}; +use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; use table_engine::predicate::PredicateRef; use trace_metric::MetricsCollector; @@ -50,6 +51,15 @@ use crate::{ pub enum Error { #[snafu(display("Failed to parse sst header, err:{}", source,))] ParseHeader { source: header::Error }, + + #[snafu(display("Empty storage format hint.\nBacktrace:\n{}", backtrace))] + EmptyStorageFormatHint { backtrace: Backtrace }, + + #[snafu(display("Failed to convert storage format hint, err:{}", source))] + ConvertStorageFormatHint { source: GenericError }, + + #[snafu(display("Failed to convert compression, err:{}", source))] + ConvertCompression { source: GenericError }, } define_result!(Error); @@ -164,6 +174,59 @@ pub struct SstWriteOptions { pub column_stats: HashMap, } +impl TryFrom for SstWriteOptions { + type Error = Error; + + fn try_from(value: horaedbproto::compaction_service::SstWriteOptions) -> Result { + let storage_format_hint: StorageFormatHint = value + .storage_format_hint + .context(EmptyStorageFormatHint)? 
+ .try_into() + .box_err() + .context(ConvertStorageFormatHint)?; + + let num_rows_per_row_group = value.num_rows_per_row_group as usize; + let compression: Compression = value + .compression + .try_into() + .box_err() + .context(ConvertCompression)?; + let max_buffer_size = value.max_buffer_size as usize; + + let column_stats: HashMap = value + .column_stats + .into_iter() + .map(|(k, v)| (k, ColumnStats { low_cardinality: v })) + .collect(); + + Ok(SstWriteOptions { + storage_format_hint, + num_rows_per_row_group, + compression, + max_buffer_size, + column_stats, + }) + } +} + +impl From for horaedbproto::compaction_service::SstWriteOptions { + fn from(value: SstWriteOptions) -> Self { + let column_stats = value + .column_stats + .into_iter() + .map(|(k, v)| (k, v.low_cardinality)) + .collect(); + + Self { + storage_format_hint: Some(value.storage_format_hint.into()), + num_rows_per_row_group: value.num_rows_per_row_group as u64, + compression: value.compression.into(), + max_buffer_size: value.max_buffer_size as u64, + column_stats, + } + } +} + impl From<&ColumnStats> for ColumnEncoding { fn from(value: &ColumnStats) -> Self { ColumnEncoding { diff --git a/src/analytic_engine/src/sst/file.rs b/src/analytic_engine/src/sst/file.rs index 39cdc7c7d1..a6cc336a31 100644 --- a/src/analytic_engine/src/sst/file.rs +++ b/src/analytic_engine/src/sst/file.rs @@ -35,12 +35,13 @@ use common_types::{ SequenceNumber, }; use future_ext::{retry_async, BackoffConfig, RetryConfig}; +use generic_error::{BoxError, GenericError}; use logger::{error, info, trace, warn}; use macros::define_result; use metric_ext::Meter; use object_store::{ObjectStoreRef, Path}; use runtime::{JoinHandle, Runtime}; -use snafu::{ResultExt, Snafu}; +use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; use table_engine::table::TableId; use tokio::sync::{ mpsc::{self, UnboundedReceiver, UnboundedSender}, @@ -54,6 +55,18 @@ use crate::{space::SpaceId, sst::manager::FileId, table::sst_util, table_options pub enum Error { #[snafu(display("Failed to join purger, err:{}", source))] StopPurger { source: runtime::Error }, + + #[snafu(display("Empty time range.\nBacktrace:\n{}", backtrace))] + EmptyTimeRange { backtrace: Backtrace }, + + #[snafu(display("Failed to convert time range, err:{}", source))] + ConvertTimeRange { source: GenericError }, + + #[snafu(display("Failed to convert storage format, err:{}", source))] + ConvertStorageFormat { source: GenericError }, + + #[snafu(display("Converted overflow, err:{}", source))] + ConvertOverflow { source: GenericError }, } define_result!(Error); @@ -95,6 +108,15 @@ impl From for Level { } } +impl TryFrom for Level { + type Error = Error; + + fn try_from(value: u32) -> Result { + let value: u16 = value.try_into().box_err().context(ConvertOverflow)?; + Ok(value.into()) + } +} + impl fmt::Display for Level { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) @@ -197,6 +219,16 @@ impl FileHandle { } } + #[inline] + pub fn space_id(&self) -> SpaceId { + self.inner.purge_queue.space_id() + } + + #[inline] + pub fn table_id(&self) -> TableId { + self.inner.purge_queue.table_id() + } + #[inline] pub fn read_meter(&self) -> Arc { self.inner.metrics.read_meter.clone() @@ -460,6 +492,53 @@ impl FileMeta { } } +impl TryFrom for FileMeta { + type Error = Error; + + fn try_from(value: horaedbproto::compaction_service::FileMeta) -> Result { + let time_range: TimeRange = value + .time_range + .context(EmptyTimeRange)? 
+ .try_into() + .box_err() + .context(ConvertTimeRange)?; + + let storage_format: StorageFormat = value + .storage_format + .try_into() + .box_err() + .context(ConvertStorageFormat)?; + let mut associated_files: Vec = Vec::with_capacity(value.associated_files.len()); + for file in value.associated_files { + associated_files.push(file); + } + + Ok(FileMeta { + id: value.file_id, + size: value.size, + row_num: value.row_num, + time_range, + max_seq: value.max_seq, + storage_format, + associated_files, + }) + } +} + +impl From for horaedbproto::compaction_service::FileMeta { + fn from(value: FileMeta) -> Self { + Self { + file_id: value.id, + max_seq: value.max_seq, + time_range: Some(value.time_range.into()), + size: value.size, + row_num: value.row_num, + storage_format: value.storage_format.into(), + associated_files: value.associated_files, + } + } +} + // Queue to store files to be deleted for a table. #[derive(Clone)] pub struct FilePurgeQueue { @@ -508,6 +587,23 @@ impl FilePurgeQueue { ); } } + + #[inline] + pub fn space_id(&self) -> SpaceId { + self.inner.space_id + } + + #[inline] + pub fn table_id(&self) -> TableId { + self.inner.table_id + } +} + +impl From for FilePurgeQueue { + fn from(value: horaedbproto::compaction_service::FilePurgeQueue) -> Self { + let (tx, _rx) = mpsc::unbounded_channel(); + FilePurgeQueue::new(value.space_id, value.table_id.into(), tx) + } } struct FilePurgeQueueInner { diff --git a/src/analytic_engine/src/sst/writer.rs b/src/analytic_engine/src/sst/writer.rs index e424e8af48..577f499332 100644 --- a/src/analytic_engine/src/sst/writer.rs +++ b/src/analytic_engine/src/sst/writer.rs @@ -26,7 +26,8 @@ use common_types::{ SequenceNumber, }; use futures::Stream; -use generic_error::GenericError; +use generic_error::{BoxError, GenericError}; +use snafu::{OptionExt, ResultExt}; use crate::table_options::StorageFormat; @@ -96,6 +97,21 @@ pub mod error { #[snafu(display("Other kind of error, msg:{}.\nBacktrace:\n{}", msg, backtrace))] OtherNoCause { msg: String, backtrace: Backtrace }, + + #[snafu(display("Empty time range.\nBacktrace:\n{}", backtrace))] + EmptyTimeRange { backtrace: Backtrace }, + + #[snafu(display("Empty schema.\nBacktrace:\n{}", backtrace))] + EmptySchema { backtrace: Backtrace }, + + #[snafu(display("Failed to convert time range, err:{}", source))] + ConvertTimeRange { source: GenericError }, + + #[snafu(display("Failed to convert sst info, err:{}", source))] + ConvertSstInfo { source: GenericError }, + + #[snafu(display("Failed to convert schema, err:{}", source))] + ConvertSchema { source: GenericError }, } define_result!(Error); @@ -117,6 +133,44 @@ pub struct SstInfo { pub time_range: TimeRange, } +impl TryFrom for SstInfo { + type Error = Error; + + fn try_from(value: horaedbproto::compaction_service::SstInfo) -> Result { + let storage_format = value + .storage_format + .try_into() + .box_err() + .context(ConvertSstInfo)?; + let time_range = value + .time_range + .context(EmptyTimeRange)? 
+ .try_into() + .box_err() + .context(ConvertTimeRange)?; + + Ok(Self { + file_size: value.file_size as usize, + row_num: value.row_num as usize, + storage_format, + meta_path: value.meta_path, + time_range, + }) + } +} + +impl From for horaedbproto::compaction_service::SstInfo { + fn from(value: SstInfo) -> Self { + Self { + file_size: value.file_size as u64, + row_num: value.row_num as u64, + storage_format: value.storage_format.into(), + meta_path: value.meta_path, + time_range: Some(value.time_range.into()), + } + } +} + #[derive(Debug, Clone)] pub struct MetaData { /// Min key of the sst. @@ -131,6 +185,45 @@ pub struct MetaData { pub schema: Schema, } +impl TryFrom for MetaData { + type Error = Error; + + fn try_from(meta: horaedbproto::compaction_service::MetaData) -> Result { + let time_range = meta + .time_range + .context(EmptyTimeRange)? + .try_into() + .box_err() + .context(ConvertTimeRange)?; + let schema = meta + .schema + .context(EmptySchema)? + .try_into() + .box_err() + .context(ConvertSchema)?; + + Ok(Self { + min_key: Bytes::from(meta.min_key), + max_key: Bytes::from(meta.max_key), + time_range, + max_sequence: meta.max_sequence, + schema, + }) + } +} + +impl From for horaedbproto::compaction_service::MetaData { + fn from(meta: MetaData) -> Self { + Self { + min_key: meta.min_key.to_vec(), + max_key: meta.max_key.to_vec(), + max_sequence: meta.max_sequence, + time_range: Some(meta.time_range.into()), + schema: Some((&meta.schema).into()), + } + } +} + /// The writer for sst. /// /// The caller provides a stream of [RecordBatch] and the writer takes diff --git a/src/analytic_engine/src/table_options.rs b/src/analytic_engine/src/table_options.rs index 4c1823eed2..0ecabb9512 100644 --- a/src/analytic_engine/src/table_options.rs +++ b/src/analytic_engine/src/table_options.rs @@ -130,6 +130,13 @@ pub enum Error { ))] UnknownStorageFormatHint { value: String, backtrace: Backtrace }, + #[snafu(display( + "Unknown compression type. 
value:{:?}.\nBacktrace:\n{}", + value, + backtrace + ))] + UnknownCompressionType { value: i32, backtrace: Backtrace }, + #[snafu(display("Storage format hint is missing.\nBacktrace:\n{}", backtrace))] MissingStorageFormatHint { backtrace: Backtrace }, @@ -234,6 +241,33 @@ impl From for Compression { } } +impl TryFrom for Compression { + type Error = Error; + + fn try_from(compression: i32) -> Result { + let compression = match compression { + 0 => Compression::Uncompressed, + 1 => Compression::Lz4, + 2 => Compression::Snappy, + 3 => Compression::Zstd, + _ => return UnknownCompressionType { value: compression }.fail(), + }; + + Ok(compression) + } +} + +impl From for i32 { + fn from(value: Compression) -> Self { + match value { + Compression::Uncompressed => 0, + Compression::Lz4 => 1, + Compression::Snappy => 2, + Compression::Zstd => 3, + } + } +} + impl From for ParquetCompression { fn from(compression: Compression) -> Self { match compression { @@ -340,6 +374,14 @@ impl From for manifest_pb::StorageFormat { } } +impl From for i32 { + fn from(value: StorageFormat) -> Self { + match value { + StorageFormat::Columnar => 0, + } + } +} + impl TryFrom for StorageFormat { type Error = Error; @@ -363,6 +405,18 @@ impl TryFrom<&str> for StorageFormat { } } +impl TryFrom for StorageFormat { + type Error = Error; + + fn try_from(value: i32) -> Result { + let format = match value { + 0 => Self::Columnar, + _ => return UnknownStorageFormatType { value }.fail(), + }; + Ok(format) + } +} + impl ToString for StorageFormat { fn to_string(&self) -> String { match self { diff --git a/src/analytic_engine/src/tests/util.rs b/src/analytic_engine/src/tests/util.rs index 8fe0710624..04bc09f75f 100644 --- a/src/analytic_engine/src/tests/util.rs +++ b/src/analytic_engine/src/tests/util.rs @@ -141,6 +141,7 @@ impl TestContext { config: &self.config, engine_runtimes: self.runtimes.clone(), opened_wals: opened_wals.clone(), + meta_client: None, }; self.opened_wals = Some(opened_wals); diff --git a/src/benchmarks/src/util.rs b/src/benchmarks/src/util.rs index a7f86f0866..97c8457be8 100644 --- a/src/benchmarks/src/util.rs +++ b/src/benchmarks/src/util.rs @@ -522,6 +522,7 @@ impl TestContext { config: &self.config, engine_runtimes: self.runtimes.clone(), opened_wals: opened_wals.clone(), + meta_client: None, }; self.opened_wals = Some(opened_wals); diff --git a/src/cluster/src/cluster_impl.rs b/src/cluster/src/cluster_impl.rs index aee54e42b3..d79eda0485 100644 --- a/src/cluster/src/cluster_impl.rs +++ b/src/cluster/src/cluster_impl.rs @@ -46,8 +46,8 @@ use crate::{ shard_set::{Shard, ShardRef, ShardSet}, topology::ClusterTopology, Cluster, ClusterNodesNotFound, ClusterNodesResp, EtcdClientFailureWithCause, - InitEtcdClientConfig, InvalidArguments, MetaClientFailure, OpenShard, OpenShardWithCause, - Result, ShardNotFound, TableStatus, + InitEtcdClientConfig, InvalidArguments, MetaClientFailure, NodeType, OpenShard, + OpenShardWithCause, Result, ShardNotFound, TableStatus, }; /// ClusterImpl is an implementation of [`Cluster`] based [`MetaClient`]. 
@@ -376,6 +376,10 @@ impl Cluster for ClusterImpl { Ok(()) } + fn node_type(&self) -> NodeType { + self.config.node_type.clone() + } + async fn open_shard(&self, shard_info: &ShardInfo) -> Result { self.inner.open_shard(shard_info).await } diff --git a/src/cluster/src/config.rs b/src/cluster/src/config.rs index 29e0da9719..d0b1c694b9 100644 --- a/src/cluster/src/config.rs +++ b/src/cluster/src/config.rs @@ -23,6 +23,8 @@ use serde::{Deserialize, Serialize}; use table_engine::ANALYTIC_ENGINE_TYPE; use time_ext::ReadableDuration; +use crate::NodeType; + #[derive(Debug, Clone, Deserialize, Serialize)] #[serde(default)] // TODO: move this to table_engine crates @@ -133,6 +135,7 @@ impl Default for TlsConfig { #[serde(default)] pub struct ClusterConfig { pub cmd_channel_buffer_size: usize, + pub node_type: NodeType, pub meta_client: MetaClientConfig, pub etcd_client: EtcdClientConfig, } diff --git a/src/cluster/src/lib.rs b/src/cluster/src/lib.rs index a97c945a0b..ddda6c4689 100644 --- a/src/cluster/src/lib.rs +++ b/src/cluster/src/lib.rs @@ -28,7 +28,7 @@ use std::sync::Arc; use async_trait::async_trait; -use common_types::schema::SchemaName; +use common_types::{cluster::NodeType, schema::SchemaName}; use generic_error::GenericError; use macros::define_result; use meta_client::types::{ @@ -190,12 +190,14 @@ pub struct ClusterNodesResp { pub cluster_nodes: ClusterNodesRef, } -/// Cluster manages tables and shard infos in cluster mode. #[async_trait] pub trait Cluster { async fn start(&self) -> Result<()>; async fn stop(&self) -> Result<()>; + /// Get cluster type. + fn node_type(&self) -> NodeType; + /// Fetch related information and open shard. async fn open_shard(&self, shard_info: &ShardInfo) -> Result; diff --git a/src/common_types/src/cluster.rs b/src/common_types/src/cluster.rs new file mode 100644 index 0000000000..ad302023e9 --- /dev/null +++ b/src/common_types/src/cluster.rs @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use serde::{Deserialize, Serialize}; + +/// Type to distinguish different node type in cluster mode. +#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum NodeType { + #[default] + HoraeDB, + CompactionServer, +} diff --git a/src/common_types/src/lib.rs b/src/common_types/src/lib.rs index 0b6cda17c8..334bd42f91 100644 --- a/src/common_types/src/lib.rs +++ b/src/common_types/src/lib.rs @@ -18,6 +18,7 @@ //! 
Contains common types pub mod bitset; +pub mod cluster; pub mod column; pub mod column_block; pub mod column_schema; diff --git a/src/horaedb/Cargo.toml b/src/horaedb/Cargo.toml index ce505105f2..5a6144d3cc 100644 --- a/src/horaedb/Cargo.toml +++ b/src/horaedb/Cargo.toml @@ -38,32 +38,33 @@ wal-rocksdb = ["wal/wal-rocksdb", "analytic_engine/wal-rocksdb"] wal-local-storage = ["wal/wal-local-storage", "analytic_engine/wal-local-storage"] [dependencies] -analytic_engine = { workspace = true } -catalog = { workspace = true } -catalog_impls = { workspace = true } -clap = { workspace = true } -cluster = { workspace = true } -datafusion = { workspace = true } -df_operator = { workspace = true } -etcd-client = { workspace = true } -interpreters = { workspace = true } -logger = { workspace = true } -meta_client = { workspace = true } -moka = { version = "0.10", features = ["future"] } -panic_ext = { workspace = true } -proxy = { workspace = true } -query_engine = { workspace = true } -router = { workspace = true } -runtime = { workspace = true } -serde = { workspace = true } -server = { workspace = true } -signal-hook = "0.3" -size_ext = { workspace = true } -table_engine = { workspace = true } -toml = { workspace = true } -toml_ext = { workspace = true } -tracing_util = { workspace = true } -wal = { workspace = true } +analytic_engine = { workspace = true } +catalog = { workspace = true } +catalog_impls = { workspace = true } +clap = { workspace = true } +cluster = { workspace = true } +common_types = { workspace = true } +datafusion = { workspace = true } +df_operator = { workspace = true } +etcd-client = { workspace = true } +interpreters = { workspace = true } +logger = { workspace = true } +meta_client = { workspace = true } +moka = { version = "0.10", features = ["future"] } +panic_ext = { workspace = true } +proxy = { workspace = true } +query_engine = { workspace = true } +router = { workspace = true } +runtime = { workspace = true } +serde = { workspace = true } +server = { workspace = true } +signal-hook = "0.3" +size_ext = { workspace = true } +table_engine = { workspace = true } +toml = { workspace = true } +toml_ext = { workspace = true } +tracing_util = { workspace = true } +wal = { workspace = true } [build-dependencies] vergen = { version = "8", default-features = false, features = [ diff --git a/src/horaedb/src/config.rs b/src/horaedb/src/config.rs index b9f8932f19..e7f19233f0 100644 --- a/src/horaedb/src/config.rs +++ b/src/horaedb/src/config.rs @@ -26,8 +26,8 @@ use size_ext::ReadableSize; #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(default)] pub struct NodeInfo { - /// The address of the horaedb node. It can be a domain name or an IP - /// address without port followed. + /// The address of the horaedb (or compaction server) node. It can be a + /// domain name or an IP address without port followed. 
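The new `NodeType` in `common_types::cluster`, the `node_type` field on `ClusterConfig`, and the `Cluster::node_type()` accessor are what distinguish a dedicated compaction server from a regular HoraeDB node in cluster mode. A minimal sketch of how the field round-trips through serde is shown below; the mirror struct name `ClusterConfigSketch` and the surrounding section of a full server config file are assumptions, since neither appears in this diff.

```rust
// Sketch only: serde round-trip of the new node_type field. The real enum
// lives in common_types::cluster and is referenced by cluster::config.
use serde::Deserialize;

#[derive(Debug, Default, Deserialize, PartialEq)]
enum NodeType {
    #[default]
    HoraeDB,
    CompactionServer,
}

#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct ClusterConfigSketch {
    node_type: NodeType,
}

fn main() {
    // Field omitted -> defaults to a regular HoraeDB node.
    let regular: ClusterConfigSketch = toml::from_str("").unwrap();
    assert_eq!(regular.node_type, NodeType::HoraeDB);

    // Dedicated compaction server.
    let compaction: ClusterConfigSketch =
        toml::from_str(r#"node_type = "CompactionServer""#).unwrap();
    assert_eq!(compaction.node_type, NodeType::CompactionServer);

    println!("{regular:?}\n{compaction:?}");
}
```

Because the field is `#[serde(default)]` and defaults to `HoraeDB`, the setting is strictly opt-in; per the PR description, only nodes explicitly configured as compaction servers take on the remote compaction role.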
pub addr: String, pub zone: String, pub idc: String, diff --git a/src/horaedb/src/setup.rs b/src/horaedb/src/setup.rs index 9bdb46daf9..33632b5524 100644 --- a/src/horaedb/src/setup.rs +++ b/src/horaedb/src/setup.rs @@ -313,6 +313,7 @@ async fn build_with_meta( zone: config.node.zone.clone(), idc: config.node.idc.clone(), binary_version: config.node.binary_version.clone(), + node_type: cluster_config.node_type.clone(), }; info!("Build horaedb with node meta info:{node_meta_info:?}"); @@ -349,8 +350,12 @@ async fn build_with_meta( config: &config.analytic, engine_runtimes: runtimes.clone(), opened_wals: opened_wals.clone(), + meta_client: Some(meta_client.clone()), }; - let TableEngineContext { table_engine, .. } = engine_builder + let TableEngineContext { + table_engine, + local_compaction_runner, + } = engine_builder .build() .await .expect("Failed to setup analytic engine"); @@ -368,14 +373,18 @@ async fn build_with_meta( let table_manipulator = Arc::new(meta_based::TableManipulatorImpl::new(meta_client)); let schema_config_provider = Arc::new(ClusterBasedProvider::new(cluster.clone())); - builder + + let mut builder = builder .table_engine(engine_proxy) .catalog_manager(catalog_manager) .table_manipulator(table_manipulator) .cluster(cluster) .opened_wals(opened_wals) .router(router) - .schema_config_provider(schema_config_provider) + .schema_config_provider(schema_config_provider); + builder = builder.compaction_runner(local_compaction_runner.expect("Empty compaction runner.")); + + builder } async fn build_without_meta( @@ -394,6 +403,7 @@ async fn build_without_meta( config: &config.analytic, engine_runtimes: runtimes.clone(), opened_wals: opened_wals.clone(), + meta_client: None, }; let TableEngineContext { table_engine, .. } = engine_builder .build() diff --git a/src/meta_client/src/lib.rs b/src/meta_client/src/lib.rs index a6cb8df6b9..ba93313537 100644 --- a/src/meta_client/src/lib.rs +++ b/src/meta_client/src/lib.rs @@ -23,9 +23,9 @@ use macros::define_result; use snafu::{Backtrace, Snafu}; use types::{ AllocSchemaIdRequest, AllocSchemaIdResponse, CreateTableRequest, CreateTableResponse, - DropTableRequest, DropTableResponse, GetNodesRequest, GetNodesResponse, - GetTablesOfShardsRequest, GetTablesOfShardsResponse, RouteTablesRequest, RouteTablesResponse, - ShardInfo, + DropTableRequest, DropTableResponse, FetchCompactionNodeRequest, FetchCompactionNodeResponse, + GetNodesRequest, GetNodesResponse, GetTablesOfShardsRequest, GetTablesOfShardsResponse, + RouteTablesRequest, RouteTablesResponse, ShardInfo, }; pub mod meta_impl; @@ -76,6 +76,9 @@ pub enum Error { #[snafu(display("Failed to get tables, err:{}", source))] FailGetTables { source: GenericError }, + #[snafu(display("Failed to fetch compaction node, err:{}", source))] + FailFetchCompactionNode { source: GenericError }, + #[snafu(display("Failed to route tables, err:{}", source))] FailRouteTables { source: GenericError }, @@ -113,6 +116,11 @@ pub trait MetaClient: Send + Sync { async fn get_nodes(&self, req: GetNodesRequest) -> Result; + async fn fetch_compaction_node( + &self, + req: FetchCompactionNodeRequest, + ) -> Result; + async fn send_heartbeat(&self, req: Vec) -> Result<()>; } diff --git a/src/meta_client/src/meta_impl.rs b/src/meta_client/src/meta_impl.rs index 5ba98de5fc..ffe32faeb8 100644 --- a/src/meta_client/src/meta_impl.rs +++ b/src/meta_client/src/meta_impl.rs @@ -31,9 +31,10 @@ use time_ext::ReadableDuration; use crate::{ types::{ AllocSchemaIdRequest, AllocSchemaIdResponse, CreateTableRequest, 
CreateTableResponse, - DropTableRequest, DropTableResponse, GetNodesRequest, GetNodesResponse, - GetTablesOfShardsRequest, GetTablesOfShardsResponse, NodeInfo, NodeMetaInfo, RequestHeader, - RouteTablesRequest, RouteTablesResponse, ShardInfo, + DropTableRequest, DropTableResponse, FetchCompactionNodeRequest, + FetchCompactionNodeResponse, GetNodesRequest, GetNodesResponse, GetTablesOfShardsRequest, + GetTablesOfShardsResponse, NodeInfo, NodeMetaInfo, RequestHeader, RouteTablesRequest, + RouteTablesResponse, ShardInfo, }, BadResponse, FailAllocSchemaId, FailConnect, FailCreateTable, FailDropTable, FailGetTables, FailRouteTables, FailSendHeartbeat, MetaClient, MetaClientRef, MissingHeader, Result, @@ -236,6 +237,13 @@ impl MetaClient for MetaClientImpl { GetNodesResponse::try_from(pb_resp) } + async fn fetch_compaction_node( + &self, + _req: FetchCompactionNodeRequest, + ) -> Result { + todo!() + } + async fn send_heartbeat(&self, shard_infos: Vec) -> Result<()> { let node_info = NodeInfo { node_meta_info: self.node_meta_info.clone(), diff --git a/src/meta_client/src/types.rs b/src/meta_client/src/types.rs index 6a6aba6918..524843620b 100644 --- a/src/meta_client/src/types.rs +++ b/src/meta_client/src/types.rs @@ -19,6 +19,7 @@ use std::{collections::HashMap, fmt, sync::Arc}; pub use common_types::table::{ShardId, ShardVersion}; use common_types::{ + cluster::NodeType, schema::{SchemaId, SchemaName}, table::{TableId, TableName}, }; @@ -163,6 +164,7 @@ pub struct NodeMetaInfo { pub zone: String, pub idc: String, pub binary_version: String, + pub node_type: NodeType, } impl NodeMetaInfo { @@ -589,3 +591,10 @@ impl TryFrom for GetNodesResponse { }) } } + +#[derive(Debug, Clone, Default)] +pub struct FetchCompactionNodeRequest {} + +pub struct FetchCompactionNodeResponse { + pub endpoint: String, +} diff --git a/src/router/src/cluster_based.rs b/src/router/src/cluster_based.rs index d929104407..bd5efd8b5b 100644 --- a/src/router/src/cluster_based.rs +++ b/src/router/src/cluster_based.rs @@ -205,7 +205,7 @@ mod tests { shard_lock_manager::ShardLockManagerRef, shard_set::ShardRef, Cluster, ClusterNodesResp, TableStatus, }; - use common_types::table::ShardId; + use common_types::{cluster::NodeType, table::ShardId}; use horaedbproto::storage::{RequestContext, RouteRequest as RouteRequestPb}; use meta_client::types::{ NodeShard, RouteEntry, RouteTablesResponse, ShardInfo, ShardRole::Leader, TableInfo, @@ -226,6 +226,10 @@ mod tests { unimplemented!(); } + fn node_type(&self) -> NodeType { + unimplemented!() + } + async fn open_shard(&self, _: &ShardInfo) -> cluster::Result { unimplemented!(); } diff --git a/src/server/src/grpc/compaction_service/error.rs b/src/server/src/grpc/compaction_service/error.rs new file mode 100644 index 0000000000..eadb3f2418 --- /dev/null +++ b/src/server/src/grpc/compaction_service/error.rs @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Error definitions for compaction service. + +use generic_error::GenericError; +use horaedbproto::common::ResponseHeader; +use macros::define_result; +use snafu::Snafu; + +use crate::error_util; + +define_result!(Error); + +#[derive(Snafu, Debug)] +#[snafu(visibility(pub))] +pub enum Error { + #[snafu(display("Server error, code:{:?}, message:{}", code, msg))] + ErrNoCause { code: StatusCode, msg: String }, + + #[snafu(display("Server error, code:{:?}, message:{}, cause:{}", code, msg, source))] + ErrWithCause { + code: StatusCode, + msg: String, + source: GenericError, + }, +} + +impl Error { + pub fn code(&self) -> StatusCode { + match *self { + Error::ErrNoCause { code, .. } => code, + Error::ErrWithCause { code, .. } => code, + } + } + + /// Get the error message returned to the user. + pub fn error_message(&self) -> String { + match self { + Error::ErrNoCause { msg, .. } => msg.clone(), + + Error::ErrWithCause { msg, source, .. } => { + let err_string = source.to_string(); + let first_line = error_util::remove_backtrace_from_err(&err_string); + format!("{msg}. Caused by: {first_line}") + } + } + } +} + +/// A set of codes for compaction service. +/// +/// Note that such a set of codes is different with the codes (alias to http +/// status code) used by storage service. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum StatusCode { + #[default] + Ok = 0, + BadRequest = 401, + Internal = 500, +} + +impl StatusCode { + #[inline] + pub fn as_u32(self) -> u32 { + self as u32 + } +} + +pub fn build_err_header(err: Error) -> ResponseHeader { + ResponseHeader { + code: err.code().as_u32(), + error: err.error_message(), + } +} + +pub fn build_ok_header() -> ResponseHeader { + ResponseHeader { + code: StatusCode::Ok.as_u32(), + ..Default::default() + } +} diff --git a/src/server/src/grpc/compaction_service/mod.rs b/src/server/src/grpc/compaction_service/mod.rs new file mode 100644 index 0000000000..3954b78a44 --- /dev/null +++ b/src/server/src/grpc/compaction_service/mod.rs @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Compaction rpc service implementation. 
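
The error module above boils down to two helpers, `build_ok_header` and `build_err_header`, which fold a compaction-service `Error` into the protobuf `ResponseHeader` carried by the RPC reply. A minimal usage sketch, assuming the items defined in `error.rs` above are in scope (`ResponseHeader` comes from `horaedbproto::common`); this is illustrative only, not part of the patch:

    // Sketch only: choose the response header for a compaction RPC reply.
    fn header_for(res: Result<()>) -> ResponseHeader {
        match res {
            Ok(()) => build_ok_header(),
            Err(e) => build_err_header(e),
        }
    }

The service implementation below fills `resp.header` following exactly this pattern.
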
+ +use std::sync::Arc; + +use analytic_engine::compaction::runner::{CompactionRunnerRef, CompactionRunnerTask}; +use async_trait::async_trait; +use error::{build_err_header, build_ok_header, ErrWithCause, StatusCode}; +use generic_error::BoxError; +use horaedbproto::compaction_service::{ + compaction_service_server::CompactionService, ExecResult, ExecuteCompactionTaskRequest, + ExecuteCompactionTaskResponse, +}; +use runtime::Runtime; +use snafu::ResultExt; +use tonic::{Request, Response, Status}; + +mod error; + +/// Builder for [CompactionServiceImpl] +pub struct Builder { + pub runtime: Arc, + pub compaction_runner: CompactionRunnerRef, +} + +impl Builder { + pub fn build(self) -> CompactionServiceImpl { + let Self { + runtime, + compaction_runner, + } = self; + + CompactionServiceImpl { + runtime, + compaction_runner, + } + } +} + +#[derive(Clone)] +pub struct CompactionServiceImpl { + pub runtime: Arc, + pub compaction_runner: CompactionRunnerRef, +} + +#[async_trait] +impl CompactionService for CompactionServiceImpl { + async fn execute_compaction_task( + &self, + request: Request, + ) -> Result, Status> { + let request: Result = request + .into_inner() + .try_into() + .box_err() + .context(ErrWithCause { + code: StatusCode::BadRequest, + msg: "fail to convert the execute compaction task request", + }); + + let mut resp: ExecuteCompactionTaskResponse = ExecuteCompactionTaskResponse::default(); + match request { + Ok(task) => { + let request_id = task.request_id.clone(); + let res = self + .compaction_runner + .run(task) + .await + .box_err() + .with_context(|| ErrWithCause { + code: StatusCode::Internal, + msg: format!("fail to compact task, request:{request_id}"), + }); + + match res { + Ok(res) => { + resp.header = Some(build_ok_header()); + resp.result = Some(ExecResult { + output_file_path: res.output_file_path.into(), + sst_info: Some(res.sst_info.into()), + sst_meta: Some(res.sst_meta.into()), + }); + // TODO(leslie): Add status. 
+ } + Err(e) => { + resp.header = Some(build_err_header(e)); + } + } + } + Err(e) => { + resp.header = Some(build_err_header(e)); + } + } + + Ok(Response::new(resp)) + } +} diff --git a/src/server/src/grpc/mod.rs b/src/server/src/grpc/mod.rs index 7b02a3a2a2..24a181682b 100644 --- a/src/server/src/grpc/mod.rs +++ b/src/server/src/grpc/mod.rs @@ -24,11 +24,14 @@ use std::{ time::Duration, }; +use analytic_engine::compaction::runner::CompactionRunnerRef; use cluster::ClusterRef; use common_types::column_schema; +use compaction_service::CompactionServiceImpl; use futures::FutureExt; use generic_error::GenericError; use horaedbproto::{ + compaction_service::compaction_service_server::CompactionServiceServer, meta_event::meta_event_service_server::MetaEventServiceServer, remote_engine::remote_engine_service_server::RemoteEngineServiceServer, storage::storage_service_server::StorageServiceServer, @@ -60,6 +63,7 @@ use crate::{ }, }; +mod compaction_service; mod meta_event_service; mod metrics; mod remote_engine_service; @@ -105,6 +109,9 @@ pub enum Error { #[snafu(display("Missing wals.\nBacktrace:\n{}", backtrace))] MissingWals { backtrace: Backtrace }, + #[snafu(display("Missing compaction runner.\nBacktrace:\n{}", backtrace))] + MissingCompactionRunner { backtrace: Backtrace }, + #[snafu(display("Missing timeout.\nBacktrace:\n{}", backtrace))] MissingTimeout { backtrace: Backtrace }, @@ -163,6 +170,7 @@ define_result!(Error); pub struct RpcServices { serve_addr: SocketAddr, rpc_server: InterceptedService, AuthWithFile>, + compaction_rpc_server: Option>, meta_rpc_server: Option>, remote_engine_server: RemoteEngineServiceServer, runtime: Arc, @@ -173,6 +181,7 @@ pub struct RpcServices { impl RpcServices { pub async fn start(&mut self) -> Result<()> { let rpc_server = self.rpc_server.clone(); + let compaction_rpc_server = self.compaction_rpc_server.clone(); let meta_rpc_server = self.meta_rpc_server.clone(); let remote_engine_server = self.remote_engine_server.clone(); let serve_addr = self.serve_addr; @@ -182,6 +191,11 @@ impl RpcServices { let mut router = Server::builder().add_service(rpc_server); + if let Some(s) = compaction_rpc_server { + info!("Grpc server serves compaction service"); + router = router.add_service(s); + }; + if let Some(s) = meta_rpc_server { info!("Grpc server serves meta rpc service"); router = router.add_service(s); @@ -226,6 +240,7 @@ pub struct Builder { proxy: Option>, query_dedup_config: Option, hotspot_recorder: Option>, + compaction_runner: Option, } impl Builder { @@ -241,6 +256,7 @@ impl Builder { proxy: None, query_dedup_config: None, hotspot_recorder: None, + compaction_runner: None, } } @@ -294,6 +310,12 @@ impl Builder { self.query_dedup_config = Some(config); self } + + // Compaction runner is an optional field for building [RpcServices]. 
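+    // For example (hypothetical caller shape; the real call site is in
+    // `server.rs` in this patch, which passes `self.compaction_runner.clone()`):
+    //   let builder = builder.compaction_runner(Some(runner));
+    // where `runner` is the `CompactionRunnerRef` produced by the analytic
+    // engine builder during setup.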
+ pub fn compaction_runner(mut self, runner: Option) -> Self { + self.compaction_runner = runner; + self + } } impl Builder { @@ -301,19 +323,39 @@ impl Builder { let auth = self.auth.context(MissingAuth)?; let runtimes = self.runtimes.context(MissingRuntimes)?; let instance = self.instance.context(MissingInstance)?; - let opened_wals = self.opened_wals.context(MissingWals)?; let proxy = self.proxy.context(MissingProxy)?; let hotspot_recorder = self.hotspot_recorder.context(MissingHotspotRecorder)?; - - let meta_rpc_server = self.cluster.map(|v| { - let builder = meta_event_service::Builder { - cluster: v, - instance: instance.clone(), - runtime: runtimes.meta_runtime.clone(), - opened_wals, - }; - MetaEventServiceServer::new(builder.build()) - }); + let mut meta_rpc_server: Option> = None; + let mut compaction_rpc_server: Option> = + None; + + self.cluster + .map(|v| { + let result: Result<()> = (|| { + // Support meta rpc service. + let opened_wals = self.opened_wals.context(MissingWals)?; + let builder = meta_event_service::Builder { + cluster: v.clone(), + instance: instance.clone(), + runtime: runtimes.meta_runtime.clone(), + opened_wals, + }; + meta_rpc_server = Some(MetaEventServiceServer::new(builder.build())); + + // Support remote compaction rpc service. + let compaction_runner = + self.compaction_runner.context(MissingCompactionRunner)?; + let builder = compaction_service::Builder { + runtime: runtimes.compact_runtime.clone(), + compaction_runner, + }; + compaction_rpc_server = Some(CompactionServiceServer::new(builder.build())); + + Ok(()) + })(); + result + }) + .transpose()?; let remote_engine_server = { let query_dedup = self @@ -349,6 +391,7 @@ impl Builder { Ok(RpcServices { serve_addr, rpc_server, + compaction_rpc_server, meta_rpc_server, remote_engine_server, runtime, diff --git a/src/server/src/server.rs b/src/server/src/server.rs index f7cd72ec7b..bca6c8d151 100644 --- a/src/server/src/server.rs +++ b/src/server/src/server.rs @@ -19,6 +19,7 @@ use std::sync::Arc; +use analytic_engine::compaction::runner::CompactionRunnerRef; use catalog::manager::ManagerRef; use cluster::ClusterRef; use datafusion::execution::{runtime_env::RuntimeConfig, FunctionRegistry}; @@ -251,6 +252,7 @@ pub struct Builder { opened_wals: Option, remote_engine: Option, datatfusion_context: Option, + compaction_runner: Option, } impl Builder { @@ -274,6 +276,7 @@ impl Builder { opened_wals: None, remote_engine: None, datatfusion_context: None, + compaction_runner: None, } } @@ -368,6 +371,11 @@ impl Builder { self } + pub fn compaction_runner(mut self, runner: CompactionRunnerRef) -> Self { + self.compaction_runner = Some(runner); + self + } + /// Build and run the server pub fn build(self) -> Result { // Build instance @@ -527,6 +535,7 @@ impl Builder { .proxy(proxy) .hotspot_recorder(hotspot_recorder) .query_dedup(self.server_config.query_dedup) + .compaction_runner(self.compaction_runner.clone()) .build() .context(BuildGrpcService)?; diff --git a/src/table_engine/src/predicate.rs b/src/table_engine/src/predicate.rs index b316b99e24..3a3294fcd9 100644 --- a/src/table_engine/src/predicate.rs +++ b/src/table_engine/src/predicate.rs @@ -112,7 +112,7 @@ impl Predicate { impl TryFrom<&Predicate> for horaedbproto::remote_engine::Predicate { type Error = Error; - fn try_from(predicate: &Predicate) -> std::result::Result { + fn try_from(predicate: &Predicate) -> Result { let time_range = predicate.time_range; let mut exprs = Vec::with_capacity(predicate.exprs.len()); for expr in &predicate.exprs { @@ 
-135,9 +135,7 @@ impl TryFrom<&Predicate> for horaedbproto::remote_engine::Predicate { impl TryFrom for Predicate { type Error = Error; - fn try_from( - pb: horaedbproto::remote_engine::Predicate, - ) -> std::result::Result { + fn try_from(pb: horaedbproto::remote_engine::Predicate) -> Result { let time_range = pb.time_range.context(EmptyTimeRange)?; let mut exprs = Vec::with_capacity(pb.exprs.len()); for pb_expr in pb.exprs { diff --git a/src/table_engine/src/table.rs b/src/table_engine/src/table.rs index 3c611b4395..6526579ab5 100644 --- a/src/table_engine/src/table.rs +++ b/src/table_engine/src/table.rs @@ -307,6 +307,12 @@ impl From for TableId { } } +impl From for u64 { + fn from(id: TableId) -> Self { + id.0 + } +} + impl fmt::Display for TableId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) From 63c4e9bb1c546aad89350c56987fbb4204147622 Mon Sep 17 00:00:00 2001 From: Leslie Su <3530611790@qq.com> Date: Wed, 30 Oct 2024 09:20:36 +0800 Subject: [PATCH 4/4] test: add integration test for compaction offload (#1573) ## Rationale Close #1571 ## Detailed Changes - Impl `compact` as pre-command for sqlness, with http compaction service. - update integration tests to test compaction offload. ## Test Plan --------- Co-authored-by: kamille --- integration_tests/Makefile | 16 ++- integration_tests/README.md | 3 + .../compaction_offload/compact/compact.result | 110 ++++++++++++++++++ .../compaction_offload/compact/compact.sql | 76 ++++++++++++ .../cases/env/compaction_offload/config.toml | 44 +++++++ .../config/compaction-offload.toml | 44 +++++++ integration_tests/src/database.rs | 51 ++++++++ integration_tests/src/main.rs | 9 +- src/server/src/http.rs | 49 ++++++++ 9 files changed, 398 insertions(+), 4 deletions(-) create mode 100644 integration_tests/cases/env/compaction_offload/compact/compact.result create mode 100644 integration_tests/cases/env/compaction_offload/compact/compact.sql create mode 100644 integration_tests/cases/env/compaction_offload/config.toml create mode 100644 integration_tests/config/compaction-offload.toml diff --git a/integration_tests/Makefile b/integration_tests/Makefile index fe7fbcdcb8..505f8380d5 100644 --- a/integration_tests/Makefile +++ b/integration_tests/Makefile @@ -21,6 +21,7 @@ HORAEDB_DATA_DIR = /tmp/horaedb HORAEDB_DATA_DIR_0 = /tmp/horaedb0 HORAEDB_DATA_DIR_1 = /tmp/horaedb1 HORAEMETA_DATA_DIR = /tmp/horaemeta +HORAEDB_DATA_DIR_2 = /tmp/compaction-offload export HORAEDB_TEST_CASE_PATH ?= $(ROOT)/cases/env export HORAEDB_TEST_BINARY ?= $(ROOT)/../target/$(MODE)/horaedb-test @@ -42,13 +43,17 @@ export CLUSTER_HORAEDB_STDOUT_FILE_0 ?= /tmp/horaedb-stdout-0.log export CLUSTER_HORAEDB_STDOUT_FILE_1 ?= /tmp/horaedb-stdout-1.log export RUST_BACKTRACE=1 +# Environment variables for compaction offload +export HORAEDB_STDOUT_FILE_2 ?= /tmp/horaedb-stdout-2.log +export HORAEDB_CONFIG_FILE_2 ?= $(ROOT)/config/compaction-offload.toml + # Whether update related repos # We don't want to rebuild the binaries and data on sometimes(e.g. debugging in local), # and we can set it to false. 
export UPDATE_REPOS_TO_LATEST ?= true clean: - rm -rf $(HORAEDB_DATA_DIR) $(HORAEDB_DATA_DIR_0) $(HORAEDB_DATA_DIR_1) $(HORAEMETA_DATA_DIR) + rm -rf $(HORAEDB_DATA_DIR) $(HORAEDB_DATA_DIR_0) $(HORAEDB_DATA_DIR_1) $(HORAEMETA_DATA_DIR) $(HORAEDB_DATA_DIR_2) build-meta: ./build_meta.sh @@ -80,8 +85,10 @@ run-horaedb-cluster: build-horaedb nohup ${HORAEDB_BINARY_PATH} --config ${HORAEDB_CONFIG_FILE_1} > ${CLUSTER_HORAEDB_STDOUT_FILE_1} 2>&1 & sleep 30 -run: prepare build-meta - $(HORAEDB_TEST_BINARY) +run: + make run-local + make run-cluster + make run-compaction-offload run-local: prepare HORAEDB_ENV_FILTER=local $(HORAEDB_TEST_BINARY) @@ -89,6 +96,9 @@ run-local: prepare run-cluster: prepare build-meta HORAEDB_ENV_FILTER=cluster $(HORAEDB_TEST_BINARY) +run-compaction-offload: prepare + HORAEDB_ENV_FILTER=compaction_offload $(HORAEDB_TEST_BINARY) + run-java: java -version cd sdk/java && MAVEN_OPTS="--add-opens=java.base/java.nio=ALL-UNNAMED" mvn clean compile exec:java diff --git a/integration_tests/README.md b/integration_tests/README.md index 3c87cda7d4..a3dc758392 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -12,6 +12,9 @@ make run-local # Only cluster env make run-cluster + +# Only compaction offload env +make run-compaction-offload ``` `horaedb-test` will recursively find all the files end with `.sql` and run it. Each file will be treated as a case. A file can contain multiple SQLs. When finished it will tell how many cases it run, and display the diff set if there is any. An example with one case: diff --git a/integration_tests/cases/env/compaction_offload/compact/compact.result b/integration_tests/cases/env/compaction_offload/compact/compact.result new file mode 100644 index 0000000000..9f4d91b488 --- /dev/null +++ b/integration_tests/cases/env/compaction_offload/compact/compact.result @@ -0,0 +1,110 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+-- +DROP TABLE IF EXISTS `compact_table1`; + +affected_rows: 0 + +CREATE TABLE `compact_table1` ( + `timestamp` timestamp NOT NULL, + `value` double, + `dic` string dictionary, + timestamp KEY (timestamp)) ENGINE=Analytic +WITH( + enable_ttl='false', + update_mode='OVERWRITE' +); + +affected_rows: 0 + +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (1, 100, "d1"), (2, 200, "d2"), (3, 300, "d3"); + +affected_rows: 3 + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (1, 100, "update_d1"), (2, 200, "update_d2"), (3, 300, "update_d3"); + +affected_rows: 3 + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (4, 400, "d4"), (5, 500, "d5"), (6, 600, "d6"); + +affected_rows: 3 + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (4, 400, "update_d4"), (5, 500, "update_d5"), (6, 600, "update_d6"); + +affected_rows: 3 + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (7, 700, "d7"), (8, 800, "d8"), (9, 900, "d9"); + +affected_rows: 3 + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (7, 700, "update_d7"), (8, 800, "update_d8"), (9, 900, "update_d9"); + +affected_rows: 3 + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (10, 1000, "d10"), (11, 1100, "d11"), (12, 1200, "d12"); + +affected_rows: 3 + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (10, 1000, "update_d10"), (11, 1100, "update_d11"), (12, 1200, "update_d12"); + +affected_rows: 3 + +-- trigger manual compaction after flush memtable +-- SQLNESS ARG pre_cmd=flush +-- SQLNESS ARG pre_cmd=compact +SELECT + * +FROM + `compact_table1` +ORDER BY + `value` ASC; + +tsid,timestamp,value,dic, +UInt64(0),Timestamp(1),Double(100.0),String("update_d1"), +UInt64(0),Timestamp(2),Double(200.0),String("update_d2"), +UInt64(0),Timestamp(3),Double(300.0),String("update_d3"), +UInt64(0),Timestamp(4),Double(400.0),String("update_d4"), +UInt64(0),Timestamp(5),Double(500.0),String("update_d5"), +UInt64(0),Timestamp(6),Double(600.0),String("update_d6"), +UInt64(0),Timestamp(7),Double(700.0),String("update_d7"), +UInt64(0),Timestamp(8),Double(800.0),String("update_d8"), +UInt64(0),Timestamp(9),Double(900.0),String("update_d9"), +UInt64(0),Timestamp(10),Double(1000.0),String("update_d10"), +UInt64(0),Timestamp(11),Double(1100.0),String("update_d11"), +UInt64(0),Timestamp(12),Double(1200.0),String("update_d12"), + + +DROP TABLE `compact_table1`; + +affected_rows: 0 + diff --git a/integration_tests/cases/env/compaction_offload/compact/compact.sql b/integration_tests/cases/env/compaction_offload/compact/compact.sql new file mode 100644 index 0000000000..f0aa46fbb5 --- /dev/null +++ b/integration_tests/cases/env/compaction_offload/compact/compact.sql @@ -0,0 +1,76 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- + +DROP TABLE IF EXISTS `compact_table1`; + +CREATE TABLE `compact_table1` ( + `timestamp` timestamp NOT NULL, + `value` double, + `dic` string dictionary, + timestamp KEY (timestamp)) ENGINE=Analytic +WITH( + enable_ttl='false', + update_mode='OVERWRITE' +); + + +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (1, 100, "d1"), (2, 200, "d2"), (3, 300, "d3"); + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (1, 100, "update_d1"), (2, 200, "update_d2"), (3, 300, "update_d3"); + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (4, 400, "d4"), (5, 500, "d5"), (6, 600, "d6"); + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (4, 400, "update_d4"), (5, 500, "update_d5"), (6, 600, "update_d6"); + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (7, 700, "d7"), (8, 800, "d8"), (9, 900, "d9"); + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (7, 700, "update_d7"), (8, 800, "update_d8"), (9, 900, "update_d9"); + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (10, 1000, "d10"), (11, 1100, "d11"), (12, 1200, "d12"); + +-- SQLNESS ARG pre_cmd=flush +INSERT INTO `compact_table1` (`timestamp`, `value`, `dic`) + VALUES (10, 1000, "update_d10"), (11, 1100, "update_d11"), (12, 1200, "update_d12"); + + +-- trigger manual compaction after flush memtable +-- SQLNESS ARG pre_cmd=flush +-- SQLNESS ARG pre_cmd=compact +SELECT + * +FROM + `compact_table1` +ORDER BY + `value` ASC; + + +DROP TABLE `compact_table1`; diff --git a/integration_tests/cases/env/compaction_offload/config.toml b/integration_tests/cases/env/compaction_offload/config.toml new file mode 100644 index 0000000000..044e6af1a5 --- /dev/null +++ b/integration_tests/cases/env/compaction_offload/config.toml @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
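
A note on the `-- SQLNESS ARG pre_cmd=flush` / `pre_cmd=compact` annotations in the case above: the harness resolves each one into an HTTP call against the server's debug API before the annotated statement runs, so the repeated flushes leave several SSTs behind and the final compact exercises the offloaded compaction path. A minimal sketch of what the `compact` pre-command amounts to, assuming the HTTP endpoint `127.0.0.1:5440` from the config below; the harness's own version is `execute_compact` in `integration_tests/src/database.rs`, later in this patch:

    // Illustrative only: trigger a compaction via the debug HTTP API.
    async fn trigger_compaction(http_endpoint: &str) -> Result<(), String> {
        let url = format!("http://{http_endpoint}/debug/compact_table");
        let resp = reqwest::Client::new()
            .post(url)
            .send()
            .await
            .map_err(|e| e.to_string())?;
        if resp.status().is_success() {
            Ok(())
        } else {
            Err(resp.text().await.unwrap_or_else(|e| e.to_string()))
        }
    }
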
+ +[server] +bind_addr = "127.0.0.1" +http_port = 5440 +grpc_port = 8831 + +[query_engine] +read_parallelism = 8 + +[analytic.wal] +type = "RocksDB" +data_dir = "/tmp/horaedb" + +[analytic.storage] +mem_cache_capacity = '1G' +mem_cache_partition_bits = 0 +disk_cache_dir = "/tmp/horaedb" +disk_cache_capacity = '2G' +disk_cache_page_size = '1M' + +[analytic.storage.object_store] +type = "Local" +data_dir = "/tmp/horaedb" + +[analytic.compaction_mode] +compaction_mode = "Offload" +node_picker = "Local" +endpoint = "127.0.0.1:8831" diff --git a/integration_tests/config/compaction-offload.toml b/integration_tests/config/compaction-offload.toml new file mode 100644 index 0000000000..1cb8fbb214 --- /dev/null +++ b/integration_tests/config/compaction-offload.toml @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[server] +bind_addr = "0.0.0.0" +http_port = 5440 +grpc_port = 8831 +postgresql_port = 5433 + +[logger] +level = "info" + +[tracing] +dir = "/tmp/compaction-offload" + +[analytic.storage.object_store] +type = "Local" +data_dir = "/tmp/compaction-offload" + +[analytic.wal] +type = "Local" +data_dir = "/tmp/compaction-offload" + +[analytic.compaction_mode] +compaction_mode = "Offload" +node_picker = "Local" +endpoint = "127.0.0.1:8831" + +[analytic] +enable_primary_key_sampling = true diff --git a/integration_tests/src/database.rs b/integration_tests/src/database.rs index 2020cd84d7..e598a46ae7 100644 --- a/integration_tests/src/database.rs +++ b/integration_tests/src/database.rs @@ -43,6 +43,9 @@ const CLUSTER_HORAEDB_STDOUT_FILE_0_ENV: &str = "CLUSTER_HORAEDB_STDOUT_FILE_0"; const CLUSTER_HORAEDB_STDOUT_FILE_1_ENV: &str = "CLUSTER_HORAEDB_STDOUT_FILE_1"; const CLUSTER_HORAEDB_HEALTH_CHECK_INTERVAL_SECONDS: usize = 5; +const HORAEDB_STDOUT_FILE_2_ENV: &str = "HORAEDB_STDOUT_FILE_2"; +const HORAEDB_CONFIG_FILE_2_ENV: &str = "HORAEDB_CONFIG_FILE_2"; + const HORAEDB_SERVER_ADDR: &str = "HORAEDB_SERVER_ADDR"; // Used to access HoraeDB by http service. 
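
Both configs above select offloaded compaction through the `[analytic.compaction_mode]` table. Read as a serde structure, that table behaves roughly like an internally tagged enum; the sketch below is only a guess at the shape, for illustration (the actual definition lives in the `analytic_engine` crate and may differ, e.g. `node_picker` is likely a richer type than a plain string):

    // Hypothetical shape of `[analytic.compaction_mode]`, for illustration only.
    #[derive(serde::Deserialize)]
    #[serde(tag = "compaction_mode")]
    enum CompactionMode {
        // In-process compaction.
        Local,
        // Offload compaction tasks to the node chosen by `node_picker`,
        // reachable at `endpoint`.
        Offload { node_picker: String, endpoint: String },
    }

With `node_picker = "Local"` the tasks presumably go to the fixed `endpoint`, which in these single-process tests is the server's own gRPC port (8831), i.e. the node offloads compaction to itself.
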
@@ -82,6 +85,10 @@ pub struct HoraeDBCluster { meta_stable_check_sql: String, } +pub struct HoraeDBCompactionOffload { + server: HoraeDBServer, +} + impl HoraeDBServer { fn spawn(bin: String, config: String, stdout: String) -> Self { let local_ip = local_ip_address::local_ip() @@ -231,6 +238,29 @@ impl Backend for HoraeDBCluster { } } +#[async_trait] +impl Backend for HoraeDBCompactionOffload { + fn start() -> Self { + let config = env::var(HORAEDB_CONFIG_FILE_2_ENV).expect("Cannot parse horaedb2 config env"); + let bin = env::var(HORAEDB_BINARY_PATH_ENV).expect("Cannot parse binary path env"); + let stdout = env::var(HORAEDB_STDOUT_FILE_2_ENV).expect("Cannot parse stdout2 env"); + Self { + server: HoraeDBServer::spawn(bin, config, stdout), + } + } + + async fn wait_for_ready(&self) { + tokio::time::sleep(Duration::from_secs(10)).await + } + + fn stop(&mut self) { + self.server + .server_process + .kill() + .expect("Failed to kill server"); + } +} + pub struct HoraeDB { backend: T, db_client: Arc, @@ -264,6 +294,7 @@ impl TryFrom<&str> for Protocol { #[derive(Debug, Clone, Copy)] enum Command { Flush, + Compact, } impl TryFrom<&str> for Command { @@ -272,6 +303,7 @@ impl TryFrom<&str> for Command { fn try_from(s: &str) -> Result { let cmd = match s { "flush" => Self::Flush, + "compact" => Self::Compact, _ => return Err(format!("Unknown command:{s}")), }; @@ -305,6 +337,12 @@ impl Database for HoraeDB { panic!("Execute flush command failed, err:{e}"); } } + Command::Compact => { + println!("Compact table..."); + if let Err(e) = self.execute_compact().await { + panic!("Execute compact command failed, err:{e}"); + } + } } } @@ -363,6 +401,19 @@ impl HoraeDB { Err(resp.text().await.unwrap_or_else(|e| format!("{e:?}"))) } + async fn execute_compact(&self) -> Result<(), String> { + // TODO(leslie): Improve code reusability. The following code is similar to + // `execute_flush()`. + let url = format!("http://{}/debug/compact_table", self.http_client.endpoint); + let resp = self.http_client.client.post(url).send().await.unwrap(); + + if resp.status() == StatusCode::OK { + return Ok(()); + } + + Err(resp.text().await.unwrap_or_else(|e| format!("{e:?}"))) + } + async fn execute_influxql( query: String, http_client: HttpClient, diff --git a/integration_tests/src/main.rs b/integration_tests/src/main.rs index 7098712715..e2c63f10c6 100644 --- a/integration_tests/src/main.rs +++ b/integration_tests/src/main.rs @@ -21,7 +21,7 @@ use std::{env, fmt::Display, path::Path}; use anyhow::Result; use async_trait::async_trait; -use database::{Backend, HoraeDB}; +use database::{Backend, HoraeDB, HoraeDBCompactionOffload}; use sqlness::{Database, EnvController, QueryContext, Runner}; use crate::database::{HoraeDBCluster, HoraeDBServer}; @@ -65,6 +65,9 @@ impl EnvController for HoraeDBController { let db = match env { "local" => Box::new(HoraeDB::::create().await) as DbRef, "cluster" => Box::new(HoraeDB::::create().await) as DbRef, + "compaction_offload" => { + Box::new(HoraeDB::::create().await) as DbRef + } _ => panic!("invalid env {env}"), }; @@ -103,6 +106,10 @@ async fn main() -> Result<()> { "build_local" => { let _ = controller.start("local", None).await; } + // Just build the compaction offload testing env. 
+ "build_compaction_offload" => { + let _ = controller.start("compaction_offload", None).await; + } other => { panic!("Unknown run mode:{other}") } diff --git a/src/server/src/http.rs b/src/server/src/http.rs index 83dad8785d..d31f5adec0 100644 --- a/src/server/src/http.rs +++ b/src/server/src/http.rs @@ -247,6 +247,7 @@ impl Service { .or(self.admin_block()) // debug APIs .or(self.flush_memtable()) + .or(self.compact_table()) .or(self.update_log_level()) .or(self.profile_cpu()) .or(self.profile_heap()) @@ -524,6 +525,54 @@ impl Service { }) } + // POST /debug/compact_table + fn compact_table( + &self, + ) -> impl Filter + Clone { + warp::path!("debug" / "compact_table") + .and(warp::post()) + .and(self.with_instance()) + .and_then(|instance: InstanceRef| async move { + let get_all_tables = || { + let mut tables = Vec::new(); + for catalog in instance + .catalog_manager + .all_catalogs() + .box_err() + .context(Internal)? + { + for schema in catalog.all_schemas().box_err().context(Internal)? { + for table in schema.all_tables().box_err().context(Internal)? { + tables.push(table); + } + } + } + Result::Ok(tables) + }; + match get_all_tables() { + Ok(tables) => { + let mut failed_tables = Vec::new(); + let mut success_tables = Vec::new(); + + for table in tables { + let table_name = table.name().to_string(); + if let Err(e) = table.compact().await { + error!("compact {} failed, err:{}", &table_name, e); + failed_tables.push(table_name); + } else { + success_tables.push(table_name); + } + } + let mut result = HashMap::new(); + result.insert("success", success_tables); + result.insert("failed", failed_tables); + Ok(reply::json(&result)) + } + Err(e) => Err(reject::custom(e)), + } + }) + } + // GET /metrics fn metrics( &self,