Skip to content

Commit

Permalink
fix: remove redundant length field in wal record (apache#1576)
Browse files Browse the repository at this point in the history
## Rationale
The `length` field is not required in the WAL record; it duplicates the
existing `value_length` field.

## Detailed Changes
- Remove length from wal record
- Remove rocksdb-wal from default features

## Test Plan
CI, plus manual benchmarks run with
[avalanche](https://github.com/prometheus-community/avalanche)
  • Loading branch information
jiacai2050 authored Oct 17, 2024
1 parent a90745e commit 4642a35
Show file tree
Hide file tree
Showing 20 changed files with 78 additions and 100 deletions.
2 changes: 1 addition & 1 deletion .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ github:
protected_branches:
main:
required_pull_request_reviews:
dismiss_stale_reviews: true
dismiss_stale_reviews: false
required_approving_review_count: 1
protected_tags: []

Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ on:
- 'Cargo.lock'
- '.github/workflows/ci.yml'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

# Common environment variables
env:
RUSTFLAGS: "-C debuginfo=1"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tsbs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- name: Setup Build Environment
run: |
sudo apt update
sudo apt install --yes protobuf-compiler
sudo apt install --yes protobuf-compiler liblzma-dev
- name: Build server
run: |
make build
Expand Down
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ members = [
"src/wal"
]

default-members = ["src/horaedb"]

[workspace.dependencies]
alloc_tracker = { path = "src/components/alloc_tracker" }
arrow = { version = "49.0.0", features = ["prettyprint"] }
Expand Down
2 changes: 1 addition & 1 deletion docs/example-cluster-0.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ type = "Local"
data_dir = "/tmp/horaedb0"

[analytic.wal]
type = "RocksDB"
type = "Local"
data_dir = "/tmp/horaedb0"

[cluster_deployment]
Expand Down
2 changes: 1 addition & 1 deletion docs/example-cluster-1.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type = "Local"
data_dir = "/tmp/horaedb1"

[analytic.wal]
type = "RocksDB"
type = "Local"
data_dir = "/tmp/horaedb1"

[cluster_deployment]
Expand Down
3 changes: 1 addition & 2 deletions docs/example-standalone-static-routing.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ max_replay_tables_per_batch = 1024
write_group_command_channel_cap = 1024

[analytic.wal]
type = "RocksDB"
type = "Local"
data_dir = "/tmp/horaedb1"

[analytic.storage]
Expand Down Expand Up @@ -91,4 +91,3 @@ shards = [ 1 ]
[limiter]
write_block_list = ['mytable1']
read_block_list = ['mytable1']

2 changes: 1 addition & 1 deletion docs/minimal.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type = "Local"
data_dir = "/tmp/horaedb"

[analytic.wal]
type = "RocksDB"
type = "Local"
data_dir = "/tmp/horaedb"

[analytic]
Expand Down
2 changes: 1 addition & 1 deletion integration_tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ build-meta:
./build_meta.sh

build-horaedb:
cd .. && cargo build --bin horaedb-server --features wal-table-kv,wal-message-queue,wal-rocksdb,wal-local-storage
cd .. && make build-debug

build-test:
cargo build
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,23 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";

plan_type,plan,
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=xx\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=xx\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),


-- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
-- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");

plan_type,plan,
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),


ALTER TABLE partition_table_t ADD COLUMN (b string);
Expand Down
2 changes: 2 additions & 0 deletions integration_tests/cases/env/cluster/ddl/partition_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,15 @@ SELECT * from partition_table_t where name in ("horaedb5", "horaedb6", "horaedb7
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";

-- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
-- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");

ALTER TABLE partition_table_t ADD COLUMN (b string);
Expand Down
8 changes: 4 additions & 4 deletions integration_tests/cases/env/local/ddl/query-plan.result
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ explain analyze select t from `03_dml_select_real_time_range`
where t > 1695348001000;

plan_type,plan,
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t]:\n=0]\n"),


-- This query should have higher priority
Expand All @@ -60,7 +60,7 @@ explain analyze select t from `03_dml_select_real_time_range`
where t >= 1695348001000 and t < 1695348002000;

plan_type,plan,
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t]:\n=0]\n"),


-- This query should have higher priority
Expand All @@ -70,7 +70,7 @@ explain analyze select name from `03_dml_select_real_time_range`
where t >= 1695348001000 and t < 1695348002000;

plan_type,plan,
String("Plan with Metrics"),String("ProjectionExec: expr=[name@0 as name], metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t,name]:\n=0]\n"),
String("Plan with Metrics"),String("ProjectionExec: expr=[name@0 as name], metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t,name]:\n=0]\n"),


-- This query should not include memtable
Expand Down Expand Up @@ -135,7 +135,7 @@ explain analyze select t from `03_append_mode_table`
where t >= 1695348001000 and name = 'ceresdb';

plan_type,plan,
String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetched_columns:[t,name]:\n=0]\n"),
String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_166, fetched_columns:[t,name]:\n=0]\n"),


-- Should just fetch projected columns from SST
Expand Down
2 changes: 1 addition & 1 deletion integration_tests/config/horaedb-cluster-0.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ type = "Local"
data_dir = "/tmp/horaedb0"

[analytic.wal]
type = "RocksDB"
type = "Local"
data_dir = "/tmp/horaedb0"

[cluster_deployment]
Expand Down
2 changes: 1 addition & 1 deletion integration_tests/config/horaedb-cluster-1.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type = "Local"
data_dir = "/tmp/horaedb1"

[analytic.wal]
type = "RocksDB"
type = "Local"
data_dir = "/tmp/horaedb1"

[cluster_deployment]
Expand Down
2 changes: 1 addition & 1 deletion integration_tests/config/shard-based-recovery.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ type = "Local"
data_dir = "/tmp/horaedb"

[analytic.wal]
type = "RocksDB"
type = "Local"
data_dir = "/tmp/horaedb"
2 changes: 1 addition & 1 deletion src/horaedb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ workspace = true
workspace = true

[features]
default = ["wal-rocksdb", "wal-table-kv", "wal-message-queue", "wal-local-storage"]
default = ["wal-table-kv", "wal-message-queue", "wal-local-storage"]
wal-table-kv = ["wal/wal-table-kv", "analytic_engine/wal-table-kv"]
wal-message-queue = ["wal/wal-message-queue", "analytic_engine/wal-message-queue"]
wal-rocksdb = ["wal/wal-rocksdb", "analytic_engine/wal-rocksdb"]
Expand Down
2 changes: 1 addition & 1 deletion src/wal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ workspace = true

[dependencies.rocksdb]
git = "https://github.com/tikv/rust-rocksdb.git"
rev = "f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f"
rev = "85e79e52c6ad80b8c547fcb90b3cade64f141fac"
features = ["portable"]
optional = true

Expand Down
Loading

0 comments on commit 4642a35

Please sign in to comment.