From 1a3c2cbd177dd7ab1346eb0abf81554e3d4d68e8 Mon Sep 17 00:00:00 2001 From: Sven Klemm Date: Tue, 23 Apr 2024 06:42:43 +0200 Subject: [PATCH] Use non-orderby compressed metadata in compressed DML Currently the additional metadata derived from index columns is only used for the qualifier pushdown in querying but not for the decompression during compressed DML. This patch makes use of this metadata for compressed DML as well. This will lead to a considerable speedup when deleting or updating compressed chunks with filters on non-segmentby columns. --- .unreleased/pr_6849 | 1 + tsl/src/compression/compression.c | 32 +++++++++++++++---- .../expected/compression_update_delete.out | 29 +++++++++++++++++ tsl/test/sql/compression_update_delete.sql | 13 ++++++++ 4 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 .unreleased/pr_6849 diff --git a/.unreleased/pr_6849 b/.unreleased/pr_6849 new file mode 100644 index 00000000000..78ad70209d9 --- /dev/null +++ b/.unreleased/pr_6849 @@ -0,0 +1 @@ +Fixes: #6849 Use non-orderby compressed metadata in compressed DML diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 08214bbf60d..33c1d7ad440 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2607,17 +2607,31 @@ fill_predicate_context(Chunk *ch, CompressionSettings *settings, List *predicate false)); /* is_null */ } } + continue; } - else if (ts_array_is_member(settings->fd.orderby, column_name)) + + int min_attno = compressed_column_metadata_attno(settings, + ch->table_id, + var->varattno, + settings->fd.relid, + "min"); + int max_attno = compressed_column_metadata_attno(settings, + ch->table_id, + var->varattno, + settings->fd.relid, + "max"); + + if (min_attno != InvalidAttrNumber && max_attno != InvalidAttrNumber) { - int16 index = ts_array_position(settings->fd.orderby, column_name); switch (op_strategy) { case BTEqualStrategyNumber: { /* orderby col = value implies min <= value and max >= 
value */ *heap_filters = lappend(*heap_filters, - make_batchfilter(column_segment_min_name(index), + make_batchfilter(get_attname(settings->fd.relid, + min_attno, + false), BTLessEqualStrategyNumber, collation, opcode, @@ -2625,7 +2639,9 @@ fill_predicate_context(Chunk *ch, CompressionSettings *settings, List *predicate false, /* is_null_check */ false)); /* is_null */ *heap_filters = lappend(*heap_filters, - make_batchfilter(column_segment_max_name(index), + make_batchfilter(get_attname(settings->fd.relid, + max_attno, + false), BTGreaterEqualStrategyNumber, collation, opcode, @@ -2639,7 +2655,9 @@ fill_predicate_context(Chunk *ch, CompressionSettings *settings, List *predicate { /* orderby col <[=] value implies min <[=] value */ *heap_filters = lappend(*heap_filters, - make_batchfilter(column_segment_min_name(index), + make_batchfilter(get_attname(settings->fd.relid, + min_attno, + false), op_strategy, collation, opcode, @@ -2653,7 +2671,9 @@ fill_predicate_context(Chunk *ch, CompressionSettings *settings, List *predicate { /* orderby col >[=] value implies max >[=] value */ *heap_filters = lappend(*heap_filters, - make_batchfilter(column_segment_max_name(index), + make_batchfilter(get_attname(settings->fd.relid, + max_attno, + false), op_strategy, collation, opcode, diff --git a/tsl/test/expected/compression_update_delete.out b/tsl/test/expected/compression_update_delete.out index f66bd264f86..559d1aeb4c2 100644 --- a/tsl/test/expected/compression_update_delete.out +++ b/tsl/test/expected/compression_update_delete.out @@ -2915,3 +2915,32 @@ EXPLAIN (costs off) SELECT * FROM test_partials ORDER BY time; (14 rows) DROP TABLE test_partials; +CREATE TABLE test_meta_filters(time timestamptz NOT NULL, device text, metric text, v1 float, v2 float); +CREATE INDEX ON test_meta_filters(device, metric, v1); +SELECT create_hypertable('test_meta_filters', 'time'); + create_hypertable +--------------------------------- + (37,public,test_meta_filters,t) +(1 row) + +ALTER 
TABLE test_meta_filters SET (timescaledb.compress, timescaledb.compress_segmentby='device', timescaledb.compress_orderby='metric,time'); +INSERT INTO test_meta_filters SELECT '2020-01-01'::timestamptz,'d1','m' || metric::text,v1,v2 FROM generate_series(1,3,1) metric, generate_series(1,1000,1) v1, generate_series(1,10,1) v2 ORDER BY 1,2,3,4,5; +SELECT compress_chunk(show_chunks('test_meta_filters')); + compress_chunk +------------------------------------------ + _timescaledb_internal._hyper_37_77_chunk +(1 row) + +EXPLAIN (analyze, timing off, costs off, summary off) DELETE FROM test_meta_filters WHERE device = 'd1' AND metric = 'm1' AND v1 < 100; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 1 + Tuples decompressed: 1000 + -> Delete on test_meta_filters (actual rows=0 loops=1) + Delete on _hyper_37_77_chunk test_meta_filters_1 + -> Seq Scan on _hyper_37_77_chunk test_meta_filters_1 (actual rows=990 loops=1) + Filter: ((v1 < '100'::double precision) AND (device = 'd1'::text) AND (metric = 'm1'::text)) + Rows Removed by Filter: 10 +(8 rows) + diff --git a/tsl/test/sql/compression_update_delete.sql b/tsl/test/sql/compression_update_delete.sql index c03ea2c006d..4eb7a1a83fd 100644 --- a/tsl/test/sql/compression_update_delete.sql +++ b/tsl/test/sql/compression_update_delete.sql @@ -1480,3 +1480,16 @@ SELECT compress_chunk(show_chunks('test_partials')); -- fully compressed EXPLAIN (costs off) SELECT * FROM test_partials ORDER BY time; DROP TABLE test_partials; + +CREATE TABLE test_meta_filters(time timestamptz NOT NULL, device text, metric text, v1 float, v2 float); +CREATE INDEX ON test_meta_filters(device, metric, v1); +SELECT create_hypertable('test_meta_filters', 'time'); + +ALTER TABLE test_meta_filters SET (timescaledb.compress, timescaledb.compress_segmentby='device', 
timescaledb.compress_orderby='metric,time'); + +INSERT INTO test_meta_filters SELECT '2020-01-01'::timestamptz,'d1','m' || metric::text,v1,v2 FROM generate_series(1,3,1) metric, generate_series(1,1000,1) v1, generate_series(1,10,1) v2 ORDER BY 1,2,3,4,5; + +SELECT compress_chunk(show_chunks('test_meta_filters')); + +EXPLAIN (analyze, timing off, costs off, summary off) DELETE FROM test_meta_filters WHERE device = 'd1' AND metric = 'm1' AND v1 < 100; +