From e6ab6900e3278f0495b52ea4dc1576eb37452e23 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Fri, 28 Jun 2024 10:03:18 +0200 Subject: [PATCH] ALP: correctly skip when we are skipping fewer values than in a vector --- .../storage/compression/alp/alp_scan.hpp | 5 +- .../storage/compression/alprd/alprd_scan.hpp | 5 +- .../compression/alp/alp_list_skip.test | 127 ++++++++++++++++++ 3 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 test/sql/storage/compression/alp/alp_list_skip.test diff --git a/src/include/duckdb/storage/compression/alp/alp_scan.hpp b/src/include/duckdb/storage/compression/alp/alp_scan.hpp index a2180075237..9a269558880 100644 --- a/src/include/duckdb/storage/compression/alp/alp_scan.hpp +++ b/src/include/duckdb/storage/compression/alp/alp_scan.hpp @@ -177,12 +177,11 @@ struct AlpScanState : public SegmentScanState { public: //! Skip the next 'skip_count' values, we don't store the values void Skip(ColumnSegment &col_segment, idx_t skip_count) { - if (total_value_count != 0 && !VectorFinished()) { // Finish skipping the current vector - idx_t to_skip = LeftInVector(); - skip_count -= to_skip; + idx_t to_skip = MinValue(skip_count, LeftInVector()); ScanVector(nullptr, to_skip); + skip_count -= to_skip; } // Figure out how many entire vectors we can skip // For these vectors, we don't even need to process the metadata or values diff --git a/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp b/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp index 7d169895e99..05b35005db9 100644 --- a/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +++ b/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp @@ -184,12 +184,11 @@ struct AlpRDScanState : public SegmentScanState { public: //! 
Skip the next 'skip_count' values, we don't store the values void Skip(ColumnSegment &col_segment, idx_t skip_count) { - if (total_value_count != 0 && !VectorFinished()) { // Finish skipping the current vector - idx_t to_skip = LeftInVector(); - skip_count -= to_skip; + idx_t to_skip = MinValue(skip_count, LeftInVector()); ScanVector(nullptr, to_skip); + skip_count -= to_skip; } // Figure out how many entire vectors we can skip // For these vectors, we don't even need to process the metadata or values diff --git a/test/sql/storage/compression/alp/alp_list_skip.test b/test/sql/storage/compression/alp/alp_list_skip.test new file mode 100644 index 00000000000..987e7636bd8 --- /dev/null +++ b/test/sql/storage/compression/alp/alp_list_skip.test @@ -0,0 +1,127 @@ +# name: test/sql/storage/compression/alp/alp_list_skip.test +# description: Test skipping of small lists in alp +# group: [alp] + +# load the DB from disk +load __TEST_DIR__/test_alp_list_skip.db + +foreach comp alp alprd + +statement ok +SET force_compression='${comp}' + +# Create a table of DOUBLE lists that are mostly empty: only every 128th row of the range holds a single value, plus extra rows with i=5700 +# Filtering on i=5700 then exercises skipping fewer list values than are left in the current compressed vector +statement ok +create or replace table list_doubles as select 5700 i, [5700.0] l UNION ALL select i, CASE WHEN i%128=0 THEN [i::DOUBLE] ELSE []::DOUBLE[] END as data from range(10000) tbl(i) union all select 5700, [i] FROM range(100) tbl(i); + +statement ok +checkpoint + +query II +SELECT * FROM list_doubles WHERE i=5700 +---- +5700 [5700.0] +5700 [] +5700 [0.0] +5700 [1.0] +5700 [2.0] +5700 [3.0] +5700 [4.0] +5700 [5.0] +5700 [6.0] +5700 [7.0] +5700 [8.0] +5700 [9.0] +5700 [10.0] +5700 [11.0] +5700 [12.0] +5700 [13.0] +5700 [14.0] +5700 [15.0] +5700 [16.0] +5700 [17.0] +5700 [18.0] +5700 [19.0] +5700 [20.0] +5700 [21.0] +5700 [22.0] +5700 [23.0] +5700 [24.0] +5700 [25.0] +5700 [26.0] +5700 [27.0] +5700 [28.0] +5700 [29.0] +5700 [30.0] +5700 [31.0] +5700 [32.0] +5700 [33.0] +5700 [34.0] +5700 [35.0] +5700
[36.0] +5700 [37.0] +5700 [38.0] +5700 [39.0] +5700 [40.0] +5700 [41.0] +5700 [42.0] +5700 [43.0] +5700 [44.0] +5700 [45.0] +5700 [46.0] +5700 [47.0] +5700 [48.0] +5700 [49.0] +5700 [50.0] +5700 [51.0] +5700 [52.0] +5700 [53.0] +5700 [54.0] +5700 [55.0] +5700 [56.0] +5700 [57.0] +5700 [58.0] +5700 [59.0] +5700 [60.0] +5700 [61.0] +5700 [62.0] +5700 [63.0] +5700 [64.0] +5700 [65.0] +5700 [66.0] +5700 [67.0] +5700 [68.0] +5700 [69.0] +5700 [70.0] +5700 [71.0] +5700 [72.0] +5700 [73.0] +5700 [74.0] +5700 [75.0] +5700 [76.0] +5700 [77.0] +5700 [78.0] +5700 [79.0] +5700 [80.0] +5700 [81.0] +5700 [82.0] +5700 [83.0] +5700 [84.0] +5700 [85.0] +5700 [86.0] +5700 [87.0] +5700 [88.0] +5700 [89.0] +5700 [90.0] +5700 [91.0] +5700 [92.0] +5700 [93.0] +5700 [94.0] +5700 [95.0] +5700 [96.0] +5700 [97.0] +5700 [98.0] +5700 [99.0] + +endloop \ No newline at end of file