diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 81c69215472a..f4e99e6b6195 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -216,3 +216,8 @@ ORDER BY L2Distance(vectors, Point) LIMIT N SETTINGS annoy_index_search_k_nodes=100; ``` + +:::note +The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see +[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml. +::: diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d43746117572..62547ff8786b 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1642,7 +1642,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( { if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) granule = reader.read(); - // Cast to Ann condition + auto ann_condition = std::dynamic_pointer_cast(condition); if (ann_condition != nullptr) { diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 1c92645dbfa4..352456ab8722 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -172,7 +172,8 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t if (offsets[i + 1] - offsets[i] != size) throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); - index = std::make_shared>(size); + if (!index) + index = std::make_shared>(size); /// Add all rows of block index->add_item(index->get_n_items(), array.data()); @@ -195,7 +196,8 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t if (data.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Tuple has 0 rows, {} rows expected", rows_read); - index = std::make_shared>(data[0].size()); + if (!index) + index = std::make_shared>(data[0].size()); for (const auto & item : data) index->add_item(index->get_n_items(), item.data()); diff --git a/tests/queries/0_stateless/02354_annoy_index.reference b/tests/queries/0_stateless/02354_annoy_index.reference index 5e01a6e566e4..cf17a7a7eabf 100644 --- a/tests/queries/0_stateless/02354_annoy_index.reference +++ b/tests/queries/0_stateless/02354_annoy_index.reference @@ -142,3 +142,8 @@ Expression (Projection) Description: annoy GRANULARITY 4 Parts: 1/1 Granules: 4/4 +--- Test correctness of Annoy index with > 1 mark +1 [1,0,0,0] +9000 [9000,0,0,0] +1 (1,0,0,0) +9000 (9000,0,0,0) diff --git a/tests/queries/0_stateless/02354_annoy_index.sql b/tests/queries/0_stateless/02354_annoy_index.sql index dbe5d95dd1fd..73d818695817 100644 --- a/tests/queries/0_stateless/02354_annoy_index.sql +++ b/tests/queries/0_stateless/02354_annoy_index.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-ubsan, no-cpu-aarch64 +-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database SET allow_experimental_annoy_index = 1; SET allow_experimental_analyzer = 0; @@ -249,3 +249,35 @@ DROP TABLE tab; -- (*) Storage and search in Annoy indexes is inherently random. Tests which check for exact row matches would be unstable. Therefore, -- comment them out. + +SELECT '--- Test correctness of Annoy index with > 1 mark'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, [toFloat32(number), 0., 0., 0.] from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [1.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [9000.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +DROP TABLE tab; + +CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32, Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, (toFloat32(number), 0., 0., 0.) from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (1.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +DROP TABLE tab;