From 3f156890b4cf02572578b41b977f189d8cb1a0c2 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Mon, 26 Aug 2024 12:21:15 +0800 Subject: [PATCH] storage: Add vector search metrics (#9349) ref pingcap/tiflash#9032 storage: Add vector search metrics Signed-off-by: Wish Co-authored-by: Wenxuan --- dbms/src/Common/TiFlashMetrics.h | 17 +++++++ .../DMFileWithVectorIndexBlockInputStream.cpp | 2 + .../Index/VectorIndexHNSW/Index.cpp | 45 +++++++++++++++++++ .../DeltaMerge/Index/VectorIndexHNSW/Index.h | 13 ++++-- 4 files changed, 74 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 1703ec4387a..bac253f515c 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -857,6 +857,23 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva F(type_gac_req_ru_consumption_delta, {"type", "gac_req_ru_consumption_delta"}), \ F(type_gac_resp_tokens, {"type", "gac_resp_tokens"}), \ F(type_gac_resp_capacity, {"type", "gac_resp_capacity"})) \ + M(tiflash_vector_index_memory_usage, \ + "Vector index memory usage", \ + Gauge, \ + F(type_build, {"type", "build"}), \ + F(type_view, {"type", "view"})) \ + M(tiflash_vector_index_active_instances, \ + "Active Vector index instances", \ + Gauge, \ + F(type_build, {"type", "build"}), \ + F(type_view, {"type", "view"})) \ + M(tiflash_vector_index_duration, \ + "Vector index operation duration", \ + Histogram, \ + F(type_build, {{"type", "build"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_download, {{"type", "download"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_view, {{"type", "view"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_search, {{"type", "search"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_storage_io_limiter_pending_count, \ "I/O limiter pending count", \ Counter, \ diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp index a65a8b123bf..015ae523ee6 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp @@ -331,6 +331,8 @@ void DMFileWithVectorIndexBlockInputStream::loadVectorIndex() auto download_duration = watch.elapsedSeconds(); duration_load_index += download_duration; + + GET_METRIC(tiflash_vector_index_duration, type_download).Observe(download_duration); } else { diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp index b5ee8adb0b8..63fd1e69e90 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp @@ -13,6 +13,8 @@ // limitations under the License. #include +#include +#include #include #include #include @@ -22,6 +24,7 @@ #include #include +#include #include #include @@ -62,6 +65,7 @@ VectorIndexHNSWBuilder::VectorIndexHNSWBuilder(const TiDB::VectorIndexDefinition getUSearchMetricKind(definition->distance_metric)))) { RUNTIME_CHECK(definition_->kind == tipb::VectorIndexKind::HNSW); + GET_METRIC(tiflash_vector_index_active_instances, type_build).Increment(); } void VectorIndexHNSWBuilder::addBlock(const IColumn & column, const ColumnVector * del_mark) @@ -83,6 +87,9 @@ void VectorIndexHNSWBuilder::addBlock(const IColumn & column, const ColumnVector throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for HNSW index"); } + Stopwatch w; + SCOPE_EXIT({ total_duration += w.elapsedSeconds(); }); + for (int i = 0, i_max = col_array->size(); i < i_max; ++i) { auto row_offset = added_rows; @@ -110,14 +117,30 @@ void VectorIndexHNSWBuilder::addBlock(const IColumn & column, const ColumnVector row_offset, rc.error.release()); } + + auto current_memory_usage = index.memory_usage(); + auto delta = static_cast(current_memory_usage) - static_cast(last_reported_memory_usage); + GET_METRIC(tiflash_vector_index_memory_usage, type_build).Increment(static_cast(delta)); + last_reported_memory_usage = current_memory_usage; } void VectorIndexHNSWBuilder::save(std::string_view path) const { + Stopwatch w; + SCOPE_EXIT({ total_duration += w.elapsedSeconds(); }); + auto result = index.save(unum::usearch::output_file_t(path.data())); RUNTIME_CHECK_MSG(result, "Failed to save vector index: {}", result.error.what()); } +VectorIndexHNSWBuilder::~VectorIndexHNSWBuilder() +{ + GET_METRIC(tiflash_vector_index_duration, type_build).Observe(total_duration); + GET_METRIC(tiflash_vector_index_memory_usage, type_build) + .Decrement(static_cast(last_reported_memory_usage)); + GET_METRIC(tiflash_vector_index_active_instances, type_build).Decrement(); +} + VectorIndexViewerPtr VectorIndexHNSWViewer::view(const dtpb::VectorIndexFileProps & file_props, std::string_view path) { RUNTIME_CHECK(file_props.index_kind() == tipb::VectorIndexKind_Name(tipb::VectorIndexKind::HNSW)); @@ -126,6 +149,9 @@ VectorIndexViewerPtr VectorIndexHNSWViewer::view(const dtpb::VectorIndexFileProp RUNTIME_CHECK(tipb::VectorDistanceMetric_Parse(file_props.distance_metric(), &metric)); RUNTIME_CHECK(metric != tipb::VectorDistanceMetric::INVALID_DISTANCE_METRIC); + Stopwatch w; + SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_view).Observe(w.elapsedSeconds()); }); + auto vi = std::make_shared(file_props); vi->index = USearchImplType::make(unum::usearch::metric_punned_t( // file_props.dimensions(), @@ -133,6 +159,10 @@ VectorIndexViewerPtr VectorIndexHNSWViewer::view(const dtpb::VectorIndexFileProp auto result = vi->index.view(unum::usearch::memory_mapped_file_t(path.data())); RUNTIME_CHECK_MSG(result, "Failed to load vector index: {}", result.error.what()); + auto current_memory_usage = vi->index.memory_usage(); + GET_METRIC(tiflash_vector_index_memory_usage, type_view).Increment(static_cast(current_memory_usage)); + vi->last_reported_memory_usage = current_memory_usage; + return vi; } @@ -181,6 +211,9 @@ std::vector VectorIndexHNSWViewer::search( } }; + Stopwatch w; + SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_search).Observe(w.elapsedSeconds()); }); + // TODO(vector-index): Support efSearch. auto result = index.search( // reinterpret_cast(query_info->ref_vec_f32().data() + sizeof(UInt32)), @@ -211,4 +244,16 @@ void VectorIndexHNSWViewer::get(Key key, std::vector & out) const index.get(key, out.data()); } +VectorIndexHNSWViewer::VectorIndexHNSWViewer(const dtpb::VectorIndexFileProps & props) + : VectorIndexViewer(props) +{ + GET_METRIC(tiflash_vector_index_active_instances, type_view).Increment(); +} + +VectorIndexHNSWViewer::~VectorIndexHNSWViewer() +{ + GET_METRIC(tiflash_vector_index_memory_usage, type_view).Decrement(static_cast(last_reported_memory_usage)); + GET_METRIC(tiflash_vector_index_active_instances, type_view).Decrement(); +} + } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h index 1086fdce0c0..616cd43d262 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h @@ -29,6 +29,8 @@ class VectorIndexHNSWBuilder : public VectorIndexBuilder public: explicit VectorIndexHNSWBuilder(const TiDB::VectorIndexDefinitionPtr & definition_); + ~VectorIndexHNSWBuilder() override; + void addBlock(const IColumn & column, const ColumnVector * del_mark) override; void save(std::string_view path) const override; @@ -36,6 +38,9 @@ class VectorIndexHNSWBuilder : public VectorIndexBuilder private: USearchImplType index; UInt64 added_rows = 0; // Includes nulls and deletes. Used as the index key. + + mutable double total_duration = 0; + size_t last_reported_memory_usage = 0; }; class VectorIndexHNSWViewer : public VectorIndexViewer @@ -43,9 +48,9 @@ class VectorIndexHNSWViewer : public VectorIndexViewer public: static VectorIndexViewerPtr view(const dtpb::VectorIndexFileProps & props, std::string_view path); - explicit VectorIndexHNSWViewer(const dtpb::VectorIndexFileProps & props) - : VectorIndexViewer(props) - {} + explicit VectorIndexHNSWViewer(const dtpb::VectorIndexFileProps & props); + + ~VectorIndexHNSWViewer() override; std::vector search(const ANNQueryInfoPtr & query_info, const RowFilter & valid_rows) const override; @@ -53,6 +58,8 @@ class VectorIndexHNSWViewer : public VectorIndexViewer private: USearchImplType index; + + size_t last_reported_memory_usage = 0; }; } // namespace DB::DM