Skip to content

Commit

Permalink
[#23377] DocDB: Implement the way to apply vector index updates to DocDB
Browse files Browse the repository at this point in the history
Summary:
To manage vector indexes, we need a way to store and load them.
Please see the implemented format in the corresponding GitHub issue.
Jira: DB-12299

Test Plan: vector_index_docdb-test

Reviewers: mbautin, timur, aleksandr.ponomarenko

Reviewed By: mbautin, aleksandr.ponomarenko

Subscribers: ybase

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D36972
  • Loading branch information
spolitov committed Aug 6, 2024
1 parent f39c76c commit a42549e
Show file tree
Hide file tree
Showing 15 changed files with 520 additions and 45 deletions.
4 changes: 4 additions & 0 deletions src/yb/common/doc_hybrid_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ class DocHybridTime {
// Returns the hybrid time component of this DocHybridTime.
HybridTime hybrid_time() const { return hybrid_time_; }
// Returns the write id component of this DocHybridTime.
IntraTxnWriteId write_id() const { return write_id_; }

// Advances the write id by one; the hybrid time component is left untouched.
void IncrementWriteId() { ++write_id_; }

// Returns pointer to byte after last used byte.
char* EncodedInDocDbFormat(char* dest) const;

Expand Down
2 changes: 2 additions & 0 deletions src/yb/docdb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ set(DOCDB_SRCS
transaction_dump.cc
transaction_status_cache.cc
local_waiting_txn_registry.cc
vector_index_update.cc
wait_queue.cc
)

Expand Down Expand Up @@ -143,6 +144,7 @@ ADD_YB_TEST(shared_lock_manager-test)
ADD_YB_TEST(consensus_frontier-test)
ADD_YB_TEST(compaction_file_filter-test)
ADD_YB_TEST(usearch_vector_index-test)
ADD_YB_TEST(vector_index_docdb-test)

if(YB_BUILD_FUZZ_TARGETS)
# A library with common code shared between DocDB fuzz tests.
Expand Down
11 changes: 2 additions & 9 deletions src/yb/docdb/docdb_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,8 @@ template <class DumpStringFunc>
void ProcessDumpEntry(
Slice key, Slice value, SchemaPackingProvider* schema_packing_provider /*null ok*/,
StorageDbType db_type, IncludeBinary include_binary, DumpStringFunc func) {
auto [key_str, value_str] = DumpEntryToString(key, value, schema_packing_provider, db_type);
if (!key_str.ok()) {
func(key_str.status().ToString());
}
if (!value_str.ok()) {
func(value_str.status().CloneAndAppend(". Key: " + *key_str).ToString());
} else {
func(Format("$0 -> $1", *key_str, *value_str));
}
auto [key_res, value_res] = DumpEntryToString(key, value, schema_packing_provider, db_type);
func(Format("$0 -> $1", key_res, value_res));
if (include_binary) {
func(Format("$0 -> $1\n", FormatSliceAsStr(key), FormatSliceAsStr(value)));
}
Expand Down
24 changes: 24 additions & 0 deletions src/yb/docdb/vector_index.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) YugabyteDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//

#pragma once

#include "yb/dockv/primitive_value.h"

namespace yb::docdb {

// Identifier of a vertex (an indexed vector) in the vector index graph.
using VertexId = uint64_t;
// Level of the vector index graph that a neighbor list or an edge belongs to.
using VectorIndexLevel = uint8_t;
// Ids of the vertices adjacent to a node; std::set keeps them unique and ordered.
using VectorNodeNeighbors = std::set<VertexId>;

}  // namespace yb::docdb
69 changes: 69 additions & 0 deletions src/yb/docdb/vector_index_docdb-test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright (c) YugabyteDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//

#include "yb/docdb/docdb_test_base.h"
#include "yb/docdb/vector_index_update.h"

#include "yb/util/range.h"

namespace yb::docdb {

// DocDB test fixture for vector index updates. The tests run with a default-constructed Schema.
class VectorIndexDocDBTest : public DocDBTestBase {
 private:
  Schema CreateSchema() override { return Schema(); }
};

// Builds a batch of vector index updates (vectors, neighbor lists, edge additions and deletions),
// writes it to RocksDB and verifies the resulting DocDB debug dump.
TEST_F(VectorIndexDocDBTest, Update) {
  constexpr int kNumNodes = 3;
  const auto kNodes = Range(1, kNumNodes + 1);
  const HybridTime hybrid_time = HybridTime::FromMicros(1000);

  rocksdb::WriteBatch write_batch;
  FloatVectorIndexUpdate update(hybrid_time, write_batch);

  // Every update call below consumes one write id, so the order of these calls determines the
  // "w: N" values expected in the dump.
  for (int node : kNodes) {
    update.AddVector(node, {static_cast<float>(M_E * node), static_cast<float>(M_PI * node)});
  }
  for (int node : kNodes) {
    update.SetNeighbors(node, /* level= */ 0, Range(node + 1, kNumNodes + 1).ToContainer());
  }
  for (int node : kNodes) {
    // Connect each node to the next one (wrapping around), using a distinct level per node.
    const int next_node = (node % kNumNodes) + 1;
    update.AddDirectedEdge(node, next_node, /* level= */ node * 10);
  }

  update.DeleteDirectedEdge(2, 3, 20);
  update.DeleteVector(3);

  ASSERT_OK(rocksdb()->Write(write_options(), &write_batch));

  AssertDocDbDebugDumpStrEq(R"#(
// The vector 1 itself.
SubDocKey(DocKey([], [1]), [HT{ physical: 1000 }]) -> [2.71828174591064, 3.14159274101257]
// The neighbors of the vector 1 in level 0.
SubDocKey(DocKey([], [1]), [0; HT{ physical: 1000 w: 3 }]) -> [2, 3]
// The added edge from vector 1 to vector 2 in level 10.
SubDocKey(DocKey([], [1]), [10, 2; HT{ physical: 1000 w: 6 }]) -> null
// The same for remaining vectors.
SubDocKey(DocKey([], [2]), [HT{ physical: 1000 w: 1 }]) -> [5.43656349182129, 6.28318548202515]
SubDocKey(DocKey([], [2]), [0; HT{ physical: 1000 w: 4 }]) -> [3]
// Delete the edge from vector 2 to vector 3 in level 20.
SubDocKey(DocKey([], [2]), [20, 3; HT{ physical: 1000 w: 9 }]) -> DEL
SubDocKey(DocKey([], [2]), [20, 3; HT{ physical: 1000 w: 7 }]) -> null
// Delete the vector 3.
SubDocKey(DocKey([], [3]), [HT{ physical: 1000 w: 10 }]) -> DEL
SubDocKey(DocKey([], [3]), [HT{ physical: 1000 w: 2 }]) -> [8.15484523773193, 9.42477798461914]
SubDocKey(DocKey([], [3]), [0; HT{ physical: 1000 w: 5 }]) -> []
SubDocKey(DocKey([], [3]), [30, 1; HT{ physical: 1000 w: 8 }]) -> null
)#");
}

} // namespace yb::docdb
116 changes: 116 additions & 0 deletions src/yb/docdb/vector_index_update.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright (c) YugabyteDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//

#include "yb/docdb/vector_index_update.h"

#include "yb/dockv/doc_key.h"

#include "yb/util/decimal.h"

namespace yb::docdb {

// Records the vector stored at vertex `id`.
//
// Appends a put of the encoded vector to the write batch under the key produced by MakeKey(id)
// and mirrors the change in the in-memory node state.
//
// id     - vertex the vector belongs to.
// vector - coordinates of the vector; moved into the in-memory node state.
template <class CoordinateType>
void VectorIndexUpdate<CoordinateType>::AddVector(VertexId id, IndexedVector vector) {
  write_batch_.Put(MakeKey(id).AsSlice(), dockv::PrimitiveValue::Encoded(vector).AsSlice());

  auto& node = nodes_[id];
  node.vector = std::move(vector);
  // A vector that is (re-)added after DeleteVector within the same update is no longer deleted.
  // Without this reset the in-memory state would keep reporting a tombstone even though the
  // latest record in the batch is the new vector.
  node.tombstone = false;
}

// Marks the vector at vertex `id` as deleted: appends a tombstone record for the vertex key to
// the write batch and flags the in-memory node as a tombstone.
template <class CoordinateType>
void VectorIndexUpdate<CoordinateType>::DeleteVector(VertexId id) {
  auto vertex_key = MakeKey(id);
  write_batch_.Put(vertex_key.AsSlice(), dockv::PrimitiveValue::TombstoneSlice());
  nodes_[id].tombstone = true;
}

// Replaces the neighbor list of vertex `id` at `level`.
//
// The whole list is written as a single record keyed by (id, level), encoded as a vector of
// 64-bit vertex ids, and the in-memory neighbor set for that level is overwritten.
template <class CoordinateType>
void VectorIndexUpdate<CoordinateType>::SetNeighbors(
    VertexId id, VectorIndexLevel level, VectorNodeNeighbors new_neighbors) {
  auto level_key = MakeKey(id, level);
  auto encoded_neighbors = dockv::PrimitiveValue::Encoded(
      dockv::UInt64Vector{new_neighbors.begin(), new_neighbors.end()});
  write_batch_.Put(level_key.AsSlice(), encoded_neighbors.AsSlice());

  auto& level_info = GetLevel(id, level);
  level_info.neighbors = std::move(new_neighbors);
}

// Adds the directed edge a -> b at `level`.
//
// The edge is stored as a record keyed by (a, level, b) with a null value. In memory, b is added
// to the neighbors of a at that level and removed from its deleted neighbors.
template <class CoordinateType>
void VectorIndexUpdate<CoordinateType>::AddDirectedEdge(
    VertexId a, VertexId b, VectorIndexLevel level) {
  auto edge_key = MakeKey(a, level, b);
  write_batch_.Put(edge_key.AsSlice(), dockv::PrimitiveValue::NullSlice());

  auto& level_info = GetLevel(a, level);
  level_info.deleted_neighbors.erase(b);
  level_info.neighbors.insert(b);
}

// Removes the directed edge a -> b at `level`.
//
// A tombstone record is written under the key (a, level, b). In memory, b is removed from the
// neighbors of a at that level and remembered in its deleted neighbors.
template <class CoordinateType>
void VectorIndexUpdate<CoordinateType>::DeleteDirectedEdge(
    VertexId a, VertexId b, VectorIndexLevel level) {
  auto edge_key = MakeKey(a, level, b);
  write_batch_.Put(edge_key.AsSlice(), dockv::PrimitiveValue::TombstoneSlice());

  auto& level_info = GetLevel(a, level);
  level_info.deleted_neighbors.insert(b);
  level_info.neighbors.erase(b);
}

// Returns the in-memory info for vertex `id` at `level`, creating the node entry and growing its
// per-level list as needed so that the requested level exists.
template <class CoordinateType>
auto VectorIndexUpdate<CoordinateType>::GetLevel(VertexId id, VectorIndexLevel level) ->
    IndexedVectorLevelInfo& {
  auto& levels = nodes_[id].levels;
  if (levels.size() <= level) {
    levels.resize(level + 1);
  }
  return levels[level];
}

namespace {

void AppendSubkeys(dockv::KeyBytes& key) {
}

void AppendSubkey(dockv::KeyBytes& key, VectorIndexLevel level) {
key.AppendKeyEntryType(dockv::KeyEntryType::kUInt32);
key.AppendUInt32(level);
}

void AppendSubkey(dockv::KeyBytes& key, VertexId id) {
key.AppendKeyEntryType(dockv::KeyEntryType::kUInt64);
key.AppendUInt64(id);
}

template <class T, class... Subkeys>
void AppendSubkeys(dockv::KeyBytes& key, const T& t, Subkeys&&... subkeys) {
AppendSubkey(key, t);
AppendSubkeys(key, std::forward<Subkeys>(subkeys)...);
}

} // namespace

// Builds the encoded key for a record of vertex `id`.
//
// Key layout: vertex id entry, group end, the optional subkeys (level and/or neighbor id), then
// the hybrid time entry holding the current doc hybrid time.
//
// Side effect: the write id of the doc hybrid time is incremented after each key is built, so
// consecutive keys produced by the same update carry distinct write ids.
template <class CoordinateType>
template <class... Subkeys>
dockv::KeyBytes VectorIndexUpdate<CoordinateType>::MakeKey(VertexId id, Subkeys&&... subkeys) {
  dockv::KeyBytes result;
  dockv::KeyEntryValue::VectorVertexId(id).AppendToKey(&result);
  result.AppendGroupEnd();

  AppendSubkeys(result, std::forward<Subkeys>(subkeys)...);

  result.AppendKeyEntryType(dockv::KeyEntryType::kHybridTime);
  result.AppendHybridTime(doc_ht_);
  doc_ht_.IncrementWriteId();

  return result;
}

template class VectorIndexUpdate<float>;

} // namespace yb::docdb
62 changes: 62 additions & 0 deletions src/yb/docdb/vector_index_update.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) YugabyteDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//

#pragma once

#include <set>
#include <unordered_map>
#include <vector>

#include "yb/docdb/vector_index.h"

#include "yb/rocksdb/write_batch.h"

namespace yb::docdb {

// Collects modifications of a vector index and turns them into DocDB records.
//
// Every mutating call appends the corresponding record to the write batch passed to the
// constructor and mirrors the change in an in-memory per-vertex state. The write batch is held
// by reference.
template <class CoordinateType>
class VectorIndexUpdate {
 public:
  using IndexedVector = std::vector<CoordinateType>;

  // ht          - hybrid time used for the keys generated by this update.
  // write_batch - batch that receives the generated records.
  explicit VectorIndexUpdate(HybridTime ht, rocksdb::WriteBatch& write_batch)
      : doc_ht_(ht), write_batch_(write_batch) {}

  // Stores the vector of vertex `id`.
  void AddVector(VertexId id, IndexedVector vector);

  // Marks the vector of vertex `id` as deleted.
  void DeleteVector(VertexId id);

  // Replaces the whole neighbor list of vertex `id` at `level`.
  void SetNeighbors(VertexId id, VectorIndexLevel level, VectorNodeNeighbors new_neighbors);

  // Adds the directed edge a -> b at `level`.
  void AddDirectedEdge(VertexId a, VertexId b, VectorIndexLevel level);

  // Removes the directed edge a -> b at `level`.
  void DeleteDirectedEdge(VertexId a, VertexId b, VectorIndexLevel level);

 private:
  // In-memory state of one vertex at one level.
  struct IndexedVectorLevelInfo {
    VectorNodeNeighbors neighbors;
    VectorNodeNeighbors deleted_neighbors;
  };

  // In-memory state of one vertex.
  struct IndexedVectorInfo {
    bool tombstone = false;
    IndexedVector vector;
    std::vector<IndexedVectorLevelInfo> levels;  // Indexed by level.
  };

  // Returns the state of vertex `id` at `level`, creating it when missing.
  IndexedVectorLevelInfo& GetLevel(VertexId id, VectorIndexLevel level);

  // Builds the encoded key for vertex `id` followed by the given subkeys and the current doc
  // hybrid time, then advances the write id.
  template <class... Subkeys>
  dockv::KeyBytes MakeKey(VertexId id, Subkeys&&... subkeys);

  DocHybridTime doc_ht_;
  std::unordered_map<VertexId, IndexedVectorInfo> nodes_;
  rocksdb::WriteBatch& write_batch_;
};

using FloatVectorIndexUpdate = VectorIndexUpdate<float>;

} // namespace yb::docdb
1 change: 1 addition & 0 deletions src/yb/dockv/key_entry_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ class KeyEntryValue {
static KeyEntryValue UInt32(uint32_t v, SortOrder sort_order = SortOrder::kAscending);
static KeyEntryValue Int64(int64_t v, SortOrder sort_order = SortOrder::kAscending);
static KeyEntryValue UInt64(uint64_t v, SortOrder sort_order = SortOrder::kAscending);
static KeyEntryValue VectorVertexId(uint64_t v);
static KeyEntryValue MakeTimestamp(
const Timestamp& timestamp, SortOrder sort_order = SortOrder::kAscending);
static KeyEntryValue MakeInetAddress(
Expand Down
Loading

0 comments on commit a42549e

Please sign in to comment.