-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[#23377] DocDB: Implement the way to apply vector index updates to DocDB
Summary: To manage vector indexes, we need a way to store and load them. Please see the corresponding GitHub issue for the implemented format. Jira: DB-12299 Test Plan: vector_index_update-test Reviewers: mbautin, timur, aleksandr.ponomarenko Reviewed By: mbautin, aleksandr.ponomarenko Subscribers: ybase Tags: #jenkins-ready Differential Revision: https://phorge.dev.yugabyte.com/D36972
- Loading branch information
Showing
15 changed files
with
520 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// Copyright (c) YugabyteDB, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
// in compliance with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software distributed under the License | ||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
// or implied. See the License for the specific language governing permissions and limitations | ||
// under the License. | ||
// | ||
|
||
#pragma once | ||
|
||
#include "yb/dockv/primitive_value.h" | ||
|
||
namespace yb::docdb { | ||
|
||
// Identifier of a vertex in a vector index.
using VertexId = uint64_t;

// Number of a level within a vector index.
using VectorIndexLevel = uint8_t;

// Ordered set of vertex ids that are neighbors of a vertex.
using VectorNodeNeighbors = std::set<VertexId>;
|
||
} // namespace yb::docdb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// Copyright (c) YugabyteDB, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
// in compliance with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software distributed under the License | ||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
// or implied. See the License for the specific language governing permissions and limitations | ||
// under the License. | ||
// | ||
|
||
#include "yb/docdb/docdb_test_base.h" | ||
#include "yb/docdb/vector_index_update.h" | ||
|
||
#include "yb/util/range.h" | ||
|
||
namespace yb::docdb { | ||
|
||
class VectorIndexDocDBTest : public DocDBTestBase { | ||
Schema CreateSchema() override { | ||
return Schema(); | ||
} | ||
}; | ||
|
||
// Applies a sequence of vector index modifications through FloatVectorIndexUpdate, writes the
// resulting batch to RocksDB and checks the DocDB debug dump. The order of the calls matters:
// every generated key receives the next write id (the `w:` values in the expected dump).
TEST_F(VectorIndexDocDBTest, Update) {
  constexpr int kNumNodes = 3;
  const auto kNodes = Range(1, kNumNodes + 1);
  const HybridTime hybrid_time = HybridTime::FromMicros(1000);

  rocksdb::WriteBatch write_batch;
  FloatVectorIndexUpdate update(hybrid_time, write_batch);

  // Vectors first: write ids 0..2.
  for (int vertex : kNodes) {
    update.AddVector(
        vertex, {static_cast<float>(M_E * vertex), static_cast<float>(M_PI * vertex)});
  }
  // Level 0 neighbors of each vertex are all vertices with a greater id: write ids 3..5.
  for (int vertex : kNodes) {
    update.SetNeighbors(vertex, /* level= */ 0, Range(vertex + 1, kNumNodes + 1).ToContainer());
  }
  // One edge per vertex to the next vertex (wrapping around), at level vertex * 10: write ids 6..8.
  for (int vertex : kNodes) {
    update.AddDirectedEdge(vertex, (vertex % kNumNodes) + 1, vertex * 10);
  }

  // Removals: write ids 9 and 10.
  update.DeleteDirectedEdge(2, 3, 20);
  update.DeleteVector(3);

  ASSERT_OK(rocksdb()->Write(write_options(), &write_batch));

  AssertDocDbDebugDumpStrEq(R"#(
// The vector 1 itself.
SubDocKey(DocKey([], [1]), [HT{ physical: 1000 }]) -> [2.71828174591064, 3.14159274101257]
// The neighbors of the vector 1 in level 0.
SubDocKey(DocKey([], [1]), [0; HT{ physical: 1000 w: 3 }]) -> [2, 3]
// The added edge from vector 1 to vector 2 in level 10.
SubDocKey(DocKey([], [1]), [10, 2; HT{ physical: 1000 w: 6 }]) -> null
// The same for remaining vectors.
SubDocKey(DocKey([], [2]), [HT{ physical: 1000 w: 1 }]) -> [5.43656349182129, 6.28318548202515]
SubDocKey(DocKey([], [2]), [0; HT{ physical: 1000 w: 4 }]) -> [3]
// Delete the edge from vector 2 to vector 3 in level 20.
SubDocKey(DocKey([], [2]), [20, 3; HT{ physical: 1000 w: 9 }]) -> DEL
SubDocKey(DocKey([], [2]), [20, 3; HT{ physical: 1000 w: 7 }]) -> null
// Delete the vector 3.
SubDocKey(DocKey([], [3]), [HT{ physical: 1000 w: 10 }]) -> DEL
SubDocKey(DocKey([], [3]), [HT{ physical: 1000 w: 2 }]) -> [8.15484523773193, 9.42477798461914]
SubDocKey(DocKey([], [3]), [0; HT{ physical: 1000 w: 5 }]) -> []
SubDocKey(DocKey([], [3]), [30, 1; HT{ physical: 1000 w: 8 }]) -> null
  )#");
}
|
||
} // namespace yb::docdb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
// Copyright (c) YugabyteDB, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
// in compliance with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software distributed under the License | ||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
// or implied. See the License for the specific language governing permissions and limitations | ||
// under the License. | ||
// | ||
|
||
#include "yb/docdb/vector_index_update.h" | ||
|
||
#include "yb/dockv/doc_key.h" | ||
|
||
#include "yb/util/decimal.h" | ||
|
||
namespace yb::docdb { | ||
|
||
template <class CoordinateType> | ||
void VectorIndexUpdate<CoordinateType>::AddVector(VertexId id, IndexedVector vector) { | ||
write_batch_.Put(MakeKey(id).AsSlice(), dockv::PrimitiveValue::Encoded(vector).AsSlice()); | ||
nodes_[id].vector = std::move(vector); | ||
} | ||
|
||
template <class CoordinateType> | ||
void VectorIndexUpdate<CoordinateType>::DeleteVector(yb::docdb::VertexId id) { | ||
write_batch_.Put(MakeKey(id).AsSlice(), dockv::PrimitiveValue::TombstoneSlice()); | ||
nodes_[id].tombstone = true; | ||
} | ||
|
||
template <class CoordinateType> | ||
void VectorIndexUpdate<CoordinateType>::SetNeighbors( | ||
VertexId id, VectorIndexLevel level, VectorNodeNeighbors new_neighbors) { | ||
write_batch_.Put( | ||
MakeKey(id, level), | ||
dockv::PrimitiveValue::Encoded( | ||
dockv::UInt64Vector{new_neighbors.begin(), new_neighbors.end()}).AsSlice()); | ||
|
||
GetLevel(id, level).neighbors = std::move(new_neighbors); | ||
} | ||
|
||
template <class CoordinateType> | ||
void VectorIndexUpdate<CoordinateType>::AddDirectedEdge( | ||
VertexId a, VertexId b, VectorIndexLevel level) { | ||
write_batch_.Put(MakeKey(a, level, b), dockv::PrimitiveValue::NullSlice()); | ||
|
||
auto& vector_info = GetLevel(a, level); | ||
vector_info.neighbors.insert(b); | ||
vector_info.deleted_neighbors.erase(b); | ||
} | ||
|
||
template <class CoordinateType> | ||
void VectorIndexUpdate<CoordinateType>::DeleteDirectedEdge( | ||
VertexId a, VertexId b, VectorIndexLevel level) { | ||
write_batch_.Put(MakeKey(a, level, b), dockv::PrimitiveValue::TombstoneSlice()); | ||
|
||
auto& vector_info = GetLevel(a, level); | ||
vector_info.neighbors.erase(b); | ||
vector_info.deleted_neighbors.insert(b); | ||
} | ||
|
||
template <class CoordinateType> | ||
auto VectorIndexUpdate<CoordinateType>::GetLevel(VertexId id, VectorIndexLevel level) -> | ||
VectorIndexUpdate<CoordinateType>::IndexedVectorLevelInfo& { | ||
auto& node = nodes_[id]; | ||
if (level >= node.levels.size()) { | ||
node.levels.resize(level + 1); | ||
} | ||
return node.levels[level]; | ||
} | ||
|
||
namespace { | ||
|
||
void AppendSubkeys(dockv::KeyBytes& key) { | ||
} | ||
|
||
void AppendSubkey(dockv::KeyBytes& key, VectorIndexLevel level) { | ||
key.AppendKeyEntryType(dockv::KeyEntryType::kUInt32); | ||
key.AppendUInt32(level); | ||
} | ||
|
||
void AppendSubkey(dockv::KeyBytes& key, VertexId id) { | ||
key.AppendKeyEntryType(dockv::KeyEntryType::kUInt64); | ||
key.AppendUInt64(id); | ||
} | ||
|
||
template <class T, class... Subkeys> | ||
void AppendSubkeys(dockv::KeyBytes& key, const T& t, Subkeys&&... subkeys) { | ||
AppendSubkey(key, t); | ||
AppendSubkeys(key, std::forward<Subkeys>(subkeys)...); | ||
} | ||
|
||
} // namespace | ||
|
||
template <class CoordinateType> | ||
template <class... Subkeys> | ||
dockv::KeyBytes VectorIndexUpdate<CoordinateType>::MakeKey(VertexId id, Subkeys&&... subkeys) { | ||
dockv::KeyBytes key; | ||
auto key_entry_value = dockv::KeyEntryValue::VectorVertexId(id); | ||
key_entry_value.AppendToKey(&key); | ||
key.AppendGroupEnd(); | ||
AppendSubkeys(key, std::forward<Subkeys>(subkeys)...); | ||
key.AppendKeyEntryType(dockv::KeyEntryType::kHybridTime); | ||
key.AppendHybridTime(doc_ht_); | ||
|
||
doc_ht_.IncrementWriteId(); | ||
|
||
return key; | ||
} | ||
|
||
template class VectorIndexUpdate<float>; | ||
|
||
} // namespace yb::docdb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Copyright (c) YugabyteDB, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
// in compliance with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software distributed under the License | ||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
// or implied. See the License for the specific language governing permissions and limitations | ||
// under the License. | ||
// | ||
|
||
#pragma once | ||
|
||
#include <set>
#include <unordered_map>
#include <vector>
|
||
#include "yb/docdb/vector_index.h" | ||
|
||
#include "yb/rocksdb/write_batch.h" | ||
|
||
namespace yb::docdb { | ||
|
||
template <class CoordinateType> | ||
class VectorIndexUpdate { | ||
public: | ||
using IndexedVector = std::vector<CoordinateType>; | ||
|
||
explicit VectorIndexUpdate(HybridTime ht, rocksdb::WriteBatch& write_batch) | ||
: doc_ht_(ht), write_batch_(write_batch) {} | ||
|
||
void AddVector(VertexId id, IndexedVector v); | ||
void DeleteVector(VertexId id); | ||
void SetNeighbors(VertexId id, VectorIndexLevel level, VectorNodeNeighbors new_neighbors); | ||
void AddDirectedEdge(VertexId a, VertexId b, VectorIndexLevel level); | ||
void DeleteDirectedEdge(VertexId a, VertexId b, VectorIndexLevel level); | ||
|
||
private: | ||
struct IndexedVectorLevelInfo { | ||
VectorNodeNeighbors neighbors; | ||
VectorNodeNeighbors deleted_neighbors; | ||
}; | ||
|
||
IndexedVectorLevelInfo& GetLevel(VertexId id, VectorIndexLevel level); | ||
template <class... Subkeys> | ||
dockv::KeyBytes MakeKey(VertexId id, Subkeys&&... subkeys); | ||
|
||
struct IndexedVectorInfo { | ||
bool tombstone = false; | ||
IndexedVector vector; | ||
std::vector<IndexedVectorLevelInfo> levels; | ||
}; | ||
|
||
DocHybridTime doc_ht_; | ||
std::unordered_map<VertexId, IndexedVectorInfo> nodes_; | ||
rocksdb::WriteBatch& write_batch_; | ||
}; | ||
|
||
using FloatVectorIndexUpdate = VectorIndexUpdate<float>; | ||
|
||
} // namespace yb::docdb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.