#
# Copyright (C) 2019-2024 vdaas.org vald team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: "Run unit tests (Rust)"
on:
  push:
    branches:
      - main
      - "release/v*.*"
      - "!release/v*.*.*"
    paths:
      # Re-run when this workflow itself changes, not only when Rust sources change.
      - ".github/workflows/unit-test-rust.yaml"
      - "rust/**"
  pull_request:
    paths:
      - ".github/workflows/unit-test-rust.yaml"
      - "rust/**"
jobs:
  dump-contexts-to-log:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/dump-context
  detect-ci-container:
    uses: ./.github/workflows/_detect-ci-container.yaml
  test-rust-qbg:
    name: Run tests for Rust
    runs-on: ubuntu-latest
    needs: [detect-ci-container]
    container:
      image: ghcr.io/vdaas/vald/vald-ci-container:${{ needs.detect-ci-container.outputs.TAG }}
    steps:
      - uses: actions/checkout@v4
      - name: Set Git config
        run: |
          git config --global --add safe.directory "${GITHUB_WORKSPACE}"
      # The step only invokes the make target; no Go test formatter is involved.
      - name: Run tests for Rust / qbg
        run: |
          TEST_RESULT_DIR="${GITHUB_WORKSPACE}" make test/rust/qbg
.PHONY: test/rust/qbg
## run tests for qbg
# The tests are run one at a time, in this order, and the on-disk index under
# rust/libs/algorithms/qbg/index/ is removed between the two groups.
# Paths are anchored at $(ROOTDIR) like the other test targets in this file, and
# a stale index left by an aborted run is removed before the first test.
test/rust/qbg:
	rm -rf $(ROOTDIR)/rust/libs/algorithms/qbg/index/
	cargo test --manifest-path $(ROOTDIR)/rust/Cargo.toml --package qbg --lib -- tests::test_ffi_qbg --exact --show-output
	cargo test --manifest-path $(ROOTDIR)/rust/Cargo.toml --package qbg --lib -- tests::test_ffi_qbg_prebuilt --exact --show-output
	rm -rf $(ROOTDIR)/rust/libs/algorithms/qbg/index/
	cargo test --manifest-path $(ROOTDIR)/rust/Cargo.toml --package qbg --lib -- tests::test_property --exact --show-output
	cargo test --manifest-path $(ROOTDIR)/rust/Cargo.toml --package qbg --lib -- tests::test_index --exact --show-output
	rm -rf $(ROOTDIR)/rust/libs/algorithms/qbg/index/
b/rust/libs/algorithms/qbg/Cargo.toml @@ -0,0 +1,29 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +[package] +name = "qbg" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.88" +cxx = { version = "1.0.128", features = ["c++20"] } + +[build-dependencies] +cxx-build = "1.0.128" +miette = { version = "7.2.0", features = ["fancy"] } + +[dev-dependencies] diff --git a/rust/libs/algorithms/qbg/build.rs b/rust/libs/algorithms/qbg/build.rs new file mode 100644 index 0000000000..46a557607a --- /dev/null +++ b/rust/libs/algorithms/qbg/build.rs @@ -0,0 +1,35 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +fn main() -> miette::Result<()> { + let current_dir = std::env::current_dir().unwrap(); + println!("cargo:rustc-link-search=native={}", current_dir.display()); + + cxx_build::bridge("src/lib.rs") + .file("src/input.cpp") + .flag_if_supported("-std=c++20") + .flag_if_supported("-fopenmp") + .flag_if_supported("-DNGT_BFLOAT_DISABLED") + .compile("qbg-rs"); + + println!("cargo:rustc-link-search=native=/usr/local/lib"); + println!("cargo:rustc-link-lib=static=ngt"); + println!("cargo:rustc-link-lib=blas"); + println!("cargo:rustc-link-lib=lapack"); + println!("cargo:rustc-link-lib=dylib=gomp"); + println!("cargo:rerun-if-changed=src/*"); + + Ok(()) +} diff --git a/rust/libs/algorithms/qbg/src/input.cpp b/rust/libs/algorithms/qbg/src/input.cpp new file mode 100644 index 0000000000..3e6109d8fb --- /dev/null +++ b/rust/libs/algorithms/qbg/src/input.cpp @@ -0,0 +1,435 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "qbg/src/input.h" +#include "qbg/src/lib.rs.h" + +Property::Property() +{ + qbg_construction_parameters = new QBGConstructionParameters(); + qbg_build_parameters = new QBGBuildParameters(); + qbg_initialize_construction_parameters(qbg_construction_parameters); + qbg_initialize_build_parameters(qbg_build_parameters); +} + +Property::~Property() +{ + delete qbg_construction_parameters; + delete qbg_build_parameters; +} + +QBGConstructionParameters *Property::get_qbg_construction_parameters() +{ + return qbg_construction_parameters; +} + +void Property::init_qbg_construction_parameters() +{ + qbg_initialize_construction_parameters(qbg_construction_parameters); +} + +void Property::set_qbg_construction_parameters( + rust::usize extended_dimension, + rust::usize dimension, + rust::usize number_of_subvectors, + rust::usize number_of_blobs, + rust::i32 internal_data_type, + rust::i32 data_type, + rust::i32 distance_type) +{ + qbg_initialize_construction_parameters(qbg_construction_parameters); + qbg_construction_parameters->extended_dimension = extended_dimension; + qbg_construction_parameters->dimension = dimension; + qbg_construction_parameters->number_of_subvectors = number_of_subvectors; + qbg_construction_parameters->number_of_blobs = number_of_blobs; + qbg_construction_parameters->internal_data_type = internal_data_type; + qbg_construction_parameters->data_type = data_type; + qbg_construction_parameters->distance_type = distance_type; +} + +void Property::set_extended_dimension(rust::usize extended_dimension) +{ + qbg_construction_parameters->extended_dimension = extended_dimension; +} + +void Property::set_dimension(rust::usize dimension) +{ + qbg_construction_parameters->dimension = dimension; +} + +void Property::set_number_of_subvectors(rust::usize number_of_subvectors) +{ + qbg_construction_parameters->number_of_subvectors = number_of_subvectors; +} + +void Property::set_number_of_blobs(rust::usize number_of_blobs) +{ + 
qbg_construction_parameters->number_of_blobs = number_of_blobs; +} + +void Property::set_internal_data_type(rust::i32 internal_data_type) +{ + qbg_construction_parameters->internal_data_type = internal_data_type; +} + +void Property::set_data_type(rust::i32 data_type) +{ + qbg_construction_parameters->data_type = data_type; +} + +void Property::set_distance_type(rust::i32 distance_type) +{ + qbg_construction_parameters->distance_type = distance_type; +} + +QBGBuildParameters *Property::get_qbg_build_parameters() +{ + return qbg_build_parameters; +} + +void Property::init_qbg_build_parameters() +{ + qbg_initialize_build_parameters(qbg_build_parameters); +} + +void Property::set_qbg_build_parameters( + rust::i32 hierarchical_clustering_init_mode, + rust::usize number_of_first_objects, + rust::usize number_of_first_clusters, + rust::usize number_of_second_objects, + rust::usize number_of_second_clusters, + rust::usize number_of_third_clusters, + rust::usize number_of_objects, + rust::usize number_of_subvectors, + rust::i32 optimization_clustering_init_mode, + rust::usize rotation_iteration, + rust::usize subvector_iteration, + rust::usize number_of_matrices, + bool rotation, + bool repositioning) +{ + qbg_initialize_build_parameters(qbg_build_parameters); + qbg_build_parameters->hierarchical_clustering_init_mode = hierarchical_clustering_init_mode; + qbg_build_parameters->number_of_first_objects = number_of_first_objects; + qbg_build_parameters->number_of_first_clusters = number_of_first_clusters; + qbg_build_parameters->number_of_second_objects = number_of_second_objects; + qbg_build_parameters->number_of_second_clusters = number_of_second_clusters; + qbg_build_parameters->number_of_third_clusters = number_of_third_clusters; + qbg_build_parameters->number_of_objects = number_of_objects; + qbg_build_parameters->number_of_subvectors = number_of_subvectors; + qbg_build_parameters->optimization_clustering_init_mode = optimization_clustering_init_mode; + 
qbg_build_parameters->rotation_iteration = rotation_iteration; + qbg_build_parameters->subvector_iteration = subvector_iteration; + qbg_build_parameters->number_of_matrices = number_of_matrices; + qbg_build_parameters->rotation = rotation; + qbg_build_parameters->repositioning = repositioning; +} + +void Property::set_hierarchical_clustering_init_mode(rust::i32 hierarchical_clustering_init_mode) +{ + qbg_build_parameters->hierarchical_clustering_init_mode = hierarchical_clustering_init_mode; +} + +void Property::set_number_of_first_objects(rust::usize number_of_first_objects) +{ + qbg_build_parameters->number_of_first_objects = number_of_first_objects; +} + +void Property::set_number_of_first_clusters(rust::usize number_of_first_clusters) +{ + qbg_build_parameters->number_of_first_clusters = number_of_first_clusters; +} + +void Property::set_number_of_second_objects(rust::usize number_of_second_objects) +{ + qbg_build_parameters->number_of_second_objects = number_of_second_objects; +} + +void Property::set_number_of_second_clusters(rust::usize number_of_second_clusters) +{ + qbg_build_parameters->number_of_second_clusters = number_of_second_clusters; +} + +void Property::set_number_of_third_clusters(rust::usize number_of_third_clusters) +{ + qbg_build_parameters->number_of_third_clusters = number_of_third_clusters; +} + +void Property::set_number_of_objects(rust::usize number_of_objects) +{ + qbg_build_parameters->number_of_objects = number_of_objects; +} + +void Property::set_number_of_subvectors_for_bp(rust::usize number_of_subvectors) +{ + qbg_build_parameters->number_of_subvectors = number_of_subvectors; +} + +void Property::set_optimization_clustering_init_mode(rust::i32 optimization_clustering_init_mode) +{ + qbg_build_parameters->optimization_clustering_init_mode = optimization_clustering_init_mode; +} + +void Property::set_rotation_iteration(rust::usize rotation_iteration) +{ + qbg_build_parameters->rotation_iteration = rotation_iteration; +} + +void 
Property::set_subvector_iteration(rust::usize subvector_iteration) +{ + qbg_build_parameters->subvector_iteration = subvector_iteration; +} + +void Property::set_number_of_matrices(rust::usize number_of_matrices) +{ + qbg_build_parameters->number_of_matrices = number_of_matrices; +} + +void Property::set_rotation(bool rotation) +{ + qbg_build_parameters->rotation = rotation; +} + +void Property::set_repositioning(bool repositioning) +{ + qbg_build_parameters->repositioning = repositioning; +} + +Index::Index(const rust::String &path, Property &p) +{ + NGTError err = ngt_create_error_object(); + std::string cpath(path); + bool ok = qbg_create(cpath.c_str(), p.get_qbg_construction_parameters(), err); + if (!ok) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + open_index(cpath.c_str(), false); + ngt_destroy_error_object(err); +} + +Index::Index(const rust::String &path, bool prebuilt) +{ + std::string cpath(path); + open_index(cpath.c_str(), prebuilt); +} + +Index::~Index() {} + +void Index::open_index(const rust::String &path, bool prebuilt) +{ + NGTError err = ngt_create_error_object(); + std::string cpath(path); + index = qbg_open_index(cpath.c_str(), prebuilt, err); + if (index == 0) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + ngt_destroy_error_object(err); +} + +void Index::build_index(const rust::String &path, Property &p) +{ + NGTError err = ngt_create_error_object(); + std::string cpath(path); + bool ok = qbg_build_index(cpath.c_str(), p.get_qbg_build_parameters(), err); + if (!ok) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw 
std::runtime_error(s); + } + ngt_destroy_error_object(err); +} + +void Index::save_index() +{ + NGTError err = ngt_create_error_object(); + bool ok = qbg_save_index(index, err); + if (!ok) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + ngt_destroy_error_object(err); +} + +void Index::close_index() +{ + qbg_close_index(index); +} + +rust::i32 Index::append(rust::Slice v) +{ + NGTError err = ngt_create_error_object(); + std::vector vec(v.begin(), v.end()); + unsigned int id = qbg_append_object(index, vec.data(), v.length(), err); + if (id == 0) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + ngt_destroy_error_object(err); + return id; +} + +rust::i32 Index::insert(rust::Slice v) +{ + NGTError err = ngt_create_error_object(); + std::vector vec(v.begin(), v.end()); + unsigned int id = qbg_insert_object(index, vec.data(), v.length(), err); + if (id == 0) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + ngt_destroy_error_object(err); + return id; +} + +void Index::remove(rust::usize id) +{ + NGTError err = ngt_create_error_object(); + bool ok = qbg_remove_object(index, id, err); + if (!ok) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + ngt_destroy_error_object(err); +} + +std::unique_ptr> Index::search(rust::Slice v, rust::usize k, rust::f32 radius, rust::f32 epsilon) +{ + QBGQuery query; + qbg_initialize_query(&query); + std::vector vec(v.begin(), v.end()); + query.query = vec.data(); + 
query.number_of_results = k; + query.radius = radius; + query.epsilon = epsilon; + + NGTError err = ngt_create_error_object(); + NGTObjectDistances results = ngt_create_empty_results(err); + bool ok = qbg_search_index(index, query, results, err); + if (!ok) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + qbg_destroy_results(results); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + + size_t rsize = qbg_get_result_size(results, err); + if (rsize == 0) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + qbg_destroy_results(results); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + size_t limit = std::min(k, rsize); + std::vector searchResults; + for (size_t i = 0; i < limit; i++) + { + NGTObjectDistance obj = qbg_get_result(results, i, err); + if (obj.id == 0) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + qbg_destroy_results(results); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + searchResults.push_back(SearchResult{obj.id, obj.distance}); + } + ngt_destroy_error_object(err); + qbg_destroy_results(results); + return std::make_unique>(searchResults); +} + +rust::f32 *Index::get_object(rust::usize id) +{ + NGTError err = ngt_create_error_object(); + float *vec = qbg_get_object(index, id, err); + if (vec == 0) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + ngt_destroy_error_object(err); + return vec; +} + +rust::usize Index::get_dimension() +{ + NGTError err = ngt_create_error_object(); + size_t dim = qbg_get_dimension(index, err); + if (dim == 0) + { + string s = ngt_get_error_string(err); + ngt_destroy_error_object(err); + 
std::cerr << "Error: " << __func__ << std::endl; + std::cerr << s << std::endl; + throw std::runtime_error(s); + } + ngt_destroy_error_object(err); + return dim; +} + +std::unique_ptr new_property() +{ + return std::make_unique(); +} + +std::unique_ptr new_index(const rust::String &path, Property &p) +{ + return std::make_unique(path, p); +} + +std::unique_ptr new_prebuilt_index(const rust::String &path, bool prebuilt) +{ + return std::make_unique(path, prebuilt); +} \ No newline at end of file diff --git a/rust/libs/algorithms/qbg/src/input.h b/rust/libs/algorithms/qbg/src/input.h new file mode 100644 index 0000000000..02ccbb2432 --- /dev/null +++ b/rust/libs/algorithms/qbg/src/input.h @@ -0,0 +1,118 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#pragma once + +#include +#include "NGT/NGTQ/Capi.h" +#include "NGT/NGTQ/QuantizedGraph.h" +#include "rust/cxx.h" + +struct SearchResult +{ + rust::u32 id; + rust::f32 distance; + + rust::u32 get_id() { return id; } + rust::f32 get_distance() { return distance; } +}; + +class Property +{ + QBGConstructionParameters *qbg_construction_parameters; + QBGBuildParameters *qbg_build_parameters; + +public: + Property(); + ~Property(); + QBGConstructionParameters *get_qbg_construction_parameters(); + void init_qbg_construction_parameters(); + void set_qbg_construction_parameters( + rust::usize, + rust::usize, + rust::usize, + rust::usize, + rust::i32, + rust::i32, + rust::i32); + void set_extended_dimension(rust::usize); + void set_dimension(rust::usize); + void set_number_of_subvectors(rust::usize); + void set_number_of_blobs(rust::usize); + void set_internal_data_type(rust::i32); + void set_data_type(rust::i32); + void set_distance_type(rust::i32); + QBGBuildParameters *get_qbg_build_parameters(); + void init_qbg_build_parameters(); + void set_qbg_build_parameters( + // hierarchical kmeans + rust::i32, + rust::usize, + rust::usize, + rust::usize, + rust::usize, + rust::usize, + // optimization + rust::usize, + rust::usize, + rust::i32, + rust::usize, + rust::usize, + rust::usize, + bool, + bool); + void set_hierarchical_clustering_init_mode(rust::i32); + void set_number_of_first_objects(rust::usize); + void set_number_of_first_clusters(rust::usize); + void set_number_of_second_objects(rust::usize); + void set_number_of_second_clusters(rust::usize); + void set_number_of_third_clusters(rust::usize); + void set_number_of_objects(rust::usize); + void set_number_of_subvectors_for_bp(rust::usize); + void set_optimization_clustering_init_mode(rust::i32); + void set_rotation_iteration(rust::usize); + void set_subvector_iteration(rust::usize); + void set_number_of_matrices(rust::usize); + void set_rotation(bool); + void set_repositioning(bool); +}; + +class Index +{ + void 
*index; + +public: + Index( + const rust::String &, + Property &); + Index( + const rust::String &, + bool); + ~Index(); + void open_index(const rust::String &, bool); + void build_index(const rust::String &, Property &); + void save_index(); + void close_index(); + rust::i32 append(rust::Slice); + rust::i32 insert(rust::Slice); + void remove(rust::usize); + std::unique_ptr> search(rust::Slice, rust::usize, rust::f32, rust::f32); + rust::f32 *get_object(rust::usize); + rust::usize get_dimension(); +}; + +std::unique_ptr new_property(); +std::unique_ptr new_index(const rust::String &, Property &); +std::unique_ptr new_prebuilt_index(const rust::String &, bool); \ No newline at end of file diff --git a/rust/libs/algorithms/qbg/src/lib.rs b/rust/libs/algorithms/qbg/src/lib.rs new file mode 100644 index 0000000000..c4b8bdc231 --- /dev/null +++ b/rust/libs/algorithms/qbg/src/lib.rs @@ -0,0 +1,671 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#[cxx::bridge] +pub mod ffi { + unsafe extern "C++" { + include!("qbg/src/input.h"); + + type Property; + fn new_property() -> UniquePtr; + fn init_qbg_construction_parameters(self: Pin<&mut Property>); + fn set_qbg_construction_parameters( + self: Pin<&mut Property>, + extended_dimension: usize, + dimension: usize, + number_of_subvectors: usize, + number_of_blobs: usize, + internal_data_type: i32, + data_type: i32, + distance_type: i32, + ); + fn set_extended_dimension(self: Pin<&mut Property>, extended_dimension: usize); + fn set_dimension(self: Pin<&mut Property>, dimension: usize); + fn set_number_of_subvectors(self: Pin<&mut Property>, number_of_subvectors: usize); + fn set_number_of_blobs(self: Pin<&mut Property>, number_of_blobs: usize); + fn set_internal_data_type(self: Pin<&mut Property>, internal_data_type: i32); + fn set_data_type(self: Pin<&mut Property>, data_type: i32); + fn set_distance_type(self: Pin<&mut Property>, distance_type: i32); + fn init_qbg_build_parameters(self: Pin<&mut Property>); + fn set_qbg_build_parameters( + self: Pin<&mut Property>, + hierarchical_clustering_init_mode: i32, + number_of_first_objects: usize, + number_of_first_clusters: usize, + number_of_second_objects: usize, + number_of_second_clusters: usize, + number_of_third_clusters: usize, + number_of_objects: usize, + number_of_subvectors: usize, + optimization_clustering_init_mode: i32, + rotation_iteration: usize, + subvector_iteration: usize, + number_of_matrices: usize, + rotation: bool, + repositioning: bool, + ); + fn set_hierarchical_clustering_init_mode( + self: Pin<&mut Property>, + hierarchical_clustering_init_mode: i32, + ); + fn set_number_of_first_objects(self: Pin<&mut Property>, number_of_first_objects: usize); + fn set_number_of_first_clusters(self: Pin<&mut Property>, number_of_first_clusters: usize); + fn set_number_of_second_objects(self: Pin<&mut Property>, number_of_second_objects: usize); + fn set_number_of_second_clusters( + self: Pin<&mut 
Property>, + number_of_second_clusters: usize, + ); + fn set_number_of_third_clusters(self: Pin<&mut Property>, number_of_third_clusters: usize); + fn set_number_of_objects(self: Pin<&mut Property>, number_of_objects: usize); + fn set_number_of_subvectors_for_bp(self: Pin<&mut Property>, number_of_subvectors: usize); + fn set_optimization_clustering_init_mode( + self: Pin<&mut Property>, + optimization_clustering_init_mode: i32, + ); + fn set_rotation_iteration(self: Pin<&mut Property>, rotation_iteration: usize); + fn set_subvector_iteration(self: Pin<&mut Property>, subvector_iteration: usize); + fn set_number_of_matrices(self: Pin<&mut Property>, number_of_matrices: usize); + fn set_rotation(self: Pin<&mut Property>, rotation: bool); + fn set_repositioning(self: Pin<&mut Property>, repositioning: bool); + + type SearchResult; + fn get_id(self: Pin<&mut SearchResult>) -> u32; + fn get_distance(self: Pin<&mut SearchResult>) -> f32; + + type Index; + fn new_index(path: &String, p: Pin<&mut Property>) -> Result>; + fn new_prebuilt_index(path: &String, p: bool) -> Result>; + fn open_index(self: Pin<&mut Index>, path: &String, prebuilt: bool) -> Result<()>; + fn build_index(self: Pin<&mut Index>, path: &String, p: Pin<&mut Property>) -> Result<()>; + fn save_index(self: Pin<&mut Index>) -> Result<()>; + fn close_index(self: Pin<&mut Index>); + fn append(self: Pin<&mut Index>, v: &[f32]) -> Result; + fn insert(self: Pin<&mut Index>, v: &[f32]) -> Result; + fn remove(self: Pin<&mut Index>, id: usize) -> Result<()>; + fn search( + self: Pin<&mut Index>, + v: &[f32], + k: usize, + radius: f32, + epsilon: f32, + ) -> UniquePtr>; + fn get_object(self: Pin<&mut Index>, id: usize) -> Result<*mut f32>; + fn get_dimension(self: Pin<&mut Index>) -> Result; + } +} + +pub mod property { + use super::ffi; + use cxx::UniquePtr; + use std::pin::Pin; + + pub struct Property { + inner: UniquePtr, + } + + impl Property { + pub fn new() -> Self { + let inner = ffi::new_property(); + 
Property { inner } + } + + /* Returns a pinned mutable reference to the wrapped ffi::Property by calling pin_mut() on inner. */ pub fn get_property(&mut self) -> Pin<&mut ffi::Property> { + self.inner.pin_mut() + } + + /* Calls init_qbg_construction_parameters on the wrapped property; takes no arguments. */ pub fn init_qbg_construction_parameters(&mut self) { + self.inner.pin_mut().init_qbg_construction_parameters() + } + + /* Sets all seven construction parameters in one call, forwarding them to the wrapped property in declaration order. */ pub fn set_qbg_construction_parameters( + &mut self, + extended_dimension: usize, + dimension: usize, + number_of_subvectors: usize, + number_of_blobs: usize, + internal_data_type: i32, + data_type: i32, + distance_type: i32, + ) { + self.inner.pin_mut().set_qbg_construction_parameters( + extended_dimension, + dimension, + number_of_subvectors, + number_of_blobs, + internal_data_type, + data_type, + distance_type, + ) + } + + /* Single-value setter: forwards extended_dimension unchanged to the wrapped property. */ pub fn set_extended_dimension(&mut self, extended_dimension: usize) { + self.inner + .pin_mut() + .set_extended_dimension(extended_dimension) + } + + /* Single-value setter: forwards dimension unchanged to the wrapped property. */ pub fn set_dimension(&mut self, dimension: usize) { + self.inner.pin_mut().set_dimension(dimension) + } + + /* Single-value setter: forwards number_of_subvectors unchanged to the wrapped property. */ pub fn set_number_of_subvectors(&mut self, number_of_subvectors: usize) { + self.inner + .pin_mut() + .set_number_of_subvectors(number_of_subvectors) + } + + /* Single-value setter: forwards number_of_blobs unchanged to the wrapped property. */ pub fn set_number_of_blobs(&mut self, number_of_blobs: usize) { + self.inner.pin_mut().set_number_of_blobs(number_of_blobs) + } + + /* Single-value setter: forwards internal_data_type (a plain i32 at this layer) to the wrapped property. */ pub fn set_internal_data_type(&mut self, internal_data_type: i32) { + self.inner + .pin_mut() + .set_internal_data_type(internal_data_type) + } + + /* Single-value setter: forwards data_type (a plain i32 at this layer) to the wrapped property. */ pub fn set_data_type(&mut self, data_type: i32) { + self.inner.pin_mut().set_data_type(data_type) + } + + /* Single-value setter: forwards distance_type (a plain i32 at this layer) to the wrapped property. */ pub fn set_distance_type(&mut self, distance_type: i32) { + self.inner.pin_mut().set_distance_type(distance_type) + } + + /* Calls init_qbg_build_parameters on the wrapped property; takes no arguments. */ pub fn init_qbg_build_parameters(&mut self) { + self.inner.pin_mut().init_qbg_build_parameters() + } + + /* Sets all fourteen build parameters in one call, forwarding them to the wrapped property in declaration order. */ pub fn set_qbg_build_parameters( + &mut self, + hierarchical_clustering_init_mode: i32, + number_of_first_objects: usize, + number_of_first_clusters: usize, + number_of_second_objects: usize, + number_of_second_clusters: usize, + number_of_third_clusters: usize, + number_of_objects: usize, +
number_of_subvectors: usize, + optimization_clustering_init_mode: i32, + rotation_iteration: usize, + subvector_iteration: usize, + number_of_matrices: usize, + rotation: bool, + repositioning: bool, + ) { + self.inner.pin_mut().set_qbg_build_parameters( + hierarchical_clustering_init_mode, + number_of_first_objects, + number_of_first_clusters, + number_of_second_objects, + number_of_second_clusters, + number_of_third_clusters, + number_of_objects, + number_of_subvectors, + optimization_clustering_init_mode, + rotation_iteration, + subvector_iteration, + number_of_matrices, + rotation, + repositioning, + ) + } + + /* Single-value setter: forwards hierarchical_clustering_init_mode (a plain i32 at this layer) to the wrapped property. */ pub fn set_hierarchical_clustering_init_mode( + &mut self, + hierarchical_clustering_init_mode: i32, + ) { + self.inner + .pin_mut() + .set_hierarchical_clustering_init_mode(hierarchical_clustering_init_mode) + } + + /* Single-value setter: forwards number_of_first_objects unchanged to the wrapped property. */ pub fn set_number_of_first_objects(&mut self, number_of_first_objects: usize) { + self.inner + .pin_mut() + .set_number_of_first_objects(number_of_first_objects) + } + + /* Single-value setter: forwards number_of_first_clusters unchanged to the wrapped property. */ pub fn set_number_of_first_clusters(&mut self, number_of_first_clusters: usize) { + self.inner + .pin_mut() + .set_number_of_first_clusters(number_of_first_clusters) + } + + /* Single-value setter: forwards number_of_second_objects unchanged to the wrapped property. */ pub fn set_number_of_second_objects(&mut self, number_of_second_objects: usize) { + self.inner + .pin_mut() + .set_number_of_second_objects(number_of_second_objects) + } + + /* Single-value setter: forwards number_of_second_clusters unchanged to the wrapped property. */ pub fn set_number_of_second_clusters(&mut self, number_of_second_clusters: usize) { + self.inner + .pin_mut() + .set_number_of_second_clusters(number_of_second_clusters) + } + + /* Single-value setter: forwards number_of_third_clusters unchanged to the wrapped property. */ pub fn set_number_of_third_clusters(&mut self, number_of_third_clusters: usize) { + self.inner + .pin_mut() + .set_number_of_third_clusters(number_of_third_clusters) + } + + /* Single-value setter: forwards number_of_objects unchanged to the wrapped property. */ pub fn set_number_of_objects(&mut self, number_of_objects: usize) { + self.inner + .pin_mut() + .set_number_of_objects(number_of_objects) + } + + /* Forwards number_of_subvectors to set_number_of_subvectors_for_bp, which is a different ffi call from set_number_of_subvectors. */ pub fn set_number_of_subvectors_for_bp(&mut self, number_of_subvectors: usize) { + self.inner + .pin_mut() +
.set_number_of_subvectors_for_bp(number_of_subvectors) + } + + /* Single-value setter: forwards optimization_clustering_init_mode (a plain i32 at this layer) to the wrapped property. */ pub fn set_optimization_clustering_init_mode( + &mut self, + optimization_clustering_init_mode: i32, + ) { + self.inner + .pin_mut() + .set_optimization_clustering_init_mode(optimization_clustering_init_mode) + } + + /* Single-value setter: forwards rotation_iteration unchanged to the wrapped property. */ pub fn set_rotation_iteration(&mut self, rotation_iteration: usize) { + self.inner + .pin_mut() + .set_rotation_iteration(rotation_iteration) + } + + /* Single-value setter: forwards subvector_iteration unchanged to the wrapped property. */ pub fn set_subvector_iteration(&mut self, subvector_iteration: usize) { + self.inner + .pin_mut() + .set_subvector_iteration(subvector_iteration) + } + + /* Single-value setter: forwards number_of_matrices unchanged to the wrapped property. */ pub fn set_number_of_matrices(&mut self, number_of_matrices: usize) { + self.inner + .pin_mut() + .set_number_of_matrices(number_of_matrices) + } + + /* Single-value setter: forwards the rotation flag to the wrapped property. */ pub fn set_rotation(&mut self, rotation: bool) { + self.inner.pin_mut().set_rotation(rotation) + } + + /* Single-value setter: forwards the repositioning flag to the wrapped property. */ pub fn set_repositioning(&mut self, repositioning: bool) { + self.inner.pin_mut().set_repositioning(repositioning) + } + } +} + +pub mod index { + /* Wrapper around the ffi index handle; every method goes through self.inner.pin_mut(). NOTE(review): several generic arguments look missing in this text (e.g. "UniquePtr," and "-> Result {" below), presumably lost in extraction; confirm against the original file. */ use super::ffi; + use super::property; + use core::slice; + use cxx::UniquePtr; + + /* Holds the ffi index handle in inner. */ pub struct Index { + inner: UniquePtr, + } + + impl Index { + /* Creates the handle with ffi::new_index(path, p.get_property()); an ffi error is returned to the caller through the ? operator. */ pub fn new(path: &String, p: &mut property::Property) -> Result { + let inner = ffi::new_index(path, p.get_property())?; + Ok(Index { inner }) + } + + /* Creates the handle with ffi::new_prebuilt_index(path, p), passing the bool p straight through; an ffi error is returned through the ? operator. */ pub fn new_prebuilt(path: &String, p: bool) -> Result { + let inner = ffi::new_prebuilt_index(path, p)?; + Ok(Index { inner }) + } + + /* Forwards path and the prebuilt flag to the ffi open_index and returns its result. */ pub fn open_index(&mut self, path: &String, prebuilt: bool) -> Result<(), cxx::Exception> { + self.inner.pin_mut().open_index(path, prebuilt) + } + + /* Forwards path and the pinned ffi property obtained from p.get_property() to the ffi build_index and returns its result. */ pub fn build_index( + &mut self, + path: &String, + p: &mut property::Property, + ) -> Result<(), cxx::Exception> { + self.inner.pin_mut().build_index(path, p.get_property()) + } + + /* Returns the result of the ffi save_index unchanged. */ pub fn save_index(&mut self) -> Result<(), cxx::Exception> { + self.inner.pin_mut().save_index() + } + + /* Calls the ffi close_index; no result is returned at this layer. */ pub fn close_index(&mut self) { + self.inner.pin_mut().close_index() + } + + /* Passes the slice v to the ffi append and returns its result unchanged; the tests in this file compare the success value with an i32. */ pub fn append(&mut self, v: &[f32]) -> Result { +
self.inner.pin_mut().append(v) + } + + /* Passes the slice v to the ffi insert and returns its result unchanged; the tests in this file compare the success value with an i32. */ pub fn insert(&mut self, v: &[f32]) -> Result { + self.inner.pin_mut().insert(v) + } + + /* Forwards id to the ffi remove and returns its result unchanged. */ pub fn remove(&mut self, id: usize) -> Result<(), cxx::Exception> { + self.inner.pin_mut().remove(id) + } + + /* Runs the ffi search with v, k, radius and epsilon, then collects one (get_id(), get_distance()) pair per returned element. The ffi search value is used directly without the ? operator, so this wrapper only ever returns Ok. */ pub fn search( + &mut self, + v: &[f32], + k: usize, + radius: f32, + epsilon: f32, + ) -> Result, cxx::Exception> { + let mut search_results = self.inner.pin_mut().search(v, k, radius, epsilon); + Ok(search_results + .pin_mut() + .into_iter() + .map(|mut s| (s.as_mut().get_id(), s.as_mut().get_distance())) + .collect()) + } + + /* Reads the dimension first (an error there is returned through the ? operator), then exposes the pointer returned by the ffi get_object(id) as a slice of dim elements. NOTE(review): the unsafe from_raw_parts call assumes that pointer is non-null and valid for dim f32 values for as long as the returned borrow is alive; nothing in this file checks that, confirm against the C++ side. */ pub fn get_object(&mut self, id: usize) -> Result<&[f32], cxx::Exception> { + let dim = self.inner.pin_mut().get_dimension()?; + match self.inner.pin_mut().get_object(id) { + Ok(v) => Ok(unsafe { slice::from_raw_parts(v, dim) }), + Err(e) => Err(e), + } + } + + /* Returns the result of the ffi get_dimension unchanged; get_object uses the success value as a slice length. */ pub fn get_dimension(&mut self) -> Result { + self.inner.pin_mut().get_dimension() + } + } +} + +#[cfg(test)] +mod tests { + use crate::{ffi, index::Index, property::Property}; + use anyhow::Result; + + /* Shared test inputs: DIMENSION is the vector length; K, RADIUS and EPSILON are passed as the k, radius and epsilon arguments of search. */ const DIMENSION: usize = 128; + const K: usize = 30; + const RADIUS: f32 = 0.0; + const EPSILON: f32 = 0.1; + + /* Drives the raw ffi API end to end: calls every property setter once, configures the property, creates an index at "index", appends 100 vectors (expecting ids 1 to 100), saves, closes, builds, reopens, inserts 100 more (expecting ids 101 to 200), then get_object, get_dimension, search, remove and a second search. */ #[test] + fn test_ffi_qbg() -> Result<()> { + // New + println!("create an empty index..."); + let path: String = "index".to_string(); + let mut p = ffi::new_property(); + ////////// Test Setter ////////// + p.pin_mut().set_extended_dimension(1); + p.pin_mut().set_dimension(1); + p.pin_mut().set_number_of_subvectors(1); + p.pin_mut().set_number_of_blobs(1); + p.pin_mut().set_internal_data_type(1); + p.pin_mut().set_data_type(1); + p.pin_mut().set_distance_type(1); + p.pin_mut().set_hierarchical_clustering_init_mode(1); + p.pin_mut().set_number_of_first_objects(1); + p.pin_mut().set_number_of_first_clusters(1); + p.pin_mut().set_number_of_second_objects(1); + p.pin_mut().set_number_of_second_clusters(1); + p.pin_mut().set_number_of_third_clusters(1); + p.pin_mut().set_number_of_objects(1); + p.pin_mut().set_number_of_subvectors_for_bp(1); +
p.pin_mut().set_optimization_clustering_init_mode(1); + p.pin_mut().set_rotation_iteration(1); + p.pin_mut().set_subvector_iteration(1); + p.pin_mut().set_number_of_matrices(1); + p.pin_mut().set_rotation(false); + p.pin_mut().set_repositioning(false); + ////////// /////////// ////////// + /* NOTE(review): presumably the two init_* calls below reset the values written by the setter calls above before the real configuration is applied; confirm against the C++ side. */ p.pin_mut().init_qbg_construction_parameters(); + p.pin_mut().set_dimension(DIMENSION); + p.pin_mut().set_number_of_subvectors(64); + p.pin_mut().set_number_of_blobs(0); + p.pin_mut().init_qbg_build_parameters(); + p.pin_mut().set_number_of_objects(500); + let mut index = ffi::new_index(&path, p.pin_mut()).unwrap(); + + // Append + println!("append objects..."); + for i in 0..100 { + let vec: Vec = (0..DIMENSION).into_iter().map(|x| (x + i) as f32).collect(); + let id = index.pin_mut().append(vec.as_slice()).unwrap(); + assert_eq!((i + 1) as i32, id) + } + index.pin_mut().save_index().unwrap(); + index.pin_mut().close_index(); + + // Build + println!("building the index..."); + index.pin_mut().build_index(&path, p.pin_mut()).unwrap(); + index.pin_mut().open_index(&path, true).unwrap(); + + // Insert + for i in 0..100 { + let vec: Vec = (0..DIMENSION).into_iter().map(|x| (x + i) as f32).collect(); + let id = index.pin_mut().insert(vec.as_slice()).unwrap(); + assert_eq!((i + 1 + 100) as i32, id) + } + + // Get Object + let vec = index.pin_mut().get_object(1).unwrap(); + println!("vec:\n\t{:?}", vec); + + // Get Dimension + let dim = index.pin_mut().get_dimension().unwrap(); + println!("dimension:\n\t{:?}", dim); + + // Search + println!("search the index for the specified query..."); + let vec: Vec = (0..DIMENSION).into_iter().map(|i| i as f32).collect(); + let mut search_results = index.pin_mut().search(vec.as_slice(), K, RADIUS, EPSILON); + let ids: Vec = search_results + .pin_mut() + .into_iter() + .map(|s| s.get_id()) + .collect(); + let distances: Vec = search_results + .pin_mut() + .into_iter() + .map(|s| s.get_distance()) + .collect(); + println!("ids:\n\t{:?}", ids); +
println!("distances:\n\t{:?}", distances); + + // Remove + index.pin_mut().remove(1).unwrap(); + let vec: Vec = (0..DIMENSION).into_iter().map(|i| i as f32).collect(); + let mut search_results = index.pin_mut().search(vec.as_slice(), K, RADIUS, EPSILON); + let ids: Vec = search_results + .pin_mut() + .into_iter() + .map(|s| s.get_id()) + .collect(); + let distances: Vec = search_results + .pin_mut() + .into_iter() + .map(|s| s.get_distance()) + .collect(); + println!("ids:\n\t{:?}", ids); + println!("distances:\n\t{:?}", distances); + + index.pin_mut().close_index(); + + Ok(()) + } + + /* Runs the insert, get_object, get_dimension, search and remove sequence against a handle obtained from ffi::new_prebuilt_index("index", true). NOTE(review): the asserted ids start at 101 and the path is the same "index" used by the other tests, so this presumably depends on an index that already holds 100 objects; confirm how test ordering is guaranteed. */ #[test] + fn test_ffi_qbg_prebuilt() -> Result<()> { + // New + let path = "index".to_string(); + let mut index = ffi::new_prebuilt_index(&path, true).unwrap(); + + // Insert + for i in 0..100 { + let vec: Vec = (0..DIMENSION).into_iter().map(|x| (x + i) as f32).collect(); + let id = index.pin_mut().insert(vec.as_slice()).unwrap(); + assert_eq!((i + 1 + 100) as i32, id) + } + + // Get Object + let vec = index.pin_mut().get_object(1).unwrap(); + println!("vec:\n\t{:?}", vec); + + // Get Dimension + let dim = index.pin_mut().get_dimension().unwrap(); + println!("dimension:\n\t{:?}", dim); + + // Search + let vec: Vec = (0..DIMENSION).into_iter().map(|i| i as f32).collect(); + let mut search_results = index.pin_mut().search(vec.as_slice(), K, RADIUS, EPSILON); + let ids: Vec = search_results + .pin_mut() + .into_iter() + .map(|s| s.get_id()) + .collect(); + let distances: Vec = search_results + .pin_mut() + .into_iter() + .map(|s| s.get_distance()) + .collect(); + println!("ids:\n\t{:?}", ids); + println!("distances:\n\t{:?}", distances); + + // Remove + index.pin_mut().remove(1).unwrap(); + let vec: Vec = (0..DIMENSION).into_iter().map(|i| i as f32).collect(); + let mut search_results = index.pin_mut().search(vec.as_slice(), K, RADIUS, EPSILON); + let ids: Vec = search_results + .pin_mut() + .into_iter() + .map(|s| s.get_id()) + .collect(); + let distances: Vec = search_results +
.pin_mut() + .into_iter() + .map(|s| s.get_distance()) + .collect(); + println!("ids:\n\t{:?}", ids); + println!("distances:\n\t{:?}", distances); + + index.pin_mut().close_index(); + + Ok(()) + } + + /* Calls each Property wrapper method once with placeholder values (1, true or false); it makes no assertions, so it only checks that the calls complete. */ #[test] + fn test_property() -> Result<()> { + let mut p = Property::new(); + p.init_qbg_construction_parameters(); + p.set_qbg_construction_parameters(1, 1, 1, 1, 1, 1, 1); + p.set_extended_dimension(1); + p.set_dimension(1); + p.set_number_of_subvectors(1); + p.set_number_of_blobs(1); + p.set_internal_data_type(1); + p.set_data_type(1); + p.set_distance_type(1); + p.init_qbg_build_parameters(); + p.set_qbg_build_parameters(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, true, false); + p.set_hierarchical_clustering_init_mode(1); + p.set_number_of_first_objects(1); + p.set_number_of_first_clusters(1); + p.set_number_of_second_objects(1); + p.set_number_of_second_clusters(1); + p.set_number_of_third_clusters(1); + p.set_number_of_objects(1); + p.set_number_of_subvectors_for_bp(1); + p.set_optimization_clustering_init_mode(1); + p.set_rotation_iteration(1); + p.set_subvector_iteration(1); + p.set_number_of_matrices(1); + p.set_rotation(false); + p.set_repositioning(false); + + Ok(()) + } + + /* Same create, append, save, close, build, reopen, insert, get, search and remove flow as the ffi test, but through the Property and Index wrappers; search results arrive as tuples read with .0 (id) and .1 (distance). */ #[test] + fn test_index() -> Result<()> { + // New + println!("create an empty index..."); + let path: String = "index".to_string(); + let mut p = Property::new(); + p.init_qbg_construction_parameters(); + p.set_dimension(DIMENSION); + p.set_number_of_subvectors(64); + p.set_number_of_blobs(0); + p.init_qbg_build_parameters(); + p.set_number_of_objects(500); + let mut index = Index::new(&path, &mut p).unwrap(); + + // Append + println!("append objects..."); + for i in 0..100 { + let vec: Vec = (0..DIMENSION).into_iter().map(|x| (x + i) as f32).collect(); + let id = index.append(vec.as_slice()).unwrap(); + assert_eq!((i + 1) as i32, id) + } + index.save_index().unwrap(); + index.close_index(); + + // Build + println!("building the index..."); + index.build_index(&path, &mut p).unwrap(); +
index.open_index(&path, true).unwrap(); /* reopens the same path, passing true as the prebuilt argument */ + + // Insert + for i in 0..100 { + let vec: Vec = (0..DIMENSION).into_iter().map(|x| (x + i) as f32).collect(); + let id = index.insert(vec.as_slice()).unwrap(); + assert_eq!((i + 1 + 100) as i32, id) + } + + // Get Object + let vec = index.get_object(1).unwrap(); + println!("vec:\n\t{:?}", vec); + + // Get Dimension + let dim = index.get_dimension().unwrap(); + println!("dimension:\n\t{:?}", dim); + + // Search + println!("search the index for the specified query..."); + let vec: Vec = (0..DIMENSION).into_iter().map(|i| i as f32).collect(); + let search_results = index.search(vec.as_slice(), K, RADIUS, EPSILON).unwrap(); + let ids: Vec = search_results.iter().map(|s| s.0).collect(); + let distances: Vec = search_results.iter().map(|s| s.1).collect(); + println!("search results:\n\t{:?}", search_results); + println!("ids:\n\t{:?}", ids); + println!("distances:\n\t{:?}", distances); + + // Remove + index.remove(1).unwrap(); + let vec: Vec = (0..DIMENSION).into_iter().map(|i| i as f32).collect(); + let search_results = index.search(vec.as_slice(), K, RADIUS, EPSILON).unwrap(); + let ids: Vec = search_results.iter().map(|s| s.0).collect(); + let distances: Vec = search_results.iter().map(|s| s.1).collect(); + println!("search results:\n\t{:?}", search_results); + println!("ids:\n\t{:?}", ids); + println!("distances:\n\t{:?}", distances); + + index.close_index(); + + Ok(()) + } +}