diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt new file mode 100644 index 00000000..fc7aed52 --- /dev/null +++ b/c/CMakeLists.txt @@ -0,0 +1,10 @@ +set(USEARCH_PUNNED_INCLUDE_DIRS + "${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_SOURCE_DIR}/../fp16/include" + "${CMAKE_CURRENT_SOURCE_DIR}/../robin-map/include" +) + +add_executable(usearch lib.cpp) +target_include_directories(usearch PRIVATE ${USEARCH_PUNNED_INCLUDE_DIRS}) +set_target_properties(usearch PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/c) +set_target_properties(usearch PROPERTIES CXX_STANDARD 11) diff --git a/docs/compilation.md b/docs/compilation.md index 3e8b9bda..ecca1b00 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -150,7 +150,20 @@ swift build swift test -v ``` -## C99 +## C 99 + +There are a few ways to compile the C 99 USearch SDK. +Using the Makefile: + +```sh +make -C ./c build +``` + +Using CMake: + +```sh +cmake -B ./build_release -DUSEARCH_BUILD_C=1 && make -C ./build_release -j +``` Linux: @@ -166,6 +179,7 @@ So one should first compile the C library, link it with GoLang, and only then ru ```sh cd golang +make -C ../c build && mv ../c/libusearch.so libusearch.so go test -v ``` diff --git a/golang/lib_test.go b/golang/lib_test.go index cf8e3a69..ab2ad332 100644 --- a/golang/lib_test.go +++ b/golang/lib_test.go @@ -5,65 +5,101 @@ import ( "testing" ) -func Assure(err error) { - if err != nil { - panic(err) - } -} func TestUSearch(t *testing.T) { runtime.LockOSThread() - // Initialize - dim := uint(128) - conf := DefaultConfig(dim) - ind, err := NewIndex(conf) - if err != nil { - t.Fatalf("Couldn't construct the index: %s", err) - } - defer ind.Destroy() - - found_dims, err := ind.Dimensions() - if err != nil { - t.Fatalf("Couldn't retrieve dimensions: %s", err) - } - if found_dims != dim { - t.Fatalf("Wrong number of dimensions") - } - - found_len, err := ind.Len() - if err != nil { - t.Fatalf("Couldn't retrieve size: %s", err) - } - if found_len != 0 
{ - t.Fatalf("Wrong size") - } - err = ind.Reserve(100) - if err != nil { - t.Fatalf("Couldn't reserve capacity: %s", err) - } - - // Insert - vec := make([]float32, dim) - vec[0] = 40.0 - vec[1] = 2.0 - err = ind.Add(42, vec) - if err != nil { - t.Fatalf("Couldn't insert: %s", err) - } - found_len, err = ind.Len() - if err != nil { - t.Fatalf("Couldn't retrieve size: %s", err) - } - if found_len != 1 { - t.Fatalf("Wrong size") - } - - // Search - keys, distances, err := ind.Search(vec, 10) - if err != nil { - t.Fatalf("Couldn't search: %s", err) - } - if keys[0] != 42 || distances[0] != 0.0 { - t.Fatalf("Expected result 42") - } + t.Run("Test Index Initialization", func(t *testing.T) { + dim := uint(128) + conf := DefaultConfig(dim) + ind, err := NewIndex(conf) + if err != nil { + t.Fatalf("Failed to construct the index: %s", err) + } + defer ind.Destroy() + + found_dims, err := ind.Dimensions() + if err != nil { + t.Fatalf("Failed to retrieve dimensions: %s", err) + } + if found_dims != dim { + t.Fatalf("Expected %d dimensions, got %d", dim, found_dims) + } + + found_len, err := ind.Len() + if err != nil { + t.Fatalf("Failed to retrieve size: %s", err) + } + if found_len != 0 { + t.Fatalf("Expected size to be 0, got %d", found_len) + } + + err = ind.Reserve(100) + if err != nil { + t.Fatalf("Failed to reserve capacity: %s", err) + } + }) + + t.Run("Test Insertion", func(t *testing.T) { + dim := uint(128) + conf := DefaultConfig(dim) + ind, err := NewIndex(conf) + if err != nil { + t.Fatalf("Failed to construct the index: %s", err) + } + defer ind.Destroy() + + err = ind.Reserve(100) + if err != nil { + t.Fatalf("Failed to reserve capacity: %s", err) + } + + vec := make([]float32, dim) + vec[0] = 40.0 + vec[1] = 2.0 + + err = ind.Add(42, vec) + if err != nil { + t.Fatalf("Failed to insert: %s", err) + } + + found_len, err := ind.Len() + if err != nil { + t.Fatalf("Failed to retrieve size after insertion: %s", err) + } + if found_len != 1 { + t.Fatalf("Expected 
size to be 1, got %d", found_len) + } + }) + + t.Run("Test Search", func(t *testing.T) { + dim := uint(128) + conf := DefaultConfig(dim) + ind, err := NewIndex(conf) + if err != nil { + t.Fatalf("Failed to construct the index: %s", err) + } + defer ind.Destroy() + + err = ind.Reserve(100) + if err != nil { + t.Fatalf("Failed to reserve capacity: %s", err) + } + + vec := make([]float32, dim) + vec[0] = 40.0 + vec[1] = 2.0 + + err = ind.Add(42, vec) + if err != nil { + t.Fatalf("Failed to insert: %s", err) + } + + keys, distances, err := ind.Search(vec, 10) + if err != nil { + t.Fatalf("Failed to search: %s", err) + } + if keys[0] != 42 || distances[0] != 0.0 { + t.Fatalf("Expected result 42 with distance 0, got key %d with distance %f", keys[0], distances[0]) + } + }) } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 5063f0c1..2b4fe008 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -60,7 +60,7 @@ struct index_dense_head_t { misaligned_ref_gt kind_key; misaligned_ref_gt kind_compressed_slot; - // Populational: 8 * 3 = 24 bytes + // Population: 8 * 3 = 24 bytes misaligned_ref_gt count_present; misaligned_ref_gt count_deleted; misaligned_ref_gt dimensions; @@ -171,7 +171,7 @@ inline index_dense_metadata_result_t index_dense_metadata(char const* file_path) if (!read) return result.failed(std::feof(file.get()) ? "End of file reached!" 
: std::strerror(errno)); - // Check if the file immeditely starts with the index, instead of vectors + // Check if the file immediately starts with the index, instead of vectors result.config.exclude_vectors = true; if (std::memcmp(result.head_buffer, default_magic(), std::strlen(default_magic())) == 0) return result; @@ -481,7 +481,7 @@ class index_dense_gt { std::size_t scalar_words() const noexcept { return metric_.scalar_words(); } std::size_t dimensions() const noexcept { return metric_.dimensions(); } - // Fetching and changing search critereas: + // Fetching and changing search criteria std::size_t expansion_add() const { return config_.expansion_add; } std::size_t expansion_search() const { return config_.expansion_search; } void change_expansion_add(std::size_t n) { config_.expansion_add = n; } @@ -497,6 +497,12 @@ class index_dense_gt { stats_t stats() const { return typed_->stats(); } stats_t stats(std::size_t level) const { return typed_->stats(level); } + /** + * @brief A relatively accurate lower bound on the amount of memory consumed by the system. + * In practice its error will be below 10%. + * + * @see `stream_length` for the length of the binary serialized representation. + */ std::size_t memory_usage() const { return // typed_->memory_usage(0) + // @@ -596,10 +602,12 @@ class index_dense_gt { serialization_result_t save(output_file_t file, serialization_config_t config = {}) const { serialization_result_t result = file.open_if_not(); if (result) - stream([&](void* buffer, std::size_t length) { - result = file.write(buffer, length); - return !!result; - }); + stream( + [&](void* buffer, std::size_t length) { + result = file.write(buffer, length); + return !!result; + }, + config); return result; } @@ -849,7 +857,7 @@ class index_dense_gt { } /** - * @brief Checks if a vector with specidied key is present. + * @brief Checks if a vector with specified key is present. * @return `true` if the key is present in the index, `false` otherwise. 
*/ bool contains(key_t key) const { @@ -858,10 +866,10 @@ class index_dense_gt { } /** - * @brief Checks if a vector with specidied key is present. - * @return `true` if the key is present in the index, `false` otherwise. + * @brief Count the number of vectors with specified key present. + * @return Zero if nothing is found, a positive integer otherwise. */ - bool count(key_t key) const { + std::size_t count(key_t key) const { shared_lock_t lock(slot_lookup_mutex_); return slot_lookup_.count(key); } diff --git a/javascript/test.js b/javascript/test.js index ace2ea81..a6332ca8 100644 --- a/javascript/test.js +++ b/javascript/test.js @@ -1,30 +1,31 @@ var assert = require('assert'); var usearch = require('bindings')('usearch'); -var index = new usearch.Index({ metric: 'l2sq', connectivity: 16n, dimensions: 2n }) -assert.equal(index.connectivity(), 16) -assert.equal(index.dimensions(), 2) -assert.equal(index.size(), 0) +// Single-entry operations -index.add(15n, new Float32Array([10, 20])) -index.add(16n, new Float32Array([10, 25])) -assert.equal(index.size(), 2) +var index = new usearch.Index({ metric: 'l2sq', connectivity: 16n, dimensions: 2n }); +assert.equal(index.connectivity(), 16n, 'connectivity should be 16'); +assert.equal(index.dimensions(), 2n, 'dimensions should be 2'); +assert.equal(index.size(), 0n, 'initial size should be 0'); -var results = index.search(new Float32Array([13, 14]), 2n) -assert.deepEqual(results.keys, new BigUint64Array([15n, 16n])) -assert.deepEqual(results.distances, new Float32Array([45, 130])) +index.add(15n, new Float32Array([10, 20])); +index.add(16n, new Float32Array([10, 25])); +assert.equal(index.size(), 2n, 'size after adding elements should be 2'); -// Batch +var results = index.search(new Float32Array([13, 14]), 2n); +assert.deepEqual(results.keys, new BigUint64Array([15n, 16n]), 'keys should be 15 and 16'); +assert.deepEqual(results.distances, new Float32Array([45, 130]), 'distances should be 45 and 130'); -var index2 = new 
usearch.Index({ metric: 'l2sq', connectivity: 16n, dimensions: 2n }) +// Batch operations -const keys = [15n, 16n] -const vectors = [new Float32Array([10, 20]), new Float32Array([10, 25])] -index2.add(keys, vectors) -assert.equal(index.size(), 2) +var indexBatch = new usearch.Index({ metric: 'l2sq', connectivity: 16n, dimensions: 2n }); +const keys = [15n, 16n]; +const vectors = [new Float32Array([10, 20]), new Float32Array([10, 25])]; +indexBatch.add(keys, vectors); +assert.equal(indexBatch.size(), 2, 'size after adding batch should be 2'); -var results = index.search(new Float32Array([13, 14]), 2n) -assert.deepEqual(results.keys, new BigUint64Array([15n, 16n])) -assert.deepEqual(results.distances, new Float32Array([45, 130])) +results = indexBatch.search(new Float32Array([13, 14]), 2n); +assert.deepEqual(results.keys, new BigUint64Array([15n, 16n]), 'keys should be 15 and 16'); +assert.deepEqual(results.distances, new Float32Array([45, 130]), 'distances should be 45 and 130'); -console.log('JavaScript tests passed!') \ No newline at end of file +console.log('JavaScript tests passed!'); diff --git a/python/lib.cpp b/python/lib.cpp index e3dc2512..41a73e60 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -129,8 +129,10 @@ metric_t typed_udf( return metric_t(stl_function, dimensions, kind, scalar_kind); } -metric_t udf(metric_kind_t kind, metric_signature_t signature, std::uintptr_t metric_uintptr, // - scalar_kind_t scalar_kind, std::size_t dimensions) { +metric_t udf( // + metric_kind_t kind, metric_signature_t signature, std::uintptr_t metric_uintptr, // + scalar_kind_t scalar_kind, std::size_t dimensions) { + switch (scalar_kind) { case scalar_kind_t::b1x8_k: return typed_udf(kind, signature, metric_uintptr, scalar_kind, dimensions); case scalar_kind_t::i8_k: return typed_udf(kind, signature, metric_uintptr, scalar_kind, dimensions); @@ -502,15 +504,20 @@ PYBIND11_MODULE(compiled, m) { py::enum_(m, "MetricKind") .value("Unknown", metric_kind_t::unknown_k) + 
.value("IP", metric_kind_t::ip_k) .value("Cos", metric_kind_t::cos_k) .value("L2sq", metric_kind_t::l2sq_k) + .value("Haversine", metric_kind_t::haversine_k) .value("Pearson", metric_kind_t::pearson_k) .value("Jaccard", metric_kind_t::jaccard_k) .value("Hamming", metric_kind_t::hamming_k) .value("Tanimoto", metric_kind_t::tanimoto_k) - .value("Sorensen", metric_kind_t::sorensen_k); + .value("Sorensen", metric_kind_t::sorensen_k) + + .value("Cosine", metric_kind_t::cos_k) + .value("InnerProduct", metric_kind_t::ip_k); py::enum_(m, "ScalarKind") .value("Unknown", scalar_kind_t::unknown_k) @@ -520,7 +527,7 @@ PYBIND11_MODULE(compiled, m) { .value("F64", scalar_kind_t::f64_k) .value("F32", scalar_kind_t::f32_k) .value("F16", scalar_kind_t::f16_k) - .value("I8", scalar_kind_t::i8_k) + .value("F8", scalar_kind_t::f8_k) .value("U64", scalar_kind_t::u64_k) .value("U32", scalar_kind_t::u32_k) .value("U16", scalar_kind_t::u16_k) diff --git a/python/scripts/test.py b/python/scripts/test.py index ee49952a..d0f7b446 100644 --- a/python/scripts/test.py +++ b/python/scripts/test.py @@ -123,8 +123,8 @@ def test_index( assert len({match.key for match in matches}) == 1, "Iteration over matches" assert matches[0].key == 42 assert matches[0].distance == pytest.approx(0, abs=1e-3) - assert matches.computed_distances <= 2 - assert matches.visited_members <= 2 + assert matches.computed_distances != 0 + assert matches.visited_members != 0 # Validating the index structure and metadata: assert index.max_level >= 0