Skip to content

Commit

Permalink
storage: Update usearch and simsimd (pingcap#270)
Browse files Browse the repository at this point in the history
Signed-off-by: Wish <[email protected]>
  • Loading branch information
breezewish authored Aug 20, 2024
1 parent 0b1f21e commit 3e333fe
Show file tree
Hide file tree
Showing 9 changed files with 45 additions and 22 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ jobs:
run: |
chmod +x /tmp/gtests_dbms
cd /tmp
/tmp/gtests_dbms --gtest_filter="Vector*:FileCacheTest*" 2>/tmp/gtests_dbms.log
/tmp/gtests_dbms --gtest_filter="Vector*:FileCacheTest*:DeltaMergeStoreVectorTest*" 2>/tmp/gtests_dbms.log
- name: Detailed Test Log
if: failure()
Expand All @@ -126,6 +126,6 @@ jobs:
echo "Download test log: ${{ vars.MINIO_ENDPOINT }}/$LOG_PATH"
- name: Clean up artifact
if: always()
if: success()
run: |
mc rm minio_server/tiflash-cse-ci/ci_unit_test_artifact/${{ github.run_id }}/gtests_dbms
2 changes: 1 addition & 1 deletion contrib/usearch
Submodule usearch updated 64 files
+39 −7 .github/workflows/prerelease.yml
+37 −19 .github/workflows/release.yml
+0 −19 .github/workflows/update_version.sh
+6 −4 .gitignore
+13 −0 .vscode/launch.json
+11 −1 .vscode/settings.json
+2 −2 .vscode/tasks.json
+14 −14 BENCHMARKS.md
+1 −1 CITATION.cff
+8 −8 CMakeLists.txt
+59 −27 CONTRIBUTING.md
+15 −15 Cargo.lock
+1 −1 Cargo.toml
+1 −0 MANIFEST.in
+98 −28 README.md
+1 −1 VERSION
+11 −12 binding.gyp
+5 −2 build.gradle
+12 −1 build.rs
+3 −0 c/README.md
+50 −13 c/lib.cpp
+60 −55 c/test.c
+23 −4 c/usearch.h
+1 −1 conanfile.py
+34 −13 cpp/README.md
+99 −47 cpp/bench.cpp
+649 −176 cpp/test.cpp
+1 −1 csharp/nuget/nuget-package.props
+10 −10 csharp/src/Cloud.Unum.USearch.Tests/USearchIndexTests.cs
+6 −44 csharp/src/Cloud.Unum.USearch/NativeMethods.cs
+295 −60 csharp/src/Cloud.Unum.USearch/USearchIndex.cs
+117 −8 csharp/src/Cloud.Unum.USearch/USearchTypes.cs
+1 −1 docs/index.rst
+0 −5 docs/java/index.rst
+0 −5 docs/javascript/index.rst
+35 −14 golang/README.md
+32 −24 golang/lib.go
+857 −264 include/usearch/index.hpp
+412 −156 include/usearch/index_dense.hpp
+84 −39 include/usearch/index_plugins.hpp
+56 −5 java/README.md
+126 −36 java/cloud/unum/usearch/Index.java
+3 −0 java/cloud/unum/usearch/NativeUtils.java
+38 −2 java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp
+16 −0 java/cloud/unum/usearch/cloud_unum_usearch_Index.h
+57 −3 java/test/IndexTest.java
+3 −2 javascript/tsconfig-esm.json
+8 −7 javascript/usearch.ts
+6 −2 objc/USearchObjective.mm
+0 −10 package-ci.json
+2 −2 package.json
+12 −7 python/lib.cpp
+8 −0 python/scripts/test_index.py
+119 −0 python/scripts/test_jit.py
+0 −0 python/usearch/py.typed
+42 −29 rust/lib.cpp
+1 −1 rust/lib.hpp
+53 −5 rust/lib.rs
+10 −0 setup.py
+1 −1 simsimd
+30 −0 swift/Test.swift
+1 −1 wasmer.toml
+44 −4 wolfram/CMakeLists.txt
+30 −19 wolfram/lib.cpp
10 changes: 6 additions & 4 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,7 @@ void VectorIndexHNSWBuilder::addBlock(

const auto * del_mark_data = (!del_mark) ? nullptr : &(del_mark->getData());

if (!index.reserve(unum::usearch::ceil2(index.size() + column.size())))
{
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for HNSW index");
}
index.reserve(unum::usearch::ceil2(index.size() + column.size()));

Stopwatch w;
SCOPE_EXIT({ total_duration += w.elapsedSeconds(); });
Expand Down Expand Up @@ -165,6 +162,7 @@ VectorIndexViewerPtr VectorIndexHNSWViewer::view(const dtpb::VectorIndexFileProp
SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_view).Observe(w.elapsedSeconds()); });

auto vi = std::make_shared<VectorIndexHNSWViewer>(file_props);

vi->index = USearchImplType::make(
unum::usearch::metric_punned_t( //
file_props.dimensions(),
Expand All @@ -174,6 +172,10 @@ VectorIndexViewerPtr VectorIndexHNSWViewer::view(const dtpb::VectorIndexFileProp
unum::usearch::default_expansion_add(),
16 /* default is 64 */));

// Many threads may query this index concurrently, so reserve a generous thread limit up front
auto limit = unum::usearch::index_limits_t(0, /* threads */ std::thread::hardware_concurrency() * 10);
vi->index.reserve(limit);

auto result = vi->index.view(unum::usearch::memory_mapped_file_t(path.data()));
RUNTIME_CHECK_MSG(result, "Failed to load vector index: {}", result.error.what());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ try

auto filter = std::make_shared<PushDownFilter>(wrapWithANNQueryInfo(nullptr, ann_query_info));

read(left_segment_range, filter, createVecFloat32Column<Array>({})); // FIXME: should be 63.0
read(left_segment_range, filter, createVecFloat32Column<Array>({{63.0}}));
}

// merge segment
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/TiDB/Decode/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <TiDB/Decode/Vector.h>
#include <VectorSearch/simdsimd-internals.h>
#include <VectorSearch/SimSIMD.h>

#include <compare>

Expand Down
40 changes: 31 additions & 9 deletions dbms/src/VectorSearch/DistanceSIMDFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// polluting all compile units.

#include <VectorSearch/DistanceSIMDFeatures.h>
#include <VectorSearch/simdsimd-internals.h>
#include <VectorSearch/SimSIMD.h>

namespace simsimd_details
{
Expand Down Expand Up @@ -52,18 +52,40 @@ std::vector<std::string> VectorDistanceSIMDFeatures::get()
simsimd_capability_t cap_l2 = simsimd_details::actual_capability(simsimd_datatype_f32_k, simsimd_metric_l2sq_k);
simsimd_capability_t cap_cos = simsimd_details::actual_capability(simsimd_datatype_f32_k, simsimd_metric_cos_k);

auto cap_to_string = [](simsimd_capability_t cap) -> std::string {
if (cap & simsimd_cap_neon_k)
auto cap_to_string = [](simsimd_capability_t isa_kind) -> std::string {
switch (isa_kind)
{
case simsimd_cap_serial_k:
return "serial";
case simsimd_cap_neon_k:
return "neon";
if (cap & simsimd_cap_sve_k)
case simsimd_cap_neon_i8_k:
return "neon_i8";
case simsimd_cap_neon_f16_k:
return "neon_f16";
case simsimd_cap_neon_bf16_k:
return "neon_bf16";
case simsimd_cap_sve_k:
return "sve";
if (cap & simsimd_cap_sve2_k)
return "sve2";
if (cap & simsimd_cap_haswell_k)
case simsimd_cap_sve_i8_k:
return "sve_i8";
case simsimd_cap_sve_f16_k:
return "sve_f16";
case simsimd_cap_sve_bf16_k:
return "sve_bf16";
case simsimd_cap_haswell_k:
return "haswell";
if (cap & simsimd_cap_skylake_k)
case simsimd_cap_skylake_k:
return "skylake";
return "serial";
case simsimd_cap_ice_k:
return "ice";
case simsimd_cap_genoa_k:
return "genoa";
case simsimd_cap_sapphire_k:
return "sapphire";
default:
return "unknown";
}
};

std::vector<std::string> ret{};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,11 @@
#define SIMSIMD_TARGET_SVE 0 // Clang13's header does not support enabling SVE for a region
#define SIMSIMD_TARGET_HASWELL 1
#define SIMSIMD_TARGET_SKYLAKE 1
#define SIMSIMD_TARGET_ICE 0
#define SIMSIMD_TARGET_ICE 1
#define SIMSIMD_TARGET_GENOA 0
#define SIMSIMD_TARGET_SAPPHIRE 0
#include <simsimd/simsimd.h>


namespace simsimd_details
{

Expand Down
2 changes: 1 addition & 1 deletion dbms/src/VectorSearch/USearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#define SIMSIMD_TARGET_SVE 0 // Clang13's header does not support enabling SVE for a region
#define SIMSIMD_TARGET_HASWELL 1
#define SIMSIMD_TARGET_SKYLAKE 1
#define SIMSIMD_TARGET_ICE 0
#define SIMSIMD_TARGET_ICE 1
#define SIMSIMD_TARGET_GENOA 0
#define SIMSIMD_TARGET_SAPPHIRE 0

Expand Down

0 comments on commit 3e333fe

Please sign in to comment.