From c549527feba03a3afe191af59b74e58a88576a32 Mon Sep 17 00:00:00 2001 From: Doo Yong Kim <0ctopus13prime@gmail.com> Date: Fri, 27 Sep 2024 12:39:21 -0700 Subject: [PATCH] Added NMSLIB patched allowing load/write APIs with a stream object. (#2144) Signed-off-by: Dooyong Kim Co-authored-by: Dooyong Kim (cherry picked from commit eba9d98f8fbc7c107b289ea9019f42f12045217d) Signed-off-by: John Mazanec --- jni/cmake/init-nmslib.cmake | 1 + ...is-using-stream-to-load-save-in-Hnsw.patch | 93 +++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 jni/patches/nmslib/0003-Adding-two-apis-using-stream-to-load-save-in-Hnsw.patch diff --git a/jni/cmake/init-nmslib.cmake b/jni/cmake/init-nmslib.cmake index b2c16f1fe..64df457c1 100644 --- a/jni/cmake/init-nmslib.cmake +++ b/jni/cmake/init-nmslib.cmake @@ -19,6 +19,7 @@ if(NOT DEFINED APPLY_LIB_PATCHES OR "${APPLY_LIB_PATCHES}" STREQUAL true) set(PATCH_FILE_LIST) list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0001-Initialize-maxlevel-during-add-from-enterpoint-level.patch") list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0002-Adds-ability-to-pass-ef-parameter-in-the-query-for-h.patch") + list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0003-Adding-two-apis-using-stream-to-load-save-in-Hnsw.patch") # Get patch id of the last commit execute_process(COMMAND sh -c "git --no-pager show HEAD | git patch-id --stable" OUTPUT_VARIABLE PATCH_ID_OUTPUT_FROM_COMMIT WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib) diff --git a/jni/patches/nmslib/0003-Adding-two-apis-using-stream-to-load-save-in-Hnsw.patch b/jni/patches/nmslib/0003-Adding-two-apis-using-stream-to-load-save-in-Hnsw.patch new file mode 100644 index 000000000..bbba329b4 --- /dev/null +++ b/jni/patches/nmslib/0003-Adding-two-apis-using-stream-to-load-save-in-Hnsw.patch @@ -0,0 +1,93 @@ +From 7e099ec111e5c9db4b243da249c73f0ecc206281 Mon Sep 17 00:00:00 2001 +From: Dooyong Kim 
+Date: Thu, 26 Sep 2024 15:20:53 -0700 +Subject: [PATCH] Adding two apis using stream to load/save in Hnsw. + +Signed-off-by: Dooyong Kim +--- + similarity_search/include/method/hnsw.h | 4 +++ + similarity_search/src/method/hnsw.cc | 44 +++++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + +diff --git a/similarity_search/include/method/hnsw.h b/similarity_search/include/method/hnsw.h +index 57d99d0..7ff3f3d 100644 +--- a/similarity_search/include/method/hnsw.h ++++ b/similarity_search/include/method/hnsw.h +@@ -455,8 +455,12 @@ namespace similarity { + public: + virtual void SaveIndex(const string &location) override; + ++ void SaveIndexWithStream(std::ostream& output); ++ + virtual void LoadIndex(const string &location) override; + ++ void LoadIndexWithStream(std::istream& in); ++ + Hnsw(bool PrintProgress, const Space<dist_t> &space, const ObjectVector &data); + void CreateIndex(const AnyParams &IndexParams) override; + +diff --git a/similarity_search/src/method/hnsw.cc b/similarity_search/src/method/hnsw.cc +index 35b372c..e7a2c9e 100644 +--- a/similarity_search/src/method/hnsw.cc ++++ b/similarity_search/src/method/hnsw.cc +@@ -771,6 +771,25 @@ namespace similarity { + output.close(); + } + ++ template <typename dist_t> ++ void Hnsw<dist_t>::SaveIndexWithStream(std::ostream &output) { ++ output.exceptions(ios::badbit | ios::failbit); ++ ++ unsigned int optimIndexFlag = data_level0_memory_ != nullptr; ++ ++ writeBinaryPOD(output, optimIndexFlag); ++ ++ if (!optimIndexFlag) { ++#if USE_TEXT_REGULAR_INDEX ++ SaveRegularIndexText(output); ++#else ++ SaveRegularIndexBin(output); ++#endif ++ } else { ++ SaveOptimizedIndex(output); ++ } ++ } ++ + template <typename dist_t> + void + Hnsw<dist_t>::SaveOptimizedIndex(std::ostream& output) { +@@ -1021,6 +1040,31 @@ namespace similarity { + + } + ++ template <typename dist_t> ++ void Hnsw<dist_t>::LoadIndexWithStream(std::istream& input) { ++ LOG(LIB_INFO) << "Loading index from an input stream."; ++ CHECK_MSG(input, "Cannot open file for reading with an input stream"); ++ ++ 
input.exceptions(ios::badbit | ios::failbit); ++ ++#if USE_TEXT_REGULAR_INDEX ++ LoadRegularIndexText(input); ++#else ++ unsigned int optimIndexFlag= 0; ++ ++ readBinaryPOD(input, optimIndexFlag); ++ ++ if (!optimIndexFlag) { ++ LoadRegularIndexBin(input); ++ } else { ++ LoadOptimizedIndex(input); ++ } ++#endif ++ ++ LOG(LIB_INFO) << "Finished loading index"; ++ visitedlistpool = new VisitedListPool(1, totalElementsStored_); ++ } ++ + + template <typename dist_t> + void +-- +2.39.5 (Apple Git-154) +