From 1b425df7c90fe78fc28d305be3be4dfbdbe03ba9 Mon Sep 17 00:00:00 2001
From: Heemin Kim
Date: Tue, 16 Jul 2024 21:49:13 -0700
Subject: [PATCH] Apply custom patch only once by comparing the last patch id (#1833)

Signed-off-by: Heemin Kim
---
Review notes (this section is ignored by `git am`):
- NOTE(review): line breaks and context-line indentation were reconstructed
  from a whitespace-collapsed copy of this patch; confirm the context lines
  against the tree before applying.
- The patch-id lookups use an execute_process() pipeline / INPUT_FILE instead
  of `sh -c`, and an empty patch id no longer breaks the string()/if() calls;
  it simply never matches, so all patches are applied in that case.

 .github/workflows/CI.yml            |  2 --
 .github/workflows/test_security.yml |  2 --
 CHANGELOG.md                        |  1 +
 jni/cmake/init-faiss.cmake          | 44 ++++++++++++++++++++++-------
 jni/cmake/init-nmslib.cmake         | 39 +++++++++++++++++++------
 5 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 0b9b24d98..e6dec4daf 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -37,8 +37,6 @@ jobs:
     steps:
       - name: Checkout k-NN
         uses: actions/checkout@v1
-        with:
-          submodules: true
 
       # Setup git user so that patches for native libraries can be applied and committed
       - name: Setup git user
diff --git a/.github/workflows/test_security.yml b/.github/workflows/test_security.yml
index e0f2dbf45..77b726a69 100644
--- a/.github/workflows/test_security.yml
+++ b/.github/workflows/test_security.yml
@@ -37,8 +37,6 @@ jobs:
     steps:
       - name: Checkout k-NN
         uses: actions/checkout@v1
-        with:
-          submodules: true
       # Setup git user so that patches for native libraries can be applied and committed
       - name: Setup git user
         run: |
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cae3783b8..29c0f1841 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 * Release memory properly for an array type [#1820](https://github.com/opensearch-project/k-NN/pull/1820)
 * FIX Same Suffix Cause Recall Drop to zero [#1802](https://github.com/opensearch-project/k-NN/pull/1802)
 ### Infrastructure
+* Apply custom patch only once by comparing the last patch id [#1833](https://github.com/opensearch-project/k-NN/pull/1833)
 ### Documentation
 * Update dev guide to fix clang linking issue on arm [#1746](https://github.com/opensearch-project/k-NN/pull/1746)
 ### Maintenance
diff --git a/jni/cmake/init-faiss.cmake b/jni/cmake/init-faiss.cmake
index befed4703..c2ec24a3b 100644
--- a/jni/cmake/init-faiss.cmake
+++ b/jni/cmake/init-faiss.cmake
@@ -12,20 +12,44 @@ if (NOT EXISTS ${FAISS_REPO_DIR})
     execute_process(COMMAND git submodule update --init -- external/faiss WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 endif ()
 
-# Check if patch exist, this is to skip git apply during CI build. See CI.yml with ubuntu.
-find_path(PATCH_FILE NAMES 0001-Custom-patch-to-support-multi-vector.patch 0002-Enable-precomp-table-to-be-shared-ivfpq.patch 0003-Custom-patch-to-support-range-search-params.patch 0004-Custom-patch-to-support-binary-vector.patch PATHS ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss NO_DEFAULT_PATH)
+# Define the list of patch files; the numeric prefix gives the order in which they are applied
+set(PATCH_FILE_LIST)
+list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0001-Custom-patch-to-support-multi-vector.patch")
+list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0002-Enable-precomp-table-to-be-shared-ivfpq.patch")
+list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0003-Custom-patch-to-support-range-search-params.patch")
+list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0004-Custom-patch-to-support-binary-vector.patch")
 
-# If it exists, apply patches
-if (EXISTS ${PATCH_FILE})
-    message(STATUS "Applying custom patches.")
-    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0001-Custom-patch-to-support-multi-vector.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
-    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0002-Enable-precomp-table-to-be-shared-ivfpq.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
-    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0003-Custom-patch-to-support-range-search-params.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
-    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0004-Custom-patch-to-support-binary-vector.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
+# Get the patch id of the submodule's HEAD commit by piping `git show` into `git patch-id` (no shell needed)
+execute_process(COMMAND git --no-pager show HEAD COMMAND git patch-id --stable OUTPUT_VARIABLE PATCH_ID_OUTPUT_FROM_COMMIT WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss)
+# Keep only the leading hex token (the patch id); it stays empty when git printed nothing
+string(REGEX MATCH "^[0-9a-f]+" PATCH_ID_FROM_COMMIT "${PATCH_ID_OUTPUT_FROM_COMMIT}")
+
+# Walk the patch files newest first and collect the ones that still need to be applied
+list(SORT PATCH_FILE_LIST ORDER DESCENDING)
+set(PATCH_FILES_TO_APPLY)
+foreach(PATCH_FILE IN LISTS PATCH_FILE_LIST)
+    # Get the patch id of this patch file; INPUT_FILE feeds it to git's stdin, so paths with spaces are safe
+    execute_process(COMMAND git patch-id --stable INPUT_FILE "${PATCH_FILE}" OUTPUT_VARIABLE PATCH_ID_OUTPUT)
+    # Keep only the leading hex token (the patch id); empty if git produced no id for the file
+    string(REGEX MATCH "^[0-9a-f]+" PATCH_ID "${PATCH_ID_OUTPUT}")
+
+    # Stop at the patch HEAD already carries; an empty id never matches, so such a file is still applied
+    if (NOT "${PATCH_ID}" STREQUAL "" AND "${PATCH_ID}" STREQUAL "${PATCH_ID_FROM_COMMIT}")
+        break()
+    else()
+        list(APPEND PATCH_FILES_TO_APPLY ${PATCH_FILE})
+    endif()
+endforeach()
+
+# Apply the pending patch files in ascending order
+list(SORT PATCH_FILES_TO_APPLY)
+foreach(PATCH_FILE IN LISTS PATCH_FILES_TO_APPLY)
+    message(STATUS "Applying patch of ${PATCH_FILE}")
+    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${PATCH_FILE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
     if(RESULT_CODE)
         message(FATAL_ERROR "Failed to apply patch:\n${ERROR_MSG}")
     endif()
-endif()
+endforeach()
 
 if (${CMAKE_SYSTEM_NAME} STREQUAL Darwin)
     if(CMAKE_C_COMPILER_ID MATCHES "Clang\$")
diff --git a/jni/cmake/init-nmslib.cmake b/jni/cmake/init-nmslib.cmake
index 387dce6bc..56c52bb69 100644
--- a/jni/cmake/init-nmslib.cmake
+++ b/jni/cmake/init-nmslib.cmake
@@ -12,18 +12,41 @@ if (NOT EXISTS ${NMS_REPO_DIR})
     execute_process(COMMAND git submodule update --init -- external/nmslib WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 endif ()
 
-# Check if patch exist, this is to skip git apply during CI build. See CI.yml with ubuntu.
-find_path(PATCH_FILE NAMES 0001-Initialize-maxlevel-during-add-from-enterpoint-level.patch 0002-Adds-ability-to-pass-ef-parameter-in-the-query-for-h.patch PATHS ${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib NO_DEFAULT_PATH)
+# Define the list of patch files; the numeric prefix gives the order in which they are applied
+set(PATCH_FILE_LIST)
+list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0001-Initialize-maxlevel-during-add-from-enterpoint-level.patch")
+list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0002-Adds-ability-to-pass-ef-parameter-in-the-query-for-h.patch")
 
-# If it exists, apply patches
-if (EXISTS ${PATCH_FILE})
-    message(STATUS "Applying custom patches.")
-    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0001-Initialize-maxlevel-during-add-from-enterpoint-level.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
-    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0002-Adds-ability-to-pass-ef-parameter-in-the-query-for-h.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
+# Get the patch id of the submodule's HEAD commit by piping `git show` into `git patch-id` (no shell needed)
+execute_process(COMMAND git --no-pager show HEAD COMMAND git patch-id --stable OUTPUT_VARIABLE PATCH_ID_OUTPUT_FROM_COMMIT WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib)
+# Keep only the leading hex token (the patch id); it stays empty when git printed nothing
+string(REGEX MATCH "^[0-9a-f]+" PATCH_ID_FROM_COMMIT "${PATCH_ID_OUTPUT_FROM_COMMIT}")
 
+# Walk the patch files newest first and collect the ones that still need to be applied
+list(SORT PATCH_FILE_LIST ORDER DESCENDING)
+set(PATCH_FILES_TO_APPLY)
+foreach(PATCH_FILE IN LISTS PATCH_FILE_LIST)
+    # Get the patch id of this patch file; INPUT_FILE feeds it to git's stdin, so paths with spaces are safe
+    execute_process(COMMAND git patch-id --stable INPUT_FILE "${PATCH_FILE}" OUTPUT_VARIABLE PATCH_ID_OUTPUT)
+    # Keep only the leading hex token (the patch id); empty if git produced no id for the file
+    string(REGEX MATCH "^[0-9a-f]+" PATCH_ID "${PATCH_ID_OUTPUT}")
+
+    # Stop at the patch HEAD already carries; an empty id never matches, so such a file is still applied
+    if (NOT "${PATCH_ID}" STREQUAL "" AND "${PATCH_ID}" STREQUAL "${PATCH_ID_FROM_COMMIT}")
+        break()
+    else()
+        list(APPEND PATCH_FILES_TO_APPLY ${PATCH_FILE})
+    endif()
+endforeach()
+
+# Apply the pending patch files in ascending order
+list(SORT PATCH_FILES_TO_APPLY)
+foreach(PATCH_FILE IN LISTS PATCH_FILES_TO_APPLY)
+    message(STATUS "Applying patch of ${PATCH_FILE}")
+    execute_process(COMMAND git ${GIT_PATCH_COMMAND} --3way --ignore-space-change --ignore-whitespace ${PATCH_FILE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)
     if(RESULT_CODE)
         message(FATAL_ERROR "Failed to apply patch:\n${ERROR_MSG}")
     endif()
-endif()
+endforeach()
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search)