From 7ee661b4ed78678953dec5c78241ef57164af1f6 Mon Sep 17 00:00:00 2001 From: John Lees Date: Tue, 19 Dec 2023 17:14:07 +0000 Subject: [PATCH] Attempt to fix for clang 15 (#93) * Update Xoshiro return type * Prints to help debug in makefile * Try and resolve hdf5 version issue * Pin h5py version in tests * Pin exact versions * Revert the hdf5 installation * Revert original change to check for segfault * Move the testing to gha * Try to get segfault backtrace * Change library location * Attempt to add ssh to gha * Try ssh again * Fix typo in yaml * Change install command * Use single threaded tests * Revert changes in the wrapper * Add tests back to the azure version * Try adding libgomp via mamba, single multicore test * Another interactive debugging run * Forgot to remove delete line * Check for keyboard interrupt only on main thread * Add test for openmp dists; try to fix dist segfault * Try restricting signal check to thread 0 --- .github/workflows/azure_ci.yml | 52 ++++++++++++++++++++++++++++++++++ CMakeLists.txt | 13 +++++---- azure-pipelines.yml | 5 +--- environment.yml | 2 +- pp_sketch/__init__.py | 2 +- src/Makefile | 4 +++ src/api.cpp | 28 ++++++++++-------- src/random/rng.hpp | 2 +- test/run_test.py | 23 ++++++++------- 9 files changed, 96 insertions(+), 35 deletions(-) create mode 100644 .github/workflows/azure_ci.yml diff --git a/.github/workflows/azure_ci.yml b/.github/workflows/azure_ci.yml new file mode 100644 index 00000000..97771e8a --- /dev/null +++ b/.github/workflows/azure_ci.yml @@ -0,0 +1,52 @@ +# Python package +# Create and test a Python package on multiple Python versions. +# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/python + +name: Run tests + +on: [push] + +jobs: + test: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Get current date + id: date + run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}" + - name: Install Conda environment from environment.yml + uses: mamba-org/setup-micromamba@v1 + with: + micromamba-version: '1.4.6-0' + environment-file: environment.yml + # persist on the same day. + cache-environment-key: environment-${{ steps.date.outputs.date }} + cache-downloads-key: downloads-${{ steps.date.outputs.date }} + - name: Install and run_test.py + shell: bash -l {0} + run: | + python -m pip install --no-deps --ignore-installed . -vvv + cd test && python run_test.py + + ## For debugging + - name: Install debug version + if: failure() + shell: bash -l {0} + run: | + python setup.py build_ext --debug install + - name: Setup tmate session for interactive debugging + if: failure() + uses: mxschmitt/action-tmate@v3 + with: + limit-access-to-actor: true + detached: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 266c191a..ee34f5db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,12 +45,13 @@ if(DEFINED ENV{CONDA_PREFIX}) include_directories($ENV{CONDA_PREFIX}/include) link_directories($ENV{CONDA_PREFIX}/lib) link_directories($ENV{CONDA_PREFIX}/lib/intel64) +else() + set(HDF5_FIND_DEBUG TRUE) + find_package(HDF5 REQUIRED COMPONENTS CXX) + include_directories(${HDF5_INCLUDE_DIRS}) endif() # Add libraries - -find_package(HDF5 REQUIRED COMPONENTS CXX) -include_directories(${HDF5_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/vendor/highfive/include) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) @@ -148,11 +149,11 @@ if(CMAKE_CUDA_COMPILER) #set_property(TARGET "${TARGET_NAME}" PROPERTY CUDA_ARCHITECTURES OFF) endif() target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen - ${HDF5_LIBRARIES} ${BLAS_LIBRARIES} gfortran m dl) + ${BLAS_LIBRARIES} gfortran m dl) if(DEFINED ENV{CONDA_PREFIX} AND (NOT APPLE OR CMAKE_COMPILER_IS_GNUCC OR ENV{SKETCHLIB_INSTALL} EQUAL "conda")) - target_link_libraries("${TARGET_NAME}" PRIVATE gomp z) + target_link_libraries("${TARGET_NAME}" PRIVATE hdf5_cpp hdf5 gomp z) else() - target_link_libraries("${TARGET_NAME}" PRIVATE ZLIB::ZLIB) + target_link_libraries("${TARGET_NAME}" PRIVATE ${HDF5_LIBRARIES} ZLIB::ZLIB) find_package(OpenMP) if(OpenMP_CXX_FOUND) target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ac446684..876e32b2 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,7 +1,4 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python +# This just checks the package can be installed using CUDA, no testing trigger: - master diff --git a/environment.yml b/environment.yml index 020aa66f..13fb63cb 100644 --- a/environment.yml +++ b/environment.yml @@ -17,7 +17,7 @@ dependencies: - highfive - hdf5 - h5py - - nlohmann_json + - libgomp - openblas - libgfortran-ng - nvcc_linux-64 diff --git a/pp_sketch/__init__.py b/pp_sketch/__init__.py index f3b1b2e5..92c7ca45 100644 --- a/pp_sketch/__init__.py +++ b/pp_sketch/__init__.py @@ -3,4 +3,4 @@ '''PopPUNK sketching functions''' -__version__ = '2.1.1' +__version__ = '2.1.2' diff --git a/src/Makefile b/src/Makefile index bb424f1a..cd07ec6f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -116,6 +116,10 @@ install_python: python install $(PYTHON_LIB) $(PYTHON_LIB_PATH) gpu/dist.cu.o: + echo ${CUDAFLAGS} + echo ${CPPFLAGS} + echo ${CXXFLAGS} + echo ${CFLAGS} nvcc $(CUDAFLAGS) $(CPPFLAGS) -DGPU_AVAILABLE -x cu -c gpu/dist.cu -o $@ gpu/sketch.cu.o: diff --git a/src/api.cpp b/src/api.cpp index 1a8a647e..9fc2d9cf 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -81,9 +81,7 @@ std::vector create_sketches( std::vector errors; #pragma omp parallel for schedule(dynamic, 5) num_threads(num_threads) for (unsigned int i = 0; i < names.size(); i++) { - if (interrupt || PyErr_CheckSignals() != 0) { - interrupt = true; - } else { + if (!interrupt) { try { SeqBuf seq_in(files[i], kmer_lengths.back()); sketches[i] = Reference(names[i], seq_in, kmer_seeds, sketchsize64, @@ -101,6 +99,9 @@ std::vector create_sketches( if (omp_get_thread_num() == 0) { sketch_progress.tick_count(done_count); + if (PyErr_CheckSignals() != 0) { + interrupt = true; + } } } sketch_progress.finalise(); @@ -198,9 +199,7 @@ NumpyMatrix query_db(std::vector &ref_sketches, // Iterate upper triangle #pragma omp parallel for schedule(dynamic, 5) num_threads(num_threads) shared(progress) for (size_t i = 0; i < ref_sketches.size(); i++) { - if (interrupt || PyErr_CheckSignals() != 0) { - interrupt = true; - } else { + if (!interrupt) { for (size_t j = i + 1; j < ref_sketches.size(); j++) { size_t pos = square_to_condensed(i, j, ref_sketches.size()); if (jaccard) { @@ -219,6 +218,9 @@ NumpyMatrix query_db(std::vector &ref_sketches, { progress += MAX(1, n_progress_ticks / dist_rows); dist_progress.tick_count(progress); + if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) { + interrupt = true; + } } } } @@ -244,9 +246,7 @@ NumpyMatrix query_db(std::vector &ref_sketches, #pragma omp parallel for collapse(2) schedule(static) num_threads(num_threads) for (unsigned int q_idx = 0; q_idx < query_sketches.size(); q_idx++) { for (unsigned int r_idx = 0; r_idx < ref_sketches.size(); r_idx++) { - if (interrupt || PyErr_CheckSignals() != 0) { - interrupt = true; - } else { + if (!interrupt) { const long dist_row = q_idx * ref_sketches.size() + r_idx; if (jaccard) { for (unsigned int kmer_idx = 0; kmer_idx < kmer_lengths.size(); @@ -270,6 +270,9 @@ NumpyMatrix query_db(std::vector &ref_sketches, { progress += MAX(1, n_progress_ticks / dist_rows); dist_progress.tick_count(progress); + if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) { + interrupt = true; + } } } } @@ -342,9 +345,7 @@ sparse_coo query_db_sparse(std::vector &ref_sketches, #pragma omp parallel for schedule(static) num_threads(num_threads) shared(progress) for (size_t i = 0; i < ref_sketches.size(); i++) { std::vector row_dists(ref_sketches.size()); - if (interrupt || PyErr_CheckSignals() != 0) { - interrupt = true; - } else { + if (!interrupt) { for (size_t j = 0; j < ref_sketches.size(); j++) { if (i != j) { if (jaccard) { @@ -370,6 +371,9 @@ sparse_coo query_db_sparse(std::vector &ref_sketches, { progress += MAX(1, n_progress_ticks / dist_rows); dist_progress.tick_count(progress); + if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) { + interrupt = true; + } } } long offset = i * kNN; diff --git a/src/random/rng.hpp b/src/random/rng.hpp index c1be7075..cbc8ff6b 100644 --- a/src/random/rng.hpp +++ b/src/random/rng.hpp @@ -16,7 +16,7 @@ class Xoshiro { public: // Definitions to be used as URNG in C++11 - typedef size_t result_type; + typedef uint64_t result_type; static constexpr size_t min() { return std::numeric_limits::min(); } static constexpr size_t max() { return std::numeric_limits::max(); } uint64_t operator()(); // generate random number U(min, max) diff --git a/test/run_test.py b/test/run_test.py index 6f418cd0..203716e9 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -26,17 +26,20 @@ # create sketches sys.stderr.write("Sketch smoke test\n") +subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db -s 10000 -k 15,29,4 --cpus 1", shell=True, check=True) +os.remove("test_db.h5") subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db -s 10000 -k 15,29,4 --cpus 2", shell=True, check=True) -subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db_phased --codon-phased --cpus 2", shell=True, check=True) +subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db_phased --codon-phased --cpus 1", shell=True, check=True) subprocess.run("python ../sketchlib-runner.py sketch 12673_8#24.contigs_velvet.fa 12673_8#34.contigs_velvet.fa -o test_db_small -s 1000 --kmer 14", shell=True, check=True) subprocess.run("python ../sketchlib-runner.py add random test_db --cpus 2", shell=True, check=True) # calculate distances sys.stderr.write("Distance integration test\n") -subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 2", shell=True, check=True) # checks if can be run -subprocess.run("python ../sketchlib-runner.py query dist test_db -o ppsketch --cpus 2", shell=True, check=True) # checks if can be run -subprocess.run("python ../sketchlib-runner.py query jaccard test_db_small --cpus 2", shell=True, check=True) # checks if can be run +subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 1", shell=True, check=True) # checks if can be run +subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 2", shell=True, check=True) # checks if can be run w/ openmp +subprocess.run("python ../sketchlib-runner.py query dist test_db -o ppsketch --cpus 1", shell=True, check=True) # checks if can be run +subprocess.run("python ../sketchlib-runner.py query jaccard test_db_small --cpus 1", shell=True, check=True) # checks if can be run subprocess.run("python test-dists.py --ref-db test_db --results ppsketch_ref", shell=True, check=True) # checks results match -subprocess.run("python ../sketchlib-runner.py query dist test_db_phased --cpus 2", shell=True, check=True) # checks if can be run +subprocess.run("python ../sketchlib-runner.py query dist test_db_phased --cpus 1", shell=True, check=True) # checks if can be run subprocess.run("python test-dists.py --ref-db test_db_phased --results ppsketch_ref_phased", shell=True, check=True) # checks results match sys.stderr.write("Sparse distance smoke test\n") @@ -46,19 +49,19 @@ subprocess.run("python ../sketchlib-runner.py query sparse jaccard test_db --kNN 2 --kmer 19", shell=True, check=True) # checks if can be run sys.stderr.write("Ref v query distance smoke test\n") -subprocess.run("python ../sketchlib-runner.py sketch -l rlist.txt -o r_db --cpus 2", shell=True, check=True) -subprocess.run("python ../sketchlib-runner.py sketch -l qlist.txt -o q_db --cpus 2", shell=True, check=True) +subprocess.run("python ../sketchlib-runner.py sketch -l rlist.txt -o r_db --cpus 1", shell=True, check=True) +subprocess.run("python ../sketchlib-runner.py sketch -l qlist.txt -o q_db --cpus 1", shell=True, check=True) subprocess.run("python ../sketchlib-runner.py query dist r_db q_db.h5", shell=True, check=True) # checks if can be run subprocess.run("python ../sketchlib-runner.py query jaccard r_db q_db", shell=True, check=True) # checks if can be run # Joining sys.stderr.write("Join smoke test\n") -subprocess.run("python ../sketchlib-runner.py sketch -l db1_refs.txt -o db1 --cpus 2", shell=True, check=True) -subprocess.run("python ../sketchlib-runner.py sketch -l db2_refs.txt -o db2 --cpus 2", shell=True, check=True) +subprocess.run("python ../sketchlib-runner.py sketch -l db1_refs.txt -o db1 --cpus 1", shell=True, check=True) +subprocess.run("python ../sketchlib-runner.py sketch -l db2_refs.txt -o db2 --cpus 1", shell=True, check=True) subprocess.run("python ../sketchlib-runner.py join db1.h5 db2.h5 -o joined", shell=True, check=True) # Random sys.stderr.write("Random test\n") -subprocess.run("python ../sketchlib-runner.py remove random test_db --cpus 2", shell=True, check=True) +subprocess.run("python ../sketchlib-runner.py remove random test_db --cpus 1", shell=True, check=True) # Matrix sys.stderr.write("Matrix integration test\n") subprocess.run("python test-matrix.py", shell=True, check=True)