From 7ee661b4ed78678953dec5c78241ef57164af1f6 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Tue, 19 Dec 2023 17:14:07 +0000
Subject: [PATCH] Attempt to fix for clang 15 (#93)

* Update Xoshiro return type

* Prints to help debug in makefile

* Try and resolve hdf5 version issue

* Pin h5py version in tests

* Pin exact versions

* Revert the hdf5 installation

* Revert original change to check for segfault

* Move the testing to gha

* Try to get segfault backtrace

* Change library location

* Attempt to add ssh to gha

* Try ssh again

* Fix typo in yaml

* Change install command

* Use single threaded tests

* Revert changes in the wrapper

* Add tests back to the azure version

* Try adding libgomp via mamba, single multicore test

* Another interactive debugging run

* Forgot to remove delete line

* Check for keyboard interrupt only on main thread

* Add test for openmp dists; try to fix dist segfault

* Try restricting signal check to thread 0
---
 .github/workflows/azure_ci.yml | 52 ++++++++++++++++++++++++++++++++++
 CMakeLists.txt                 | 13 +++++----
 azure-pipelines.yml            |  5 +---
 environment.yml                |  2 +-
 pp_sketch/__init__.py          |  2 +-
 src/Makefile                   |  4 +++
 src/api.cpp                    | 28 ++++++++++--------
 src/random/rng.hpp             |  2 +-
 test/run_test.py               | 23 ++++++++-------
 9 files changed, 96 insertions(+), 35 deletions(-)
 create mode 100644 .github/workflows/azure_ci.yml

diff --git a/.github/workflows/azure_ci.yml b/.github/workflows/azure_ci.yml
new file mode 100644
index 00000000..97771e8a
--- /dev/null
+++ b/.github/workflows/azure_ci.yml
@@ -0,0 +1,52 @@
+# GitHub Actions workflow: build the package and run the test suite on push.
+# The environment is created from environment.yml with micromamba; the package
+# is then installed with pip and exercised by test/run_test.py. Syntax reference:
+# https://docs.github.com/actions/using-workflows/workflow-syntax-for-github-actions
+
+name: Run tests
+
+on: [push]
+
+jobs:
+  test:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.8']
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Get current date
+      id: date
+      run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}"
+    - name: Install Conda environment from environment.yml
+      uses: mamba-org/setup-micromamba@v1
+      with:
+        micromamba-version: '1.4.6-0'
+        environment-file: environment.yml
+        # persist on the same day.
+        cache-environment-key: environment-${{ steps.date.outputs.date }}
+        cache-downloads-key: downloads-${{ steps.date.outputs.date }}
+    - name: Install and run_test.py
+      shell: bash -l {0}
+      run: |
+        python -m pip install --no-deps --ignore-installed . -vvv
+        cd test && python run_test.py
+
+    ## For debugging
+    - name: Install debug version
+      if: failure()
+      shell: bash -l {0}
+      run: |
+        python setup.py build_ext --debug install
+    - name: Setup tmate session for interactive debugging
+      if: failure()
+      uses: mxschmitt/action-tmate@v3
+      with:
+        limit-access-to-actor: true
+        detached: true
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 266c191a..ee34f5db 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,12 +45,13 @@ if(DEFINED ENV{CONDA_PREFIX})
     include_directories($ENV{CONDA_PREFIX}/include)
     link_directories($ENV{CONDA_PREFIX}/lib)
     link_directories($ENV{CONDA_PREFIX}/lib/intel64)
+else()
+    set(HDF5_FIND_DEBUG TRUE)
+    find_package(HDF5 REQUIRED COMPONENTS CXX)
+    include_directories(${HDF5_INCLUDE_DIRS})
 endif()
 
 # Add libraries
-
-find_package(HDF5 REQUIRED COMPONENTS CXX)
-include_directories(${HDF5_INCLUDE_DIRS})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/vendor/highfive/include)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
 
@@ -148,11 +149,11 @@ if(CMAKE_CUDA_COMPILER)
     #set_property(TARGET "${TARGET_NAME}" PROPERTY CUDA_ARCHITECTURES OFF)
 endif()
 target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen
-    ${HDF5_LIBRARIES} ${BLAS_LIBRARIES} gfortran m dl)
+    ${BLAS_LIBRARIES} gfortran m dl)
 if(DEFINED ENV{CONDA_PREFIX} AND (NOT APPLE OR CMAKE_COMPILER_IS_GNUCC OR ENV{SKETCHLIB_INSTALL} EQUAL "conda"))
-    target_link_libraries("${TARGET_NAME}" PRIVATE gomp z)
+    target_link_libraries("${TARGET_NAME}" PRIVATE hdf5_cpp hdf5 gomp z)
 else()
-    target_link_libraries("${TARGET_NAME}" PRIVATE ZLIB::ZLIB)
+    target_link_libraries("${TARGET_NAME}" PRIVATE ${HDF5_LIBRARIES} ZLIB::ZLIB)
     find_package(OpenMP)
     if(OpenMP_CXX_FOUND)
         target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index ac446684..876e32b2 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -1,7 +1,4 @@
-# Python package
-# Create and test a Python package on multiple Python versions.
-# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
-# https://docs.microsoft.com/azure/devops/pipelines/languages/python
+# This just checks the package can be installed using CUDA, no testing
 
 trigger:
 - master
diff --git a/environment.yml b/environment.yml
index 020aa66f..13fb63cb 100644
--- a/environment.yml
+++ b/environment.yml
@@ -17,7 +17,7 @@ dependencies:
   - highfive
   - hdf5
   - h5py
-  - nlohmann_json
+  - libgomp
   - openblas
   - libgfortran-ng
   - nvcc_linux-64
diff --git a/pp_sketch/__init__.py b/pp_sketch/__init__.py
index f3b1b2e5..92c7ca45 100644
--- a/pp_sketch/__init__.py
+++ b/pp_sketch/__init__.py
@@ -3,4 +3,4 @@
 
 '''PopPUNK sketching functions'''
 
-__version__ = '2.1.1'
+__version__ = '2.1.2'
diff --git a/src/Makefile b/src/Makefile
index bb424f1a..cd07ec6f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -116,6 +116,10 @@ install_python: python
 	install $(PYTHON_LIB) $(PYTHON_LIB_PATH)
 
 gpu/dist.cu.o:
+	echo ${CUDAFLAGS}
+	echo ${CPPFLAGS}
+	echo ${CXXFLAGS}
+	echo ${CFLAGS}
 	nvcc $(CUDAFLAGS) $(CPPFLAGS) -DGPU_AVAILABLE -x cu -c gpu/dist.cu -o $@
 
 gpu/sketch.cu.o:
diff --git a/src/api.cpp b/src/api.cpp
index 1a8a647e..9fc2d9cf 100644
--- a/src/api.cpp
+++ b/src/api.cpp
@@ -81,9 +81,7 @@ std::vector<Reference> create_sketches(
     std::vector<std::runtime_error> errors;
 #pragma omp parallel for schedule(dynamic, 5) num_threads(num_threads)
     for (unsigned int i = 0; i < names.size(); i++) {
-      if (interrupt || PyErr_CheckSignals() != 0) {
-        interrupt = true;
-      } else {
+      if (!interrupt) {
         try {
           SeqBuf seq_in(files[i], kmer_lengths.back());
           sketches[i] = Reference(names[i], seq_in, kmer_seeds, sketchsize64,
@@ -101,6 +99,9 @@ std::vector<Reference> create_sketches(
 
       if (omp_get_thread_num() == 0) {
         sketch_progress.tick_count(done_count);
+        if (PyErr_CheckSignals() != 0) {
+          interrupt = true;
+        }
       }
     }
     sketch_progress.finalise();
@@ -198,9 +199,7 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
     // Iterate upper triangle
 #pragma omp parallel for schedule(dynamic, 5) num_threads(num_threads) shared(progress)
     for (size_t i = 0; i < ref_sketches.size(); i++) {
-      if (interrupt || PyErr_CheckSignals() != 0) {
-        interrupt = true;
-      } else {
+      if (!interrupt) {
         for (size_t j = i + 1; j < ref_sketches.size(); j++) {
           size_t pos = square_to_condensed(i, j, ref_sketches.size());
           if (jaccard) {
@@ -219,6 +218,9 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
               {
                 progress += MAX(1, n_progress_ticks / dist_rows);
                 dist_progress.tick_count(progress);
+                if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) {
+                  interrupt = true;
+                }
               }
           }
         }
@@ -244,9 +246,7 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
 #pragma omp parallel for collapse(2) schedule(static) num_threads(num_threads)
     for (unsigned int q_idx = 0; q_idx < query_sketches.size(); q_idx++) {
       for (unsigned int r_idx = 0; r_idx < ref_sketches.size(); r_idx++) {
-        if (interrupt || PyErr_CheckSignals() != 0) {
-          interrupt = true;
-        } else {
+        if (!interrupt) {
           const long dist_row = q_idx * ref_sketches.size() + r_idx;
           if (jaccard) {
             for (unsigned int kmer_idx = 0; kmer_idx < kmer_lengths.size();
@@ -270,6 +270,9 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
             {
               progress += MAX(1, n_progress_ticks / dist_rows);
               dist_progress.tick_count(progress);
+              if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) {
+                interrupt = true;
+              }
             }
           }
         }
@@ -342,9 +345,7 @@ sparse_coo query_db_sparse(std::vector<Reference> &ref_sketches,
 #pragma omp parallel for schedule(static) num_threads(num_threads) shared(progress)
   for (size_t i = 0; i < ref_sketches.size(); i++) {
     std::vector<float> row_dists(ref_sketches.size());
-    if (interrupt || PyErr_CheckSignals() != 0) {
-      interrupt = true;
-    } else {
+    if (!interrupt) {
       for (size_t j = 0; j < ref_sketches.size(); j++) {
         if (i != j) {
           if (jaccard) {
@@ -370,6 +371,9 @@ sparse_coo query_db_sparse(std::vector<Reference> &ref_sketches,
           {
             progress += MAX(1, n_progress_ticks / dist_rows);
             dist_progress.tick_count(progress);
+            if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) {
+              interrupt = true;
+            }
           }
         }
         long offset = i * kNN;
diff --git a/src/random/rng.hpp b/src/random/rng.hpp
index c1be7075..cbc8ff6b 100644
--- a/src/random/rng.hpp
+++ b/src/random/rng.hpp
@@ -16,7 +16,7 @@ class Xoshiro
 {
 public:
-  // Definitions to be used as URNG in C++11
-  typedef size_t result_type;
-  static constexpr size_t min() { return std::numeric_limits<uint64_t>::min(); }
-  static constexpr size_t max() { return std::numeric_limits<uint64_t>::max(); }
+  // URNG definitions (C++11): min(), max() and operator() must all yield result_type
+  typedef uint64_t result_type;
+  static constexpr result_type min() { return std::numeric_limits<uint64_t>::min(); }
+  static constexpr result_type max() { return std::numeric_limits<uint64_t>::max(); }
   uint64_t operator()(); // generate random number U(min, max)
diff --git a/test/run_test.py b/test/run_test.py
index 6f418cd0..203716e9 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -26,17 +26,20 @@
 
 # create sketches
 sys.stderr.write("Sketch smoke test\n")
+subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db -s 10000 -k 15,29,4 --cpus 1", shell=True, check=True)
+os.remove("test_db.h5")
 subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db -s 10000 -k 15,29,4 --cpus 2", shell=True, check=True)
-subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db_phased --codon-phased --cpus 2", shell=True, check=True)
+subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db_phased --codon-phased --cpus 1", shell=True, check=True)
 subprocess.run("python ../sketchlib-runner.py sketch 12673_8#24.contigs_velvet.fa 12673_8#34.contigs_velvet.fa -o test_db_small -s 1000 --kmer 14", shell=True, check=True)
 subprocess.run("python ../sketchlib-runner.py add random test_db --cpus 2", shell=True, check=True)
 # calculate distances
 sys.stderr.write("Distance integration test\n")
-subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 2", shell=True, check=True) # checks if can be run
-subprocess.run("python ../sketchlib-runner.py query dist test_db -o ppsketch --cpus 2", shell=True, check=True) # checks if can be run
-subprocess.run("python ../sketchlib-runner.py query jaccard test_db_small --cpus 2", shell=True, check=True) # checks if can be run
+subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 1", shell=True, check=True) # checks if can be run
+subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 2", shell=True, check=True) # checks if can be run w/ openmp
+subprocess.run("python ../sketchlib-runner.py query dist test_db -o ppsketch --cpus 1", shell=True, check=True) # checks if can be run
+subprocess.run("python ../sketchlib-runner.py query jaccard test_db_small --cpus 1", shell=True, check=True) # checks if can be run
 subprocess.run("python test-dists.py --ref-db test_db --results ppsketch_ref", shell=True, check=True) # checks results match
-subprocess.run("python ../sketchlib-runner.py query dist test_db_phased --cpus 2", shell=True, check=True) # checks if can be run
+subprocess.run("python ../sketchlib-runner.py query dist test_db_phased --cpus 1", shell=True, check=True) # checks if can be run
 subprocess.run("python test-dists.py --ref-db test_db_phased --results ppsketch_ref_phased", shell=True, check=True) # checks results match
 
 sys.stderr.write("Sparse distance smoke test\n")
@@ -46,19 +49,19 @@
 subprocess.run("python ../sketchlib-runner.py query sparse jaccard test_db --kNN 2 --kmer 19", shell=True, check=True) # checks if can be run
 
 sys.stderr.write("Ref v query distance smoke test\n")
-subprocess.run("python ../sketchlib-runner.py sketch -l rlist.txt -o r_db --cpus 2", shell=True, check=True)
-subprocess.run("python ../sketchlib-runner.py sketch -l qlist.txt -o q_db --cpus 2", shell=True, check=True)
+subprocess.run("python ../sketchlib-runner.py sketch -l rlist.txt -o r_db --cpus 1", shell=True, check=True)
+subprocess.run("python ../sketchlib-runner.py sketch -l qlist.txt -o q_db --cpus 1", shell=True, check=True)
 subprocess.run("python ../sketchlib-runner.py query dist r_db q_db.h5", shell=True, check=True) # checks if can be run
 subprocess.run("python ../sketchlib-runner.py query jaccard r_db q_db", shell=True, check=True) # checks if can be run
 
 # Joining
 sys.stderr.write("Join smoke test\n")
-subprocess.run("python ../sketchlib-runner.py sketch -l db1_refs.txt -o db1 --cpus 2", shell=True, check=True)
-subprocess.run("python ../sketchlib-runner.py sketch -l db2_refs.txt -o db2 --cpus 2", shell=True, check=True)
+subprocess.run("python ../sketchlib-runner.py sketch -l db1_refs.txt -o db1 --cpus 1", shell=True, check=True)
+subprocess.run("python ../sketchlib-runner.py sketch -l db2_refs.txt -o db2 --cpus 1", shell=True, check=True)
 subprocess.run("python ../sketchlib-runner.py join db1.h5 db2.h5 -o joined", shell=True, check=True)
 # Random
 sys.stderr.write("Random test\n")
-subprocess.run("python ../sketchlib-runner.py remove random test_db --cpus 2", shell=True, check=True)
+subprocess.run("python ../sketchlib-runner.py remove random test_db --cpus 1", shell=True, check=True)
 # Matrix
 sys.stderr.write("Matrix integration test\n")
 subprocess.run("python test-matrix.py", shell=True, check=True)