Porting to pika #422

Merged
merged 14 commits into from
Feb 1, 2022
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -92,8 +92,8 @@ else()
find_package(LAPACK REQUIRED)
endif()

# ----- HPX
find_package(HPX 1.7.0 REQUIRED)
# ----- pika
find_package(pika 0.1.0 REQUIRED EXACT)

# ----- BLASPP/LAPACKPP
find_package(blaspp REQUIRED)
10 changes: 5 additions & 5 deletions INSTALL.md
@@ -2,20 +2,20 @@

- MPI
- OpenMP
- HPX
- pika
- BLAS/LAPACK
- BLASPP & LAPACKPP

## MPI

## OpenMP

## HPX
## pika

HPX provides a CMake config script in `$HPX_ROOT/lib/cmake/HPX`. To make it available, the variable
`HPX_DIR` has to be set to this path.
pika provides a CMake config script in `$pika_ROOT/lib/cmake/pika`. To make it available, the variable
`pika_DIR` has to be set to this path. Depending on the platform, the files may also be in `lib64` instead of `lib`.

e.g. `cmake -DHPX_DIR=${HPX_ROOT}/lib/cmake/HPX ..`
e.g. `cmake -Dpika_DIR=${PIKA_ROOT}/lib/cmake/pika ..`
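
As a sketch of how a downstream project would consume this (the project name, target name, and `pika::pika` link target are assumptions for illustration, not taken from this PR):

```cmake
# Hypothetical consumer CMakeLists.txt; illustrative only.
cmake_minimum_required(VERSION 3.18)
project(my_dlaf_consumer CXX)

# pika_DIR (or CMAKE_PREFIX_PATH) must point at <prefix>/lib/cmake/pika,
# or <prefix>/lib64/cmake/pika on platforms that install into lib64.
find_package(pika 0.1.0 REQUIRED EXACT)

add_executable(my_app main.cpp)
target_link_libraries(my_app PRIVATE pika::pika)
```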

## BLAS/LAPACK

19 changes: 7 additions & 12 deletions README.md
@@ -17,7 +17,7 @@ Otherwise you can download the archive of the latest `master` branch as a [zip](
### Dependencies

- MPI
- [HPX](https://github.com/STEllAR-GROUP/hpx)
- [pika](https://github.com/pika-org/pika)
- [blaspp](https://bitbucket.org/icl/blaspp/src/default/)
- [lapackpp](https://bitbucket.org/icl/lapackpp/src/default/)
- Intel MKL or other LAPACK implementation
@@ -37,27 +37,22 @@ Example installation:

`spack install dla-future ^intel-mkl`

Or you can go even further with a more detailed spec like this one, which builds dla-future in debug mode, using the clang compiler, specifying that the HPX on which it depends has to be built
in debug mode too, with APEX instrumentation enabled, and that we want to use MPICH as MPI implementation, without fortran support (because clang does not support it).
Or you can go even further with a more detailed spec like this one, which builds dla-future in Debug mode using the clang compiler, specifies that the pika it depends on has to be built in Debug mode too, and uses MPICH as the MPI implementation without Fortran support (because clang does not provide a Fortran compiler).

`spack install dla-future %clang build_type=Debug ^hpx build_type=Debug instrumentation=apex ^mpich ~fortran`

Notice that, for the package to work correctly, the HPX option `max_cpu_count` must be set according to the architecture, as it represents the size of the bitmask used to interface with hardware threads.

`spack install dla-future ^intel-mkl ^hpx max_cpu_count=256`
`spack install dla-future %clang build_type=Debug ^pika build_type=Debug ^mpich ~fortran`

#### Build the old good way

You can build all the dependencies yourself, but you have to ensure that:
- the BLAS/LAPACK implementation is not multithreaded
- HPX: `HPX_WITH_NETWORKING=none` + `HPX_WITH_MAX_CPU_COUNT=n` (according to the number of cores in the architecture; the next closest power of 2 is suggested)
- HPX and DLAF must have a compatible `CMAKE_BUILD_TYPE`: either both built in Debug, or any combination of release types (Release, RelWithDebInfo or MinSizeRel)
- pika: `PIKA_WITH_CUDA=ON` (if building for CUDA) + `PIKA_WITH_MPI=ON`
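
As an illustrative configure line for pika itself (the paths are assumptions; only the two `PIKA_WITH_*` toggles come from the list above):

```shell
# Sketch of a pika configure step; drop PIKA_WITH_CUDA on non-CUDA systems.
cmake -S pika -B build/pika \
      -DCMAKE_BUILD_TYPE=Release \
      -DPIKA_WITH_MPI=ON \
      -DPIKA_WITH_CUDA=ON
```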

Here are the main CMake options for customizing the DLAF build:

CMake option | Values | Note
:---|:---|:---
`HPX_DIR` | CMAKE:PATH | Location of the HPX CMake-config file
`pika_DIR` | CMAKE:PATH | Location of the pika CMake-config file
`blaspp_DIR` | CMAKE:PATH | Location of the blaspp CMake-config file
`lapackpp_DIR` | CMAKE:PATH | Location of the lapackpp CMake-config file
`DLAF_WITH_MKL` | `{ON,OFF}` (default: `OFF`) | if blaspp/lapackpp is built with MKL
@@ -71,7 +66,7 @@ CMake option | Values | Note
`DLAF_INSTALL_TESTS` | `{ON,OFF}` (default: `OFF`) | enable/disable installing tests
`DLAF_MPI_PRESET` | `{plain-mpi, slurm, custom}` (default `plain-mpi`) | presets for MPI configuration for tests. See [CMake Doc](https://cmake.org/cmake/help/latest/module/FindMPI.html?highlight=mpiexec_executable#usage-of-mpiexec) for additional information
`DLAF_TEST_RUNALL_WITH_MPIEXEC` | `{ON, OFF}` (default: `OFF`) | Use mpi runner also for non-MPI based tests
`DLAF_HPXTEST_EXTRA_ARGS` | CMAKE:STRING | Additional HPX command-line options for tests
`DLAF_PIKATEST_EXTRA_ARGS` | CMAKE:STRING | Additional pika command-line options for tests
`DLAF_BUILD_DOC` | `{ON,OFF}` (default: `OFF`) | enable/disable documentation generation

### Link your program/library with DLAF
2 changes: 1 addition & 1 deletion ci/.gitlab-ci.yml
@@ -19,7 +19,7 @@ stages:
- trying
variables:
GIT_SUBMODULE_STRATEGY: recursive
SPACK_SHA: 522a7c8ee0d51f92aa8cd685f378d54735a5307e
SPACK_SHA: 4b52f0e4d739998b8aca49356d66fadd0b22af17
before_script:
- docker login -u $CSCS_REGISTRY_USER -p $CSCS_REGISTRY_PASSWORD $CSCS_REGISTRY
script:
3 changes: 1 addition & 2 deletions ci/docker/cpu-debug.yaml
@@ -14,10 +14,9 @@ spack:
variants:
- '~cuda'
- '~openmp'
hpx:
pika:
variants:
- 'build_type=Debug'
- 'max_cpu_count=128'
mpich:
variants:
- '~fortran'
3 changes: 0 additions & 3 deletions ci/docker/cpu-release.yaml
@@ -14,9 +14,6 @@ spack:
variants:
- '~cuda'
- '~openmp'
hpx:
variants:
- 'max_cpu_count=128'
mpich:
variants:
- '~fortran'
3 changes: 1 addition & 2 deletions ci/docker/gpu-debug.yaml
@@ -14,10 +14,9 @@ spack:
variants:
- '~cuda'
- '~openmp'
hpx:
pika:
variants:
- 'build_type=Debug'
- 'max_cpu_count=128'
mpich:
variants:
- '~fortran'
3 changes: 0 additions & 3 deletions ci/docker/gpu-release.yaml
@@ -14,9 +14,6 @@ spack:
variants:
- '~cuda'
- '~openmp'
hpx:
variants:
- 'max_cpu_count=128'
mpich:
variants:
- '~fortran'
30 changes: 15 additions & 15 deletions cmake/DLAF_AddTest.cmake
@@ -14,7 +14,7 @@
# [INCLUDE_DIRS <arguments for target_include_directories>]
# [LIBRARIES <arguments for target_link_libraries>]
# [MPIRANKS <number of rank>]
# [USE_MAIN {PLAIN | HPX | MPI | MPIHPX}]
# [USE_MAIN {PLAIN | PIKA | MPI | MPIPIKA}]
# )
#
# At least one source file has to be specified, while other parameters are optional.
@@ -27,9 +27,9 @@
#
# USE_MAIN links to an external main function, in particular:
# - PLAIN: uses the classic gtest_main
# - HPX: uses a main that initializes HPX
# - PIKA: uses a main that initializes pika
# - MPI: uses a main that initializes MPI
# - MPIHPX: uses a main that initializes both HPX and MPI
# - MPIPIKA: uses a main that initializes both pika and MPI
# If not specified, no external main is used and one must be provided in the test source code.
#
# e.g.
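A hedged sketch of an invocation using only the keywords documented above (the target, source, and rank count are invented placeholders, not taken from this PR):

```cmake
# Hypothetical test registration; names are placeholders.
DLAF_addTest(test_example
  SOURCES test_example.cpp
  MPIRANKS 4
  USE_MAIN MPIPIKA
)
```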
@@ -58,21 +58,21 @@ function(DLAF_addTest test_target_name)
endif()

set(IS_AN_MPI_TEST FALSE)
set(IS_AN_HPX_TEST FALSE)
set(IS_AN_PIKA_TEST FALSE)
if (NOT DLAF_AT_USE_MAIN)
set(_gtest_tgt gtest)
elseif (DLAF_AT_USE_MAIN STREQUAL PLAIN)
set(_gtest_tgt gtest_main)
elseif (DLAF_AT_USE_MAIN STREQUAL HPX)
set(_gtest_tgt DLAF_gtest_hpx_main)
set(IS_AN_HPX_TEST TRUE)
elseif (DLAF_AT_USE_MAIN STREQUAL PIKA)
set(_gtest_tgt DLAF_gtest_pika_main)
set(IS_AN_PIKA_TEST TRUE)
elseif (DLAF_AT_USE_MAIN STREQUAL MPI)
set(_gtest_tgt DLAF_gtest_mpi_main)
set(IS_AN_MPI_TEST TRUE)
elseif (DLAF_AT_USE_MAIN STREQUAL MPIHPX)
set(_gtest_tgt DLAF_gtest_mpihpx_main)
elseif (DLAF_AT_USE_MAIN STREQUAL MPIPIKA)
set(_gtest_tgt DLAF_gtest_mpipika_main)
set(IS_AN_MPI_TEST TRUE)
set(IS_AN_HPX_TEST TRUE)
set(IS_AN_PIKA_TEST TRUE)
else()
message(FATAL_ERROR "USE_MAIN=${DLAF_AT_USE_MAIN} is not a supported option")
endif()
@@ -136,19 +136,19 @@ function(DLAF_addTest test_target_name)
set(_TEST_LABEL "RANK_1")
endif()

if (IS_AN_HPX_TEST)
separate_arguments(_HPX_EXTRA_ARGS_LIST UNIX_COMMAND ${DLAF_HPXTEST_EXTRA_ARGS})
if (IS_AN_PIKA_TEST)
separate_arguments(_PIKA_EXTRA_ARGS_LIST UNIX_COMMAND ${DLAF_PIKATEST_EXTRA_ARGS})

# APPLE platform does not support thread binding
if (NOT APPLE)
list(APPEND _TEST_ARGUMENTS "--hpx:use-process-mask")
list(APPEND _TEST_ARGUMENTS "--pika:use-process-mask")
endif()

if(NOT DLAF_TEST_THREAD_BINDING_ENABLED)
list(APPEND _TEST_ARGUMENTS "--hpx:bind=none")
list(APPEND _TEST_ARGUMENTS "--pika:bind=none")
endif()

list(APPEND _TEST_ARGUMENTS ${_HPX_EXTRA_ARGS_LIST})
list(APPEND _TEST_ARGUMENTS ${_PIKA_EXTRA_ARGS_LIST})
endif()

### Test executable target
4 changes: 2 additions & 2 deletions cmake/template/DLAFConfig.cmake.in
@@ -34,7 +34,7 @@ endif()
find_dependency(blaspp PATHS @blaspp_DIR@)
find_dependency(lapackpp PATHS @lapackpp_DIR@)

# ----- HPX
find_dependency(HPX PATHS @HPX_DIR@)
# ----- pika
find_dependency(pika PATHS @pika_DIR@)

check_required_components(DLAF)
22 changes: 11 additions & 11 deletions include/dlaf/auxiliary/norm/mc.h
@@ -9,8 +9,8 @@
//
#pragma once

#include <hpx/local/future.hpp>
#include <hpx/local/unwrap.hpp>
#include <pika/future.hpp>
#include <pika/unwrap.hpp>

#include "dlaf/auxiliary/norm/api.h"
#include "dlaf/common/range2d.h"
@@ -49,7 +49,7 @@ dlaf::BaseType<T> Norm<Backend::MC, Device::CPU, T>::max_L(comm::CommunicatorGri

using dlaf::common::internal::vector;
using dlaf::common::make_data;
using hpx::unwrapping;
using pika::unwrapping;

using dlaf::tile::internal::lange;
using dlaf::tile::internal::lantr;
@@ -61,7 +61,7 @@ dlaf::BaseType<T> Norm<Backend::MC, Device::CPU, T>::max_L(comm::CommunicatorGri
DLAF_ASSERT(square_size(matrix), matrix);
DLAF_ASSERT(square_blocksize(matrix), matrix);

vector<hpx::future<NormT>> tiles_max;
vector<pika::future<NormT>> tiles_max;
tiles_max.reserve(distribution.localNrTiles().rows() * distribution.localNrTiles().cols());

// for each local tile in the (global) lower triangular matrix, create a task that finds the max element in the tile
@@ -78,20 +78,20 @@
else
return lange(lapack::Norm::Max, tile);
});
auto current_tile_max = hpx::dataflow(norm_max_f, matrix.read(tile_wrt_local));
auto current_tile_max = pika::dataflow(norm_max_f, matrix.read(tile_wrt_local));

tiles_max.emplace_back(std::move(current_tile_max));
}

// then it is necessary to reduce the max values from all ranks into a single max value for the matrix

// TODO unwrapping can be skipped for optimization reasons
NormT local_max_value = hpx::dataflow(unwrapping([](const auto&& values) {
if (values.size() == 0)
return std::numeric_limits<NormT>::min();
return *std::max_element(values.begin(), values.end());
}),
tiles_max)
NormT local_max_value = pika::dataflow(unwrapping([](const auto&& values) {
if (values.size() == 0)
return std::numeric_limits<NormT>::min();
return *std::max_element(values.begin(), values.end());
}),
tiles_max)
.get();
NormT max_value;
dlaf::comm::sync::reduce(comm_grid.rankFullCommunicator(rank), comm_grid.fullCommunicator(), MPI_MAX,
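
The pattern in this hunk — one task per tile producing a future, a local reduction over all futures, then an MPI_MAX reduction across ranks — can be sketched with standard C++ facilities. This is an illustration only: `std::async` stands in for `pika::dataflow`, and the MPI step is left as a comment.

```cpp
#include <algorithm>
#include <future>
#include <limits>
#include <vector>

// One async task per tile computes that tile's max; the futures are then
// folded into a single local maximum. An empty rank yields the lowest
// positive normalized value, matching the numeric_limits::min() fallback
// used in norm/mc.h above.
inline double local_max(const std::vector<std::vector<double>>& tiles) {
  std::vector<std::future<double>> tiles_max;
  tiles_max.reserve(tiles.size());
  for (const auto& tile : tiles)
    tiles_max.push_back(std::async(std::launch::async, [&tile] {
      return *std::max_element(tile.begin(), tile.end());
    }));
  double local = std::numeric_limits<double>::min();
  for (auto& fut : tiles_max)
    local = std::max(local, fut.get());
  return local;  // in DLAF this value then goes through an MPI_MAX reduction
}
```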
12 changes: 6 additions & 6 deletions include/dlaf/blas/tile.h
@@ -45,7 +45,7 @@ void gemm(const blas::Op op_a, const blas::Op op_b, const T alpha, const Tile<co
/// This overload takes a policy argument and a sender which must send all required arguments for the
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done.
template <Backend B, typename Sender,
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>>
typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
auto gemm(const dlaf::internal::Policy<B>& p, Sender&& s);

/// \overload gemm
@@ -67,7 +67,7 @@ void hemm(const blas::Side side, const blas::Uplo uplo, const T alpha, const Til
/// This overload takes a policy argument and a sender which must send all required arguments for the
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done.
template <Backend B, typename Sender,
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>>
typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
auto hemm(const dlaf::internal::Policy<B>& p, Sender&& s);

/// \overload hemm
@@ -89,7 +89,7 @@ void her2k(const blas::Uplo uplo, const blas::Op op, const T alpha, const Tile<c
/// This overload takes a policy argument and a sender which must send all required arguments for the
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done.
template <Backend B, typename Sender,
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>>
typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
auto her2k(const dlaf::internal::Policy<B>& p, Sender&& s);

/// \overload her2k
@@ -111,7 +111,7 @@ void herk(const blas::Uplo uplo, const blas::Op op, const BaseType<T> alpha, con
/// This overload takes a policy argument and a sender which must send all required arguments for the
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done.
template <Backend B, typename Sender,
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>>
typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
auto herk(const dlaf::internal::Policy<B>& p, Sender&& s);

/// \overload herk
@@ -134,7 +134,7 @@ void trmm(const dlaf::internal::Policy<B>& policy, const blas::Side side, const
/// This overload takes a policy argument and a sender which must send all required arguments for the
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done.
template <Backend B, typename Sender,
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>>
typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
auto trmm(const dlaf::internal::Policy<B>& p, Sender&& s);

/// \overload trmm
@@ -157,7 +157,7 @@ void trsm(const dlaf::internal::Policy<B>& policy, const blas::Side side, const
/// This overload takes a policy argument and a sender which must send all required arguments for the
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done.
template <Backend B, typename Sender,
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>>
typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
auto trsm(const dlaf::internal::Policy<B>& p, Sender&& s);

/// \overload trsm
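
The `std::enable_if_t<...is_sender_v<Sender>>` constraint repeated across these overloads can be illustrated in isolation with a stand-in trait. This mimics only the *shape* of `pika::execution::experimental::is_sender_v`; it is not pika's actual implementation, and `Policy`, `ValueSender`, and `gemm_like` are invented names.

```cpp
#include <type_traits>

// Stand-in sender detection: anything exposing an is_sender_tag typedef
// counts as a sender for this illustration.
template <typename, typename = void>
struct is_sender : std::false_type {};
template <typename T>
struct is_sender<T, std::void_t<typename T::is_sender_tag>> : std::true_type {};
template <typename T>
inline constexpr bool is_sender_v = is_sender<T>::value;

struct Policy {};  // stands in for dlaf::internal::Policy<B>
struct ValueSender { using is_sender_tag = void; int value; };

// Mirrors the overload shape in tile.h: the overload participates in
// resolution only when the second argument models a sender, so plain
// values are rejected at compile time.
template <typename Sender, typename = std::enable_if_t<is_sender_v<Sender>>>
int gemm_like(const Policy&, Sender&& s) {
  return s.value;
}
```

Passing anything without the sender tag (e.g. an `int`) makes the overload drop out of resolution instead of failing inside the function body, which is the point of the `enable_if_t` guard.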
2 changes: 1 addition & 1 deletion include/dlaf/blas/tile_extensions.h
@@ -42,7 +42,7 @@ void add(T alpha, const matrix::Tile<const T, D>& tile_b, const matrix::Tile<T,
/// This overload takes a policy argument and a sender which must send all required arguments for the
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done.
template <Backend B, typename Sender,
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>>
typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
auto add(const dlaf::internal::Policy<B>& p, Sender&& s);

/// \overload add