Skip to content

Commit

Permalink
Format files
Browse files Browse the repository at this point in the history
  • Loading branch information
Mikael Simberg committed Jan 24, 2022
1 parent 730980c commit 5092573
Show file tree
Hide file tree
Showing 29 changed files with 112 additions and 106 deletions.
10 changes: 5 additions & 5 deletions include/dlaf/auxiliary/norm/mc.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ dlaf::BaseType<T> Norm<Backend::MC, Device::CPU, T>::max_L(comm::CommunicatorGri

// TODO unwrapping can be skipped for optimization reasons
NormT local_max_value = pika::dataflow(unwrapping([](const auto&& values) {
if (values.size() == 0)
return std::numeric_limits<NormT>::min();
return *std::max_element(values.begin(), values.end());
}),
tiles_max)
if (values.size() == 0)
return std::numeric_limits<NormT>::min();
return *std::max_element(values.begin(), values.end());
}),
tiles_max)
.get();
NormT max_value;
dlaf::comm::sync::reduce(comm_grid.rankFullCommunicator(rank), comm_grid.fullCommunicator(), MPI_MAX,
Expand Down
2 changes: 1 addition & 1 deletion include/dlaf/common/pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class PromiseGuard {
};

private:
T object_; /// the object owned by the wrapper.
T object_; /// the object owned by the wrapper.
pika::lcos::local::promise<T> promise_; /// the shared state that will unlock the next user.
};

Expand Down
12 changes: 6 additions & 6 deletions include/dlaf/cublas/executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
#include <pika/execution.hpp>
#include <pika/functional.hpp>
#include <pika/future.hpp>
#include <pika/modules/async_cuda.hpp>
#include <pika/mutex.hpp>
#include <pika/tuple.hpp>
#include <pika/type_traits.hpp>
#include <pika/modules/async_cuda.hpp>

#include "dlaf/common/assert.h"
#include "dlaf/cublas/error.h"
Expand Down Expand Up @@ -60,8 +60,8 @@ inline constexpr bool isAsyncCublasCallable_v = isAsyncCublasCallable<F, Ts...>:
template <typename F, typename Futures>
struct isDataflowCublasCallable
: pika::is_invocable<pika::util::functional::invoke_fused, F,
decltype(pika::tuple_cat(pika::tie(std::declval<cublasHandle_t&>()),
std::declval<Futures>()))> {};
decltype(pika::tuple_cat(pika::tie(std::declval<cublasHandle_t&>()),
std::declval<Futures>()))> {};
template <typename F, typename Futures>
inline constexpr bool isDataflowCublasCallable_v = isDataflowCublasCallable<F, Futures>::value;
}
Expand Down Expand Up @@ -122,14 +122,14 @@ class Executor : public cuda::Executor {
cudaStream_t stream = stream_pool_.getNextStream();
cublasHandle_t handle = handle_pool_.getNextHandle(stream);
auto r = pika::invoke_fused(std::forward<F>(f),
pika::tuple_cat(pika::tie(handle), std::forward<Futures>(futures)));
pika::tuple_cat(pika::tie(handle), std::forward<Futures>(futures)));
pika::future<void> fut = pika::cuda::experimental::detail::get_future_with_event(stream);

// The handle and stream pools are captured by value to ensure that the
// streams live at least until the event has completed.
fut.then(pika::launch::sync, [r = std::move(r), frame_p = std::move(frame_p),
stream_pool = stream_pool_, handle_pool = handle_pool_](
pika::future<void>&&) mutable { frame_p->set_data(std::move(r)); });
stream_pool = stream_pool_, handle_pool = handle_pool_](
pika::future<void>&&) mutable { frame_p->set_data(std::move(r)); });
}
};
}
Expand Down
4 changes: 2 additions & 2 deletions include/dlaf/cuda/executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Executor {
// The stream pool is captured by value to ensure that the streams live at
// least until the event has completed.
return fut.then(pika::launch::sync, [r = std::move(r), stream_pool = stream_pool_](
pika::future<void>&&) mutable { return std::move(r); });
pika::future<void>&&) mutable { return std::move(r); });
}

template <class Frame, class F, class Futures>
Expand All @@ -77,7 +77,7 @@ class Executor {

cudaStream_t stream = stream_pool_.getNextStream();
auto r = pika::invoke_fused(std::forward<F>(f),
pika::tuple_cat(std::forward<Futures>(futures), pika::tie(stream)));
pika::tuple_cat(std::forward<Futures>(futures), pika::tie(stream)));
pika::future<void> fut = pika::cuda::experimental::detail::get_future_with_event(stream);

// The stream pool is captured by value to ensure that the streams live at
Expand Down
12 changes: 6 additions & 6 deletions include/dlaf/cusolver/executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
#include <pika/execution.hpp>
#include <pika/functional.hpp>
#include <pika/future.hpp>
#include <pika/modules/async_cuda.hpp>
#include <pika/mutex.hpp>
#include <pika/tuple.hpp>
#include <pika/type_traits.hpp>
#include <pika/modules/async_cuda.hpp>

#include "dlaf/common/assert.h"
#include "dlaf/cublas/executor.h"
Expand Down Expand Up @@ -60,8 +60,8 @@ inline constexpr bool isAsyncCusolverCallable_v = isAsyncCusolverCallable<F, Ts.
template <typename F, typename Futures>
struct isDataflowCusolverCallable
: pika::is_invocable<pika::util::functional::invoke_fused, F,
decltype(pika::tuple_cat(pika::tie(std::declval<cusolverDnHandle_t&>()),
std::declval<Futures>()))> {};
decltype(pika::tuple_cat(pika::tie(std::declval<cusolverDnHandle_t&>()),
std::declval<Futures>()))> {};

template <typename F, typename Futures>
inline constexpr bool isDataflowCusolverCallable_v = isDataflowCusolverCallable<F, Futures>::value;
Expand Down Expand Up @@ -123,14 +123,14 @@ class Executor : public cublas::Executor {
cudaStream_t stream = stream_pool_.getNextStream();
cusolverDnHandle_t handle = handle_pool_.getNextHandle(stream);
auto r = pika::invoke_fused(std::forward<F>(f),
pika::tuple_cat(pika::tie(handle), std::forward<Futures>(futures)));
pika::tuple_cat(pika::tie(handle), std::forward<Futures>(futures)));
pika::future<void> fut = pika::cuda::experimental::detail::get_future_with_event(stream);

// The handle and stream pools are captured by value to ensure that the
// streams live at least until the event has completed.
fut.then(pika::launch::sync, [r = std::move(r), frame_p = std::move(frame_p),
stream_pool = stream_pool_, handle_pool = handle_pool_](
pika::future<void>&&) mutable { frame_p->set_data(std::move(r)); });
stream_pool = stream_pool_, handle_pool = handle_pool_](
pika::future<void>&&) mutable { frame_p->set_data(std::move(r)); });
}

template <typename F, typename... Ts>
Expand Down
6 changes: 3 additions & 3 deletions include/dlaf/eigensolver/band_to_tridiag/mc.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ struct BandToTridiag<Backend::MC, Device::CPU, T> {
deps.push_back(sf);
}
sf = pika::dataflow(executor_hp, unwrapping(copy_offdiag), k * nb,
mat_a.read(GlobalTileIndex{k + 1, k}), sf);
mat_a.read(GlobalTileIndex{k + 1, k}), sf);
deps.push_back(sf);
}
else {
Expand Down Expand Up @@ -342,7 +342,7 @@ struct BandToTridiag<Backend::MC, Device::CPU, T> {
const auto tile_index = sweep / nb;
const auto start = tile_index * nb;
pika::dataflow(executor_hp, unwrapping(copy_tridiag_task), start, std::min(nb, size - start),
std::min(nb, size - 1 - start), mat_trid(GlobalTileIndex{0, tile_index}), dep);
std::min(nb, size - 1 - start), mat_trid(GlobalTileIndex{0, tile_index}), dep);
}
};

Expand All @@ -362,7 +362,7 @@ struct BandToTridiag<Backend::MC, Device::CPU, T> {
const GlobalElementIndex index_v((sweep / b + step) * b, sweep);

pika::dataflow(pika::launch::sync, unwrapping(store_tau_v), w_pipeline(),
mat_v(dist_v.globalTileIndex(index_v)), dist_v.tileElementIndex(index_v));
mat_v(dist_v.globalTileIndex(index_v)), dist_v.tileElementIndex(index_v));
deps[step] = pika::dataflow(executor_hp, unwrapping(cont_sweep), w_pipeline(), deps[dep_index]);
}

Expand Down
4 changes: 2 additions & 2 deletions include/dlaf/eigensolver/reduction_to_band.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ std::vector<pika::shared_future<common::internal::vector<T>>> reductionToBand(Ma
/// @pre mat_a has a square block size
/// @pre mat_a is distributed according to @p grid
template <Backend backend, Device device, class T>
std::vector<pika::shared_future<common::internal::vector<T>>> reductionToBand(comm::CommunicatorGrid grid,
Matrix<T, device>& mat_a) {
std::vector<pika::shared_future<common::internal::vector<T>>> reductionToBand(
comm::CommunicatorGrid grid, Matrix<T, device>& mat_a) {
DLAF_ASSERT(matrix::square_size(mat_a), mat_a);
DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a);
DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid);
Expand Down
13 changes: 7 additions & 6 deletions include/dlaf/eigensolver/reduction_to_band/mc.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ namespace internal {

template <class T>
struct ReductionToBand<Backend::MC, Device::CPU, T> {
static std::vector<pika::shared_future<common::internal::vector<T>>> call(Matrix<T, Device::CPU>& mat_a);
static std::vector<pika::shared_future<common::internal::vector<T>>> call(
Matrix<T, Device::CPU>& mat_a);
static std::vector<pika::shared_future<common::internal::vector<T>>> call(
comm::CommunicatorGrid grid, Matrix<T, Device::CPU>& mat_a);
};
Expand Down Expand Up @@ -217,15 +218,15 @@ template <class Executor, class T>
void hemmDiag(const Executor& ex, pika::shared_future<TileT<const T>> tile_a,
pika::shared_future<TileT<const T>> tile_w, pika::future<TileT<T>> tile_x) {
pika::dataflow(ex, matrix::unwrapExtendTiles(tile::internal::hemm_o), blas::Side::Left,
blas::Uplo::Lower, T(1), std::move(tile_a), std::move(tile_w), T(1), std::move(tile_x));
blas::Uplo::Lower, T(1), std::move(tile_a), std::move(tile_w), T(1), std::move(tile_x));
}

// X += op(A) * W
template <class Executor, class T>
void hemmOffDiag(const Executor& ex, blas::Op op, pika::shared_future<TileT<const T>> tile_a,
pika::shared_future<TileT<const T>> tile_w, pika::future<TileT<T>> tile_x) {
pika::dataflow(ex, matrix::unwrapExtendTiles(tile::internal::gemm_o), op, blas::Op::NoTrans, T(1),
std::move(tile_a), std::move(tile_w), T(1), std::move(tile_x));
std::move(tile_a), std::move(tile_w), T(1), std::move(tile_x));
}

template <class Executor, class T>
Expand Down Expand Up @@ -357,7 +358,7 @@ void gemmUpdateX(PanelT<Coord::Col, T>& x, ConstMatrixT<T>& w2, MatrixLikeT& v)
// GEMM X = X - 0.5 . V . W2
for (const auto& index_i : v.iteratorLocal())
pika::dataflow(ex, unwrapExtendTiles(gemm_o), blas::Op::NoTrans, blas::Op::NoTrans, T(-0.5),
v.read(index_i), w2.read(LocalTileIndex(0, 0)), T(1), x(index_i));
v.read(index_i), w2.read(LocalTileIndex(0, 0)), T(1), x(index_i));
}

template <class T>
Expand Down Expand Up @@ -426,7 +427,7 @@ void gemmComputeW2(MatrixT<T>& w2, ConstPanelT<Coord::Col, T>& w, ConstPanelT<Co
// GEMM W2 = W* . X
for (const auto& index_tile : w.iteratorLocal())
pika::dataflow(ex, unwrapExtendTiles(gemm_o), blas::Op::ConjTrans, blas::Op::NoTrans, T(1),
w.read(index_tile), x.read(index_tile), T(1), w2(LocalTileIndex(0, 0)));
w.read(index_tile), x.read(index_tile), T(1), w2(LocalTileIndex(0, 0)));
}

template <class T>
Expand Down Expand Up @@ -532,7 +533,7 @@ pika::shared_future<common::internal::vector<T>> computePanelReflectors(
auto panel_tiles = pika::when_all(matrix::select(mat_a, ai_panel_range));

return pika::dataflow(getHpExecutor<Backend::MC>(), std::move(panel_task), std::move(panel_tiles),
mpi_col_chain_panel, std::move(trigger));
mpi_col_chain_panel, std::move(trigger));
}

template <class T>
Expand Down
2 changes: 1 addition & 1 deletion include/dlaf/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ struct [[nodiscard]] ScopedInitializer {
ScopedInitializer(int argc, const char* const argv[], configuration const& user_cfg = {});
~ScopedInitializer();

ScopedInitializer(ScopedInitializer&&) = delete;
ScopedInitializer(ScopedInitializer &&) = delete;
ScopedInitializer(ScopedInitializer const&) = delete;
ScopedInitializer& operator=(ScopedInitializer&&) = delete;
ScopedInitializer& operator=(ScopedInitializer const&) = delete;
Expand Down
4 changes: 2 additions & 2 deletions include/dlaf/matrix/copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ void copy(Matrix<const T, Source>& source, Matrix<T, Destination>& dest) {
for (SizeType j = 0; j < local_tile_cols; ++j) {
for (SizeType i = 0; i < local_tile_rows; ++i) {
pika::dataflow(dlaf::getCopyExecutor<Source, Destination>(),
unwrapExtendTiles(dlaf::matrix::internal::copy_o), source.read(LocalTileIndex(i, j)),
dest(LocalTileIndex(i, j)));
unwrapExtendTiles(dlaf::matrix::internal::copy_o),
source.read(LocalTileIndex(i, j)), dest(LocalTileIndex(i, j)));
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/dlaf/matrix/copy_tile.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,8 @@ void copyIfNeeded(FutureS<Tile<U, Source>> tile_from, FutureD<Tile<T, Destinatio
pika::future<void> wait_for_me = pika::make_ready_future<void>()) {
if constexpr (Destination != Source)
pika::dataflow(dlaf::getCopyExecutor<Source, Destination>(),
matrix::unwrapExtendTiles(internal::copy_o), wait_for_me, std::move(tile_from),
std::move(tile_to));
matrix::unwrapExtendTiles(internal::copy_o), wait_for_me, std::move(tile_from),
std::move(tile_to));
}
}
}
3 changes: 2 additions & 1 deletion include/dlaf/matrix/internal/tile_future_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ pika::future<ReturnTileType> setPromiseTileFuture(
using NonConstTileType = typename ReturnTileType::TileType;

DLAF_ASSERT_HEAVY(old_future.valid(), "");
return old_future.then(pika::launch::sync, [p = std::move(p)](pika::future<TileDataType>&& fut) mutable {
return old_future.then(pika::launch::sync, [p = std::move(p)](
pika::future<TileDataType>&& fut) mutable {
std::exception_ptr current_exception_ptr;

try {
Expand Down
13 changes: 7 additions & 6 deletions include/dlaf/matrix/tile.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ pika::shared_future<Tile<T, D>> splitTileInsertFutureInChain(pika::future<Tile<T

template <class T, Device D>
pika::future<Tile<T, D>> createSubTile(const pika::shared_future<Tile<T, D>>& tile,
const SubTileSpec& spec);
const SubTileSpec& spec);
}

/// The Tile object aims to provide an effective way to access the memory as a two dimensional
Expand Down Expand Up @@ -361,7 +361,7 @@ auto create_data(const Tile<T, device>& tile) {
namespace internal {
template <class T, Device D>
pika::future<Tile<T, D>> createSubTile(const pika::shared_future<Tile<T, D>>& tile,
const SubTileSpec& spec) {
const SubTileSpec& spec) {
return pika::dataflow(
pika::launch::sync, [](auto tile, auto spec) { return Tile<T, D>(tile, spec); }, tile, spec);
}
Expand Down Expand Up @@ -394,7 +394,8 @@ pika::shared_future<Tile<T, D>> splitTileInsertFutureInChain(pika::future<Tile<T

return pika::make_tuple(std::move(tile), std::move(dep_tracker));
};
auto tmp = pika::split_future(tile.then(pika::launch::sync, pika::unwrapping(std::move(swap_promise))));
auto tmp =
pika::split_future(tile.then(pika::launch::sync, pika::unwrapping(std::move(swap_promise))));
// old_tile = F1(PN) and will be used to create the subtiles
pika::shared_future<TileType> old_tile = std::move(pika::get<0>(tmp));
// 3. Set P2 or SF(P2) into FN to restore the chain: F1(PN) FN(*) ...
Expand All @@ -405,7 +406,7 @@ pika::shared_future<Tile<T, D>> splitTileInsertFutureInChain(pika::future<Tile<T
};
// tile = FN(*) (out argument) can be used to access the full tile after the subtile tasks have completed.
tile = pika::dataflow(pika::launch::sync, pika::unwrapping(set_promise_or_shfuture), tmp_tile,
std::move(pika::get<1>(tmp)));
std::move(pika::get<1>(tmp)));

return old_tile;
}
Expand All @@ -418,7 +419,7 @@ pika::shared_future<Tile<T, D>> splitTileInsertFutureInChain(pika::future<Tile<T
/// and the returned subtile go out of scope.
template <class T, Device D>
pika::shared_future<Tile<const T, D>> splitTile(const pika::shared_future<Tile<const T, D>>& tile,
const SubTileSpec& spec) {
const SubTileSpec& spec) {
return internal::createSubTile(tile, spec);
}

Expand Down Expand Up @@ -463,7 +464,7 @@ pika::future<Tile<T, D>> splitTile(pika::future<Tile<T, D>>& tile, const SubTile
/// (i.e. two different subtiles cannot access the same element).
template <class T, Device D>
std::vector<pika::future<Tile<T, D>>> splitTileDisjoint(pika::future<Tile<T, D>>& tile,
const std::vector<SubTileSpec>& specs) {
const std::vector<SubTileSpec>& specs) {
if (specs.size() == 0)
return {};

Expand Down
16 changes: 8 additions & 8 deletions include/dlaf/multiplication/triangular/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ namespace internal {

namespace triangular_lln {
template <Backend backend, class T, typename InSender, typename OutSender>
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha, InSender&& in_tile,
OutSender&& out_tile) {
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha,
InSender&& in_tile, OutSender&& out_tile) {
dlaf::internal::whenAllLift(blas::Side::Left, blas::Uplo::Lower, blas::Op::NoTrans, diag, alpha,
std::forward<InSender>(in_tile), std::forward<OutSender>(out_tile)) |
tile::trmm(dlaf::internal::Policy<backend>(priority)) |
Expand Down Expand Up @@ -78,8 +78,8 @@ void gemmTrailingMatrixTile(pika::threads::thread_priority priority, blas::Op op

namespace triangular_lun {
template <Backend backend, class T, typename InSender, typename OutSender>
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha, InSender&& in_tile,
OutSender&& out_tile) {
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha,
InSender&& in_tile, OutSender&& out_tile) {
dlaf::internal::whenAllLift(blas::Side::Left, blas::Uplo::Upper, blas::Op::NoTrans, diag, alpha,
std::forward<InSender>(in_tile), std::forward<OutSender>(out_tile)) |
tile::trmm(dlaf::internal::Policy<backend>(priority)) |
Expand Down Expand Up @@ -118,8 +118,8 @@ void gemmTrailingMatrixTile(pika::threads::thread_priority priority, blas::Op op

namespace triangular_rln {
template <Backend backend, class T, typename InSender, typename OutSender>
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha, InSender&& in_tile,
OutSender&& out_tile) {
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha,
InSender&& in_tile, OutSender&& out_tile) {
dlaf::internal::whenAllLift(blas::Side::Right, blas::Uplo::Lower, blas::Op::NoTrans, diag, alpha,
std::forward<InSender>(in_tile), std::forward<OutSender>(out_tile)) |
tile::trmm(dlaf::internal::Policy<backend>(priority)) |
Expand Down Expand Up @@ -158,8 +158,8 @@ void gemmTrailingMatrixTile(pika::threads::thread_priority priority, blas::Op op

namespace triangular_run {
template <Backend backend, class T, typename InSender, typename OutSender>
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha, InSender&& in_tile,
OutSender&& out_tile) {
void trmmBPanelTile(pika::threads::thread_priority priority, blas::Diag diag, T alpha,
InSender&& in_tile, OutSender&& out_tile) {
dlaf::internal::whenAllLift(blas::Side::Right, blas::Uplo::Upper, blas::Op::NoTrans, diag, alpha,
std::forward<InSender>(in_tile), std::forward<OutSender>(out_tile)) |
tile::trmm(dlaf::internal::Policy<backend>(priority)) |
Expand Down
Loading

0 comments on commit 5092573

Please sign in to comment.