-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
algo: triangular solver LLT (distributed) (#409)
Implementation of the distributed Triangular Solver Left Lower Transposed. Note: keep_future is problematic with transform<GPU> because it releases the future when the work is submitted, so the future is not kept alive until the end of the asynchronous operation. Not using it means the contained value is passed instead, and that value is kept alive (it does not get unwrapped the way the future does).
- Loading branch information
Showing
23 changed files
with
378 additions
and
150 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
// | ||
// Distributed Linear Algebra with Future (DLAF) | ||
// | ||
// Copyright (c) 2018-2021, ETH Zurich | ||
// All rights reserved. | ||
// | ||
// Please, refer to the LICENSE file in the root directory. | ||
// SPDX-License-Identifier: BSD-3-Clause | ||
// | ||
#pragma once | ||
|
||
#include <blas.hh> | ||
|
||
#include "dlaf/common/callable_object.h" | ||
#include "dlaf/matrix/tile.h" | ||
#include "dlaf/sender/make_sender_algorithm_overloads.h" | ||
#include "dlaf/sender/partial_transform.h" | ||
#include "dlaf/sender/policy.h" | ||
#include "dlaf/sender/transform.h" | ||
|
||
#ifdef DLAF_WITH_CUDA | ||
#include <cublas_v2.h> | ||
|
||
#include "dlaf/cublas/error.h" | ||
#include "dlaf/util_cublas.h" | ||
#endif | ||
|
||
namespace dlaf { | ||
namespace tile { | ||
using matrix::Tile; | ||
|
||
#ifdef DLAF_DOXYGEN | ||
|
||
/// Computes A += alpha * B, with A = tile_a (updated in place) and B = tile_b (read-only). | ||
/// | ||
/// tile_a and tile_b must have the same size (the implementation checks it with DLAF_ASSERT). | ||
/// Note that the template parameter B is the Backend and is unrelated to the operand B above. | ||
/// | ||
/// This overload blocks until completion of the algorithm. | ||
template <Backend B, class T, Device D> | ||
void add(T alpha, const matrix::Tile<const T, D>& tile_b, const matrix::Tile<T, D>& tile_a); | ||
|
||
/// \overload add | ||
/// | ||
/// This overload takes a policy argument and a sender which must send all required arguments for the | ||
/// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. | ||
/// | ||
/// It is enabled only when hpx::execution::experimental::is_sender_v<Sender> is true. | ||
template <Backend B, typename Sender, | ||
typename = std::enable_if_t<hpx::execution::experimental::is_sender_v<Sender>>> | ||
auto add(const dlaf::internal::Policy<B>& p, Sender&& s); | ||
|
||
/// \overload add | ||
/// | ||
/// This overload partially applies the algorithm with a policy for later use with operator| with a | ||
/// sender on the left-hand side. | ||
/// | ||
/// I.e. it is meant for expressions of the form `sender | add(p)`. | ||
template <Backend B> | ||
auto add(const dlaf::internal::Policy<B>& p); | ||
|
||
#else | ||
|
||
namespace internal { | ||
|
||
template <class T> | ||
void add(T alpha, const matrix::Tile<const T, Device::CPU>& tile_b, | ||
const matrix::Tile<T, Device::CPU>& tile_a) { | ||
DLAF_ASSERT(equal_size(tile_a, tile_b), tile_a, tile_b); | ||
for (auto j = 0; j < tile_a.size().cols(); ++j) | ||
blas::axpy(tile_a.size().rows(), alpha, tile_b.ptr({0, j}), 1, tile_a.ptr({0, j}), 1); | ||
} | ||
|
||
#ifdef DLAF_WITH_CUDA | ||
template <class T> | ||
void add(cublasHandle_t handle, T alpha, const matrix::Tile<const T, Device::GPU>& tile_b, | ||
const matrix::Tile<T, Device::GPU>& tile_a) { | ||
DLAF_ASSERT(equal_size(tile_a, tile_b), tile_a, tile_b); | ||
for (auto j = 0; j < tile_a.size().cols(); ++j) | ||
tile::internal::CublasAxpy<T>::call(handle, tile_a.size().rows(), util::blasToCublasCast(&alpha), | ||
util::blasToCublasCast(tile_b.ptr({0, j})), 1, | ||
util::blasToCublasCast(tile_a.ptr({0, j})), 1); | ||
} | ||
#endif | ||
|
||
// NOTE(review): presumably defines the callable object `add_o` wrapping the add overloads of this | ||
// namespace (it is referenced as internal::add_o below) - confirm in dlaf/common/callable_object.h. | ||
DLAF_MAKE_CALLABLE_OBJECT(add); | ||
} | ||
|
||
// NOTE(review): presumably generates the tile::add overloads documented in the DLAF_DOXYGEN branch | ||
// (blocking, policy + sender, policy only) on top of internal::add_o - confirm in | ||
// dlaf/sender/make_sender_algorithm_overloads.h. | ||
DLAF_MAKE_SENDER_ALGORITHM_OVERLOADS(add, internal::add_o) | ||
|
||
#endif | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.