Skip to content

Commit

Permalink
change leaves computations
Browse files Browse the repository at this point in the history
  • Loading branch information
PierreMarchand20 committed Aug 9, 2024
1 parent 6b50680 commit ed3ee59
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 75 deletions.
9 changes: 9 additions & 0 deletions include/htool/basic_types/tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ void preorder_tree_traversal(NodeType &node, PreOrderFunction preorder_visitor)
}
}

template <typename NodeType, typename PostOrderFunction>
void postorder_tree_traversal(NodeType &node, PostOrderFunction postorder_visitor) {
const auto &children = node.get_children();
for (auto &child : children) {
postorder_tree_traversal(*child, postorder_visitor);
}
postorder_visitor(node);
}

} // namespace htool

#endif
124 changes: 62 additions & 62 deletions include/htool/hmatrix/hmatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,45 +43,12 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio
std::unique_ptr<Matrix<CoefficientPrecision>> m_dense_data{nullptr};
std::unique_ptr<LowRankMatrix<CoefficientPrecision, CoordinatePrecision>> m_low_rank_data{nullptr};

// Cached leaves
mutable std::vector<const HMatrix *> m_leaves{};
mutable std::vector<const HMatrix *> m_leaves_for_symmetry{};
mutable char m_symmetry_type_for_leaves{'N'};
mutable char m_UPLO_for_leaves{'N'};
//
char m_symmetry_type_for_leaves{'N'};
char m_UPLO_for_leaves{'N'};

StorageType m_storage_type{StorageType::Hierarchical};

void set_leaves_in_cache() const {
if (m_leaves.empty()) {
std::stack<std::pair<const HMatrix<CoefficientPrecision, CoordinatePrecision> *, bool>> hmatrix_stack;
hmatrix_stack.push(std::pair<const HMatrix<CoefficientPrecision, CoordinatePrecision> *, bool>(this, m_symmetry != 'N'));

while (!hmatrix_stack.empty()) {
auto &current_element = hmatrix_stack.top();
hmatrix_stack.pop();
const HMatrix<CoefficientPrecision, CoordinatePrecision> *current_hmatrix = current_element.first;
bool has_symmetric_ancestor = current_element.second;

if (current_hmatrix->is_leaf()) {
m_leaves.push_back(current_hmatrix);

if (has_symmetric_ancestor && current_hmatrix->get_target_cluster().get_offset() != current_hmatrix->get_source_cluster().get_offset()) {
m_leaves_for_symmetry.push_back(current_hmatrix);
}
}

if (m_symmetry_type_for_leaves == 'N' && current_hmatrix->get_symmetry() != 'N') {
m_symmetry_type_for_leaves = current_hmatrix->get_symmetry();
m_UPLO_for_leaves = current_hmatrix->get_UPLO();
}

for (const auto &child : current_hmatrix->get_children()) {
hmatrix_stack.push(std::pair<const HMatrix<CoefficientPrecision, CoordinatePrecision> *, bool>(child.get(), current_hmatrix->get_symmetry() != 'N' || has_symmetric_ancestor));
}
}
}
}

public:
// Root constructor
HMatrix(const Cluster<CoordinatePrecision> &target_cluster, const Cluster<CoordinatePrecision> &source_cluster) : TreeNode<HMatrix, HMatrixTreeData<CoefficientPrecision, CoordinatePrecision>>(), m_target_cluster(&target_cluster), m_source_cluster(&source_cluster) {
Expand All @@ -90,7 +57,7 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio
// Child constructor
HMatrix(const HMatrix &parent, const Cluster<CoordinatePrecision> *target_cluster, const Cluster<CoordinatePrecision> *source_cluster) : TreeNode<HMatrix, HMatrixTreeData<CoefficientPrecision, CoordinatePrecision>>(parent), m_target_cluster(target_cluster), m_source_cluster(source_cluster) {}

HMatrix(const HMatrix &rhs) : TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecision>, HMatrixTreeData<CoefficientPrecision, CoordinatePrecision>>(rhs), m_target_cluster(rhs.m_target_cluster), m_source_cluster(rhs.m_source_cluster), m_symmetry(rhs.m_symmetry), m_UPLO(rhs.m_UPLO), m_leaves(), m_leaves_for_symmetry(), m_symmetry_type_for_leaves(), m_storage_type(rhs.m_storage_type) {
HMatrix(const HMatrix &rhs) : TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecision>, HMatrixTreeData<CoefficientPrecision, CoordinatePrecision>>(rhs), m_target_cluster(rhs.m_target_cluster), m_source_cluster(rhs.m_source_cluster), m_symmetry(rhs.m_symmetry), m_UPLO(rhs.m_UPLO), m_symmetry_type_for_leaves(), m_storage_type(rhs.m_storage_type) {
if (m_target_cluster->is_root() or is_cluster_on_partition(*m_target_cluster)) {
Logger::get_instance().log(LogLevel::INFO, "Deep copy of HMatrix");
}
Expand Down Expand Up @@ -120,20 +87,20 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio
for (auto &child : rhs.m_children) {
this->m_children.emplace_back(std::make_unique<HMatrix<CoefficientPrecision, CoordinatePrecision>>(*child));
}
m_target_cluster = rhs.m_target_cluster;
m_source_cluster = rhs.m_source_cluster;
m_symmetry = rhs.m_symmetry;
m_UPLO = rhs.m_UPLO;
m_storage_type = rhs.m_storage_type;
m_target_cluster = rhs.m_target_cluster;
m_source_cluster = rhs.m_source_cluster;
m_symmetry = rhs.m_symmetry;
m_UPLO = rhs.m_UPLO;
m_storage_type = rhs.m_storage_type;
m_symmetry_type_for_leaves = rhs.m_symmetry_type_for_leaves;
m_UPLO_for_leaves = rhs.m_UPLO_for_leaves;

if (rhs.m_dense_data) {
m_dense_data = std::make_unique<Matrix<CoefficientPrecision>>(*rhs.m_dense_data);
}
if (rhs.m_low_rank_data) {
m_low_rank_data = std::make_unique<LowRankMatrix<CoefficientPrecision, CoordinatePrecision>>(*rhs.m_low_rank_data);
}
m_leaves.clear();
m_leaves_for_symmetry.clear();
return *this;
}

Expand Down Expand Up @@ -175,14 +142,7 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio
int get_rank() const {
return m_storage_type == StorageType::LowRank ? m_low_rank_data->rank_of() : -1;
}
const std::vector<const HMatrix *> &get_leaves() const {
set_leaves_in_cache();
return m_leaves;
}
const std::vector<const HMatrix *> &get_leaves_for_symmetry() const {
set_leaves_in_cache();
return m_leaves_for_symmetry;
}

const Matrix<CoefficientPrecision> *get_dense_data() const { return m_dense_data.get(); }
Matrix<CoefficientPrecision> *get_dense_data() { return m_dense_data.get(); }
const LowRankMatrix<CoefficientPrecision, CoordinatePrecision> *get_low_rank_data() const { return m_low_rank_data.get(); }
Expand Down Expand Up @@ -233,6 +193,8 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio
// HMatrix node setters
void set_symmetry(char symmetry) { m_symmetry = symmetry; }
void set_UPLO(char UPLO) { m_UPLO = UPLO; }
void set_symmetry_for_leaves(char symmetry) const { m_symmetry_type_for_leaves = symmetry; }
void set_UPLO_for_leaves(char UPLO) const { m_UPLO_for_leaves = UPLO; }
void set_target_cluster(const Cluster<CoordinatePrecision> *new_target_cluster) { m_target_cluster = new_target_cluster; }

// Test properties
Expand Down Expand Up @@ -267,23 +229,52 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio

void set_dense_data(std::unique_ptr<Matrix<CoefficientPrecision>> dense_matrix_ptr) {
this->delete_children();
m_leaves.clear();
m_leaves_for_symmetry.clear();
m_dense_data = std::move(dense_matrix_ptr);
// m_dense_data = std::make_unique<Matrix<CoefficientPrecision>>();
// m_dense_data->assign(dense_matrix.nb_rows(), dense_matrix.nb_cols(), dense_matrix.release(), true);
m_dense_data = std::move(dense_matrix_ptr);
m_storage_type = StorageType::Dense;
}
};

template <typename CoefficientPrecision, typename CoordinatePrecision = underlying_type<CoefficientPrecision>>
std::pair<std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *>, std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *>> get_leaves_from(const HMatrix<CoefficientPrecision, CoordinatePrecision> &hmatrix) {
std::pair<std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *>, std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *>> result;
auto &leaves = result.first;
auto &leaves_for_symmetry = result.second;
std::stack<std::pair<const HMatrix<CoefficientPrecision, CoordinatePrecision> *, bool>> hmatrix_stack;
hmatrix_stack.push(std::pair<const HMatrix<CoefficientPrecision, CoordinatePrecision> *, bool>(&hmatrix, hmatrix.get_symmetry() != 'N'));

while (!hmatrix_stack.empty()) {
auto &current_element = hmatrix_stack.top();
hmatrix_stack.pop();
const HMatrix<CoefficientPrecision, CoordinatePrecision> *current_hmatrix = current_element.first;
bool has_symmetric_ancestor = current_element.second;

if (current_hmatrix->is_leaf()) {
leaves.push_back(current_hmatrix);

if (has_symmetric_ancestor && current_hmatrix->get_target_cluster().get_offset() != current_hmatrix->get_source_cluster().get_offset()) {
leaves_for_symmetry.push_back(current_hmatrix);
}
}

for (const auto &child : current_hmatrix->get_children()) {
hmatrix_stack.push(std::pair<const HMatrix<CoefficientPrecision, CoordinatePrecision> *, bool>(child.get(), current_hmatrix->get_symmetry() != 'N' || has_symmetric_ancestor));
}
}
return result;
}

template <typename CoefficientPrecision, typename CoordinatePrecision = underlying_type<CoefficientPrecision>>
void copy_to_dense(const HMatrix<CoefficientPrecision, CoordinatePrecision> &hmatrix, CoefficientPrecision *ptr) {

int target_offset = hmatrix.get_target_cluster().get_offset();
int source_offset = hmatrix.get_source_cluster().get_offset();
int target_size = hmatrix.get_target_cluster().get_size();

for (auto leaf : hmatrix.get_leaves()) {
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(hmatrix); // C++17 structured binding

for (auto leaf : leaves) {
int local_nr = leaf->get_target_cluster().get_size();
int local_nc = leaf->get_source_cluster().get_size();
int row_offset = leaf->get_target_cluster().get_offset() - target_offset;
Expand Down Expand Up @@ -332,8 +323,11 @@ void copy_to_dense_in_user_numbering(const HMatrix<CoefficientPrecision, Coordin
int target_size = target_cluster.get_size();
const auto &target_permutation = target_cluster.get_permutation();
const auto &source_permutation = source_cluster.get_permutation();
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(hmatrix); // C++17 structured binding

for (auto leaf : hmatrix.get_leaves()) {
for (auto leaf : leaves) {
int local_nr = leaf->get_target_cluster().get_size();
int local_nc = leaf->get_source_cluster().get_size();
int row_offset = leaf->get_target_cluster().get_offset();
Expand All @@ -357,7 +351,7 @@ void copy_to_dense_in_user_numbering(const HMatrix<CoefficientPrecision, Coordin
}
}
if (hmatrix.get_symmetry_for_leaves() != 'N') {
for (auto leaf : hmatrix.get_leaves_for_symmetry()) {
for (auto leaf : leaves_for_symmetry) {
int local_nr = leaf->get_target_cluster().get_size();
int local_nc = leaf->get_source_cluster().get_size();
int row_offset = leaf->get_target_cluster().get_offset();
Expand All @@ -381,8 +375,11 @@ void copy_diagonal(const HMatrix<CoefficientPrecision, CoordinatePrecision> &hma

int target_offset = hmatrix.get_target_cluster().get_offset();
int source_offset = hmatrix.get_source_cluster().get_offset();
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(hmatrix); // C++17 structured binding

for (auto leaf : hmatrix.get_leaves()) {
for (auto leaf : leaves) {
int local_nr = leaf->get_target_cluster().get_size();
int local_nc = leaf->get_source_cluster().get_size();
int offset_i = leaf->get_target_cluster().get_offset() - target_offset;
Expand Down Expand Up @@ -418,8 +415,11 @@ void copy_diagonal_in_user_numbering(const HMatrix<CoefficientPrecision, Coordin
const auto &permutation = hmatrix.get_target_cluster().get_permutation();
int target_offset = hmatrix.get_target_cluster().get_offset();
// int source_offset = hmatrix.get_source_cluster().get_offset();
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(hmatrix); // C++17 structured binding

for (auto leaf : hmatrix.get_leaves()) {
for (auto leaf : leaves) {
int local_nr = leaf->get_target_cluster().get_size();
int local_nc = leaf->get_source_cluster().get_size();
int offset_i = leaf->get_target_cluster().get_offset();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ void add_hmatrix_matrix_product_row_major(char transa, char transb, std::complex

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void sequential_add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *B, CoefficientPrecision beta, CoefficientPrecision *C, int mu) {
auto &leaves = A.get_leaves();
auto &leaves_for_symmetry = A.get_leaves_for_symmetry();
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(A); // C++17 structured binding

if ((transa == 'T' && A.get_symmetry_for_leaves() == 'H')
|| (transa == 'C' && A.get_symmetry_for_leaves() == 'S')) {
Expand Down Expand Up @@ -110,9 +111,9 @@ void sequential_add_hmatrix_matrix_product_row_major(char transa, char transb, C

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void openmp_add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *B, CoefficientPrecision beta, CoefficientPrecision *C, int mu) {
// set_leaves_in_cache();
auto &leaves = A.get_leaves();
auto &leaves_for_symmetry = A.get_leaves_for_symmetry();
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(A); // C++17 structured binding

if ((transa == 'T' && A.get_symmetry_for_leaves() == 'H')
|| (transa == 'C' && A.get_symmetry_for_leaves() == 'S')) {
Expand Down
16 changes: 8 additions & 8 deletions include/htool/hmatrix/linalg/add_hmatrix_vector_product.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ void sequential_add_hmatrix_vector_product(char trans, CoefficientPrecision alph
int local_input_offset = A.get_source_cluster().get_offset();
int local_output_offset = A.get_target_cluster().get_offset();
char trans_sym = (A.get_symmetry_for_leaves() == 'S') ? 'T' : 'C';
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(A); // C++17 structured binding

if (trans != 'N') {
out_size = A.get_source_cluster().get_size();
Expand All @@ -77,17 +80,15 @@ void sequential_add_hmatrix_vector_product(char trans, CoefficientPrecision alph

// Contribution champ lointain
std::vector<CoefficientPrecision> temp(out_size, 0);
// for (int b = 0; b < A.get_leaves().size(); b++) {
for (auto &leaf : A.get_leaves()) {
for (auto &leaf : leaves) {
int input_offset = (leaf->*get_input_cluster)().get_offset();
int output_offset = (leaf->*get_output_cluster)().get_offset();
add_hmatrix_vector_product(trans, alpha, *leaf, in + input_offset - local_input_offset, CoefficientPrecision(1), out + (output_offset - local_output_offset));
}

// Symmetry part of the diagonal part
if (A.get_symmetry_for_leaves() != 'N') {
// for (int b = 0; b < A.get_leaves_for_symmetry().size(); b++) {
for (auto &leaf_for_symmetry : A.get_leaves_for_symmetry()) {
for (auto &leaf_for_symmetry : leaves_for_symmetry) {
int input_offset = (leaf_for_symmetry->*get_input_cluster)().get_offset();
int output_offset = (leaf_for_symmetry->*get_output_cluster)().get_offset();
add_hmatrix_vector_product(trans_sym, alpha, *leaf_for_symmetry, in + output_offset - local_input_offset, CoefficientPrecision(1), out + (input_offset - local_output_offset));
Expand All @@ -97,10 +98,9 @@ void sequential_add_hmatrix_vector_product(char trans, CoefficientPrecision alph

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void openmp_add_hmatrix_vector_product(char trans, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out) {

// A.set_leaves_in_cache();
auto &leaves = A.get_leaves();
auto &leaves_for_symmetry = A.get_leaves_for_symmetry();
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves;
std::vector<const HMatrix<CoefficientPrecision, CoordinatePrecision> *> leaves_for_symmetry;
std::tie(leaves, leaves_for_symmetry) = get_leaves_from(A); // C++17 structured binding

int out_size(A.get_target_cluster().get_size());
auto get_output_cluster{&HMatrix<CoefficientPrecision, CoordinatePrecision>::get_target_cluster};
Expand Down
Loading

0 comments on commit ed3ee59

Please sign in to comment.