Update ginkgo interface #669

Open · wants to merge 3 commits into base: develop
3 changes: 1 addition & 2 deletions src/Drivers/Sparse/CMakeLists.txt
@@ -18,11 +18,10 @@ add_executable(NlpSparseEx4.exe NlpSparseEx4.cpp NlpSparseEx4Driver.cpp)
target_link_libraries(NlpSparseEx4.exe HiOp::HiOp)

if(HIOP_USE_RAJA)
if(HIOP_USE_GPU AND HIOP_USE_CUDA)
if(HIOP_USE_GPU)
set_source_files_properties(
NlpSparseRajaEx2.cpp
NlpSparseRajaEx2Driver.cpp
PROPERTIES LANGUAGE CUDA
)

add_executable(NlpSparseRajaEx2.exe NlpSparseRajaEx2Driver.cpp NlpSparseRajaEx2.cpp)
2 changes: 2 additions & 0 deletions src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -239,8 +239,10 @@ int main(int argc, char **argv)
nlp.options->SetStringValue("fact_acceptor", "inertia_free");
nlp.options->SetIntegerValue("ir_outer_maxit", 0);
if (use_ginkgo_cuda) {
nlp.options->SetStringValue("compute_mode", "gpu");
nlp.options->SetStringValue("ginkgo_exec", "cuda");
} else if (use_ginkgo_hip) {
nlp.options->SetStringValue("compute_mode", "gpu");
nlp.options->SetStringValue("ginkgo_exec", "hip");
} else {
nlp.options->SetStringValue("ginkgo_exec", "reference");
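For reference, the pattern the sparse example drivers now share (the same two lines are added in NlpSparseEx2Driver.cpp and NlpSparseEx4Driver.cpp below): whenever a device-side Ginkgo executor is requested, the driver also switches HiOp's compute_mode to "gpu", while the reference executor keeps the default compute mode. A minimal sketch of that branch, assuming the drivers' existing booleans and an nlp object of the type used in these examples (taken here to be hiop::hiopNlpSparse); the helper function itself is hypothetical:

// Hypothetical helper condensing the branch added to the example drivers.
// Option keys and flag names come from the drivers; nothing here is new API.
void set_ginkgo_options(hiop::hiopNlpSparse& nlp, bool use_ginkgo_cuda, bool use_ginkgo_hip)
{
  if(use_ginkgo_cuda) {
    nlp.options->SetStringValue("compute_mode", "gpu");   // KKT system solved on the device
    nlp.options->SetStringValue("ginkgo_exec", "cuda");
  } else if(use_ginkgo_hip) {
    nlp.options->SetStringValue("compute_mode", "gpu");
    nlp.options->SetStringValue("ginkgo_exec", "hip");
  } else {
    nlp.options->SetStringValue("ginkgo_exec", "reference");  // stays in the default CPU compute mode
  }
}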
2 changes: 2 additions & 0 deletions src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -248,8 +248,10 @@ int main(int argc, char **argv)
nlp.options->SetStringValue("linsol_mode", "speculative");
nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
if (use_ginkgo_cuda) {
nlp.options->SetStringValue("compute_mode", "gpu");
nlp.options->SetStringValue("ginkgo_exec", "cuda");
} else if (use_ginkgo_hip) {
nlp.options->SetStringValue("compute_mode", "gpu");
nlp.options->SetStringValue("ginkgo_exec", "hip");
} else {
nlp.options->SetStringValue("ginkgo_exec", "reference");
2 changes: 2 additions & 0 deletions src/Drivers/Sparse/NlpSparseEx4Driver.cpp
@@ -238,8 +238,10 @@ int main(int argc, char **argv)
nlp.options->SetStringValue("fact_acceptor", "inertia_free");
nlp.options->SetIntegerValue("ir_outer_maxit", 0);
if (use_ginkgo_cuda) {
nlp.options->SetStringValue("compute_mode", "gpu");
nlp.options->SetStringValue("ginkgo_exec", "cuda");
} else if (use_ginkgo_hip) {
nlp.options->SetStringValue("compute_mode", "gpu");
nlp.options->SetStringValue("ginkgo_exec", "hip");
} else {
nlp.options->SetStringValue("ginkgo_exec", "reference");
14 changes: 13 additions & 1 deletion src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -256,12 +256,24 @@ int main(int argc, char **argv)
// only support cusolverLU right now, 2023.02.28
//lsq initialization of the duals fails for this example since the Jacobian is rank deficient
//use zero initialization
nlp.options->SetStringValue("linear_solver_sparse", "resolve");
if(use_resolve_cuda_rf) {
nlp.options->SetStringValue("linear_solver_sparse", "resolve");
nlp.options->SetStringValue("resolve_refactorization", "rf");
nlp.options->SetIntegerValue("ir_inner_maxit", 20);
nlp.options->SetIntegerValue("ir_outer_maxit", 0);
}
if (use_ginkgo) {
nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
nlp.options->SetIntegerValue("ir_outer_maxit", 0);
if (use_ginkgo_cuda) {
nlp.options->SetStringValue("ginkgo_exec", "cuda");
} else if (use_ginkgo_hip) {
nlp.options->SetStringValue("ginkgo_exec", "hip");
} else {
nlp.options->SetStringValue("ginkgo_exec", "reference");
nlp.options->SetStringValue("compute_mode", "cpu");
}
}
nlp.options->SetStringValue("duals_init", "zero");
nlp.options->SetStringValue("mem_space", "device");
nlp.options->SetStringValue("fact_acceptor", "inertia_free");
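In the RAJA sparse example the hard-wired linear_solver_sparse = "resolve" line is replaced by per-flag selection: use_resolve_cuda_rf keeps ReSolve with "rf" refactorization and inner iterative refinement, while use_ginkgo switches the backend to Ginkgo and picks its executor. A minimal sketch of the resulting branching, using the driver's own flags and option keys (the helper function itself is hypothetical):

// Hypothetical helper mirroring the selection now done inline in NlpSparseRajaEx2Driver.cpp.
void select_raja_ex2_solver(hiop::hiopNlpSparse& nlp,
                            bool use_resolve_cuda_rf,
                            bool use_ginkgo, bool use_ginkgo_cuda, bool use_ginkgo_hip)
{
  if(use_resolve_cuda_rf) {
    nlp.options->SetStringValue("linear_solver_sparse", "resolve");
    nlp.options->SetStringValue("resolve_refactorization", "rf");
    nlp.options->SetIntegerValue("ir_inner_maxit", 20);   // inner iterative refinement on
    nlp.options->SetIntegerValue("ir_outer_maxit", 0);    // outer iterative refinement off
  }
  if(use_ginkgo) {
    nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
    nlp.options->SetIntegerValue("ir_outer_maxit", 0);
    if(use_ginkgo_cuda) {
      nlp.options->SetStringValue("ginkgo_exec", "cuda");
    } else if(use_ginkgo_hip) {
      nlp.options->SetStringValue("ginkgo_exec", "hip");
    } else {
      nlp.options->SetStringValue("ginkgo_exec", "reference");
      nlp.options->SetStringValue("compute_mode", "cpu");  // the reference executor runs on the host
    }
  }
}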
78 changes: 56 additions & 22 deletions src/LinAlg/hiopLinSolverSparseGinkgo.cpp
@@ -212,13 +212,11 @@ std::shared_ptr<gko::Executor> create_exec(std::string executor_string)
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(),
true);
return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create());
}},
{"hip",
[] {
return gko::HipExecutor::create(0, gko::ReferenceExecutor::create(),
true);
return gko::HipExecutor::create(0, gko::ReferenceExecutor::create());
}},
{"dpcpp",
[] {
@@ -283,10 +281,19 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
nnz_{0},
index_covert_CSR2Triplet_{nullptr},
index_covert_extra_Diag2CSR_{nullptr}
{}
{
if(nlp_->options->GetString("mem_space") == "device") {
M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
}
}

hiopLinSolverSymSparseGinkgo::~hiopLinSolverSymSparseGinkgo()
{
// If memory space is device, delete allocated host mirrors
if(nlp_->options->GetString("mem_space") == "device") {
delete M_host_;
}

delete [] index_covert_CSR2Triplet_;
delete [] index_covert_extra_Diag2CSR_;
}
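The first hunk in this file drops the trailing device_reset argument from CudaExecutor::create and HipExecutor::create, i.e. it moves to the two-argument factories (device id plus host/master executor) of newer Ginkgo releases, where that flag is deprecated. A minimal sketch of such an executor selection map under that assumption, with Ginkgo built with the CUDA and HIP backends; the helper name make_executor is illustrative (in this file the function is create_exec):

#include <ginkgo/ginkgo.hpp>
#include <functional>
#include <map>
#include <memory>
#include <string>

// Sketch: translate an option string into a Ginkgo executor using the
// two-argument device factories, as in the hunk above. Unknown names throw.
std::shared_ptr<gko::Executor> make_executor(const std::string& name)
{
  const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>> factories{
    {"reference", [] { return gko::ReferenceExecutor::create(); }},
    {"omp",       [] { return gko::OmpExecutor::create(); }},
    {"cuda",      [] { return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create()); }},
    {"hip",       [] { return gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); }},
  };
  return factories.at(name)();  // std::out_of_range for unsupported executor strings
}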
@@ -304,11 +311,34 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
auto gmres_restart = nlp_->options->GetInteger("ir_inner_restart");
iterative_refinement_ = gmres_iter > 0;

host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
// If the matrix is on device, copy it to the host mirror
std::string mem_space = nlp_->options->GetString("mem_space");
auto M = M_;
if(mem_space == "device") {
auto host = exec_->get_master();
auto nnz = M_->numberOfNonzeros();
//host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
auto dv = gko::make_const_array_view(exec_, nnz, M_->M());
auto hv = gko::make_array_view(host, nnz, M_host_->M());
host->copy_from(exec_.get(), nnz, dv.get_const_data(), hv.get_data());
auto di = gko::make_const_array_view(exec_, nnz, M_->i_row());
auto hi = gko::make_array_view(host, nnz, M_host_->i_row());
host->copy_from(exec_.get(), nnz, di.get_const_data(), hi.get_data());
auto dj = gko::make_const_array_view(exec_, nnz, M_->j_col());
auto hj = gko::make_array_view(host, nnz, M_host_->j_col());
host->copy_from(exec_.get(), nnz, dj.get_const_data(), hj.get_data());
//host->copy_from(exec_.get(), nnz, M_->i_row(), M_host_->i_row());
//host->copy_from(exec_.get(), nnz, M_->j_col(), M_host_->j_col());
M = M_host_;
}

host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_);
nnz_ = mtx_->get_num_stored_elements();

reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart);

dense_b_ = gko::matrix::Dense<double>::create(exec_, gko::dim<2>{n_, 1});
}
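When mem_space is "device", the triplet matrix M_ lives in device memory while transferTripletToCSR and update_matrix consume host pointers, so firstCall (and matrixChanged below) stage the data through the M_host_ mirror with Executor::copy_from. A condensed sketch of that staging, factoring the three per-array copies above into one hypothetical helper; the Ginkgo call and the hiopMatrixSparse accessors are the ones used in the hunk:

// Hypothetical helper: pull one device-resident array of the triplet matrix
// into its host mirror. The destination (host) executor drives the copy.
template <typename T>
void copy_array_to_host(std::shared_ptr<const gko::Executor> dev,
                        std::shared_ptr<const gko::Executor> host,
                        gko::size_type n, const T* dev_ptr, T* host_ptr)
{
  host->copy_from(dev.get(), n, dev_ptr, host_ptr);  // device -> host transfer
}

// Usage mirroring the staging in firstCall():
//   auto host = exec_->get_master();
//   auto nnz  = M_->numberOfNonzeros();
//   copy_array_to_host(exec_, host, nnz, M_->M(),     M_host_->M());      // values
//   copy_array_to_host(exec_, host, nnz, M_->i_row(), M_host_->i_row());  // row indices
//   copy_array_to_host(exec_, host, nnz, M_->j_col(), M_host_->j_col());  // column indices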

int hiopLinSolverSymSparseGinkgo::matrixChanged()
@@ -321,7 +351,15 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
if( !mtx_ ) {
this->firstCall();
} else {
update_matrix(M_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
std::string mem_space = nlp_->options->GetString("mem_space");
auto M = M_;
if(mem_space == "device") {
auto host = exec_->get_master();
auto nnz = M_->numberOfNonzeros();
host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
M = M_host_;
}
update_matrix(M, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
}

gko_solver_ = gko::share(reusable_factory_->generate(mtx_));
@@ -350,24 +388,20 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk

nlp_->runStats.linsolv.tmTriuSolves.start();

hiopVectorPar* x = dynamic_cast<hiopVectorPar*>(&x_);
assert(x != NULL);
hiopVectorPar* rhs = dynamic_cast<hiopVectorPar*>(x->new_copy());
double* dx = x->local_data();
double* drhs = rhs->local_data();
std::string mem_space = nlp_->options->GetString("mem_space");
auto exec = host;
if(mem_space == "device") {
exec = exec_;
}

double* dx = x_.local_data();
const auto size = gko::dim<2>{(long unsigned int)n_, 1};
auto dense_x_host = vec::create(host, size, arr::view(host, n_, dx), 1);
auto dense_x = vec::create(exec_, size);
dense_x->copy_from(dense_x_host.get());
auto dense_b_host = vec::create(host, size, arr::view(host, n_, drhs), 1);
auto dense_b = vec::create(exec_, size);
dense_b->copy_from(dense_b_host.get());

gko_solver_->apply(dense_b.get(), dense_x.get());
auto dense_x = vec::create(exec, size, arr::view(exec, n_, dx), 1);
dense_b_->copy_from(dense_x.get());

gko_solver_->apply(dense_b_.get(), dense_x.get());
nlp_->runStats.linsolv.tmTriuSolves.stop();

dense_x_host->copy_from(dense_x.get());
delete rhs; rhs=nullptr;
return 1;
}
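The solve path no longer downcasts x_ to hiopVectorPar or allocates a fresh right-hand-side vector on every call: x_ is wrapped in a zero-copy Dense view on whichever executor matches mem_space, the right-hand side is stashed in the dense_b_ buffer allocated once in firstCall, and the solver overwrites the view in place. A minimal sketch of that pattern, assuming vec and arr are this file's aliases for gko::matrix::Dense<double> and gko::array<double>:

// Sketch of the reworked solve body: x_ arrives holding b and leaves holding x.
std::string mem_space = nlp_->options->GetString("mem_space");
auto exec = (mem_space == "device") ? exec_ : exec_->get_master();  // keep data where it already lives

double* dx = x_.local_data();
const auto size = gko::dim<2>{static_cast<gko::size_type>(n_), 1};

// Zero-copy view over x_; no per-solve allocation of solution or rhs vectors.
auto dense_x = vec::create(exec, size, arr::view(exec, n_, dx), 1);

dense_b_->copy_from(dense_x.get());                   // save b before it is overwritten
gko_solver_->apply(dense_b_.get(), dense_x.get());    // x <- K^{-1} b, written back through the view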

3 changes: 3 additions & 0 deletions src/LinAlg/hiopLinSolverSparseGinkgo.hpp
@@ -90,12 +90,15 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse
std::shared_ptr<gko::Executor> exec_;
std::shared_ptr<gko::matrix::Csr<double, int>> mtx_;
std::shared_ptr<gko::matrix::Csr<double, int>> host_mtx_;
std::shared_ptr<gko::matrix::Dense<double>> dense_b_;
std::shared_ptr<gko::LinOpFactory> reusable_factory_;
std::shared_ptr<gko::LinOp> gko_solver_;
bool iterative_refinement_;

static const std::map<std::string, gko::solver::trisolve_algorithm> alg_map_;

hiopMatrixSparse* M_host_{ nullptr }; ///< Host mirror for the KKT matrix

public:

/** called the very first time a matrix is factored. Allocates space
28 changes: 25 additions & 3 deletions src/Optimization/hiopKKTLinSysSparse.cpp
@@ -317,9 +317,7 @@ namespace hiop
if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") {
//ma57, pardiso and strumpack are not available or user requested ginkgo
#ifdef HIOP_USE_GINKGO
nlp_->log->printf(hovScalars,
"KKT_SPARSE_XYcYd linsys: alloc GINKGO with matrix size %d (%d cons)\n",
n, neq+nineq);
linsol_actual = "GINKGO";
linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
#endif // HIOP_USE_GINKGO
}
@@ -376,6 +374,14 @@ namespace hiop
linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_);
#endif // HIOP_USE_PARDISO
}

if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") {
//ma57, pardiso and strumpack are not available or user requested ginkgo
#ifdef HIOP_USE_GINKGO
linsol_actual = "GINKGO";
linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
#endif // HIOP_USE_GINKGO
}

if(linSys_) {
nlp_->log->printf(hovScalars,
@@ -747,6 +753,14 @@ namespace hiop
#endif // HIOP_USE_PARDISO
}

if(nullptr == linSys_ && linear_solver == "ginkgo") {
//ma57, pardiso and strumpack are not available or user requested ginkgo
#ifdef HIOP_USE_GINKGO
linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
actual_lin_solver = "GINKGO";
#endif // HIOP_USE_GINKGO
}

if(linSys_) {
nlp_->log->printf(hovScalars,
"KKT_SPARSE_XDYcYd linsys: alloc [%s] size %d (%d cons) (hybrid)\n",
@@ -781,6 +795,14 @@
}
#endif
} //end resolve

if(nullptr == linSys_ && linear_solver == "ginkgo") {
//ma57, pardiso and strumpack are not available or user requested ginkgo
#ifdef HIOP_USE_GINKGO
linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
actual_lin_solver = "GINKGO";
#endif // HIOP_USE_GINKGO
}
} // end of compute mode gpu
}
assert(linSys_&& "KKT_SPARSE_XDYcYd linsys: cannot instantiate backend linear solver");
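The additions in this file all follow the try-in-order allocation already used for the other sparse solvers: Ginkgo is instantiated either on explicit request or, in the hybrid/cpu paths, as the "auto" fallback once MA57, ReSolve, STRUMPACK, and PARDISO have been ruled out; in the gpu compute-mode path it must be requested explicitly. Schematically, with the guard simplified and the variable recording the chosen backend named linsol_actual or actual_lin_solver depending on the KKT class:

// Schematic of the fallback block this PR inserts into several branches of
// hiopKKTLinSysSparse.cpp; identifiers match the surrounding file.
if(nullptr == linSys_ && (linear_solver == "auto" || linear_solver == "ginkgo")) {
#ifdef HIOP_USE_GINKGO
  linsol_actual = "GINKGO";   // reported by the hovScalars log message shown above
  linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
#endif // HIOP_USE_GINKGO
}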