From 7a68c2170fca568e4ad5f87b0113d773a7cf146f Mon Sep 17 00:00:00 2001
From: shaering <shaering@utexas.edu>
Date: Mon, 6 May 2024 09:41:38 -0500
Subject: [PATCH] add basic timing information (#278)

Co-authored-by: Sigfried Haering <shaering@oden.utexas.edu>
---
 src/loMach.cpp          | 74 +++++++++++++++++++++++++++++------------
 src/loMach.hpp          |  2 +-
 src/split_flow_base.hpp |  4 +++
 src/tomboulides.cpp     |  6 ++++
 src/tomboulides.hpp     |  6 ++++
 5 files changed, 69 insertions(+), 23 deletions(-)

diff --git a/src/loMach.cpp b/src/loMach.cpp
index 2e5fcaee6..cfbf3bed0 100644
--- a/src/loMach.cpp
+++ b/src/loMach.cpp
@@ -102,6 +102,8 @@ void LoMachSolver::initialize() {
   bool verbose = rank0_;
   if (verbose) grvy_printf(ginfo, "Initializing loMach solver.\n");
 
+  sw_setup_.Start();
+
   // Stash the order, just for convenience
   order = loMach_opts_.order;
 
@@ -289,6 +291,8 @@ void LoMachSolver::initialize() {
   sponge_->initializeViz(*pvdc_);
   extData_->initializeViz(*pvdc_);
   average_->initializeViz();
+
+  sw_setup_.Stop();
 }
 
 void LoMachSolver::UpdateTimestepHistory(double dt) {
@@ -374,19 +378,25 @@ void LoMachSolver::solveBegin() {
 }
 
 void LoMachSolver::solveStep() {
-  sw_step.Start();
+  sw_step_.Start();
 
   if (loMach_opts_.ts_opts_.integrator_type_ == LoMachTemporalOptions::CURL_CURL) {
     SetTimeIntegrationCoefficients(iter - iter_start_);
     extData_->step();
+    sw_thermChem_.Start();
     thermo_->step();
+    sw_thermChem_.Stop();
+    sw_flow_.Start();
     flow_->step();
+    sw_flow_.Stop();
+    sw_turb_.Start();
     turbModel_->step();
+    sw_turb_.Stop();
   } else {
     if (rank0_) std::cout << "Time integration not updated." << endl;
     exit(1);
   }
-  sw_step.Stop();
+  sw_step_.Stop();
 
   UpdateTimestepHistory(temporal_coeff_.dt);
   temporal_coeff_.time += temporal_coeff_.dt;
@@ -394,8 +404,8 @@ void LoMachSolver::solveStep() {
   iter++;
 
   if ((iter % loMach_opts_.timing_frequency_) == 0) {
-    double time_per_step = (sw_step.RealTime() - tlast_) / loMach_opts_.timing_frequency_;
-    tlast_ = sw_step.RealTime();
+    double time_per_step = (sw_step_.RealTime() - tlast_) / loMach_opts_.timing_frequency_;
+    tlast_ = sw_step_.RealTime();
 
     double max_time_per_step = 0.0;
     MPI_Reduce(&time_per_step, &max_time_per_step, 1, MPI_DOUBLE, MPI_MAX, 0, groupsMPI->getTPSCommWorld());
@@ -477,6 +487,9 @@ void LoMachSolver::solveEnd() {
   average_->writeViz(iter, temporal_coeff_.time, avg_opts_->save_mean_history_);
   MPI_Barrier(groupsMPI->getTPSCommWorld());
   if (rank0_ == true) std::cout << " ...complete!" << endl;
+
+  // timing information
+  PrintTimingData();
 }
 
 void LoMachSolver::solve() {
@@ -739,24 +752,6 @@ void LoMachSolver::SetTimeIntegrationCoefficients(int step) {
   }
 }
 
-void LoMachSolver::PrintTimingData() {
-  double my_rt[2], rt_max[2];
-
-  my_rt[0] = sw_setup.RealTime();
-  my_rt[1] = sw_step.RealTime();
-
-  MPI_Reduce(my_rt, rt_max, 2, MPI_DOUBLE, MPI_MAX, 0, pmesh_->GetComm());
-
-  if (pmesh_->GetMyRank() == 0) {
-    mfem::out << std::setw(10) << "SETUP" << std::setw(10) << "STEP"
-              << "\n";
-
-    mfem::out << std::setprecision(3) << std::setw(10) << my_rt[0] << std::setw(10) << my_rt[1] << "\n";
-
-    mfem::out << std::setprecision(8);
-  }
-}
-
 // query solver-specific runtime controls
 void LoMachSolver::parseSolverOptions() {
   // if (verbose) grvy_printf(ginfo, "parsing solver options...\n");
@@ -823,3 +818,38 @@ void LoMachSolver::parseSolverOptions() {
     loMach_opts_.compute_wallDistance = true;
   }
 }
+
+void LoMachSolver::PrintTimingData() {
+  double my_rt[7], rt_max[7];
+  my_rt[0] = sw_setup_.RealTime();
+  my_rt[1] = sw_step_.RealTime();
+  my_rt[2] = sw_turb_.RealTime();
+  my_rt[3] = sw_thermChem_.RealTime();
+  my_rt[4] = sw_flow_.RealTime();
+  my_rt[5] = flow_->getPressureSolveTimer();
+  my_rt[6] = flow_->getHelmholtzSolveTimer();
+
+  double dIters;
+  dIters = (double)(loMach_opts_.max_steps_ - iter_start_);
+
+  MPI_Reduce(my_rt, rt_max, 7, MPI_DOUBLE, MPI_MAX, 0, pmesh_->GetComm());
+
+  if (rank0_) {
+    mfem::out << "\n";
+    mfem::out << "TIMING REPORT. (sec/step and fraction of step) \n";
+    mfem::out << std::setw(10) << "SETUP" << std::setw(10) << "STEP" << std::setw(15) << "TURB-MODEL" << std::setw(15)
+              << "THERM-CHEM" << std::setw(10) << "FLOW:" << std::setw(12) << "PSOLVE" << std::setw(12) << "HSOLVE"
+              << "\n";
+
+    mfem::out << std::setprecision(5) << std::setw(10) << my_rt[0] / dIters << std::setw(10) << my_rt[1] / dIters
+              << std::setw(15) << my_rt[2] / dIters << std::setw(15) << my_rt[3] / dIters << std::setw(10)
+              << my_rt[4] / dIters << std::setw(12) << my_rt[5] / dIters << std::setw(12) << my_rt[6] / dIters << "\n";
+
+    mfem::out << std::setprecision(5) << std::setw(10) << " " << std::setw(10) << my_rt[1] / my_rt[1] << std::setw(15)
+              << my_rt[2] / my_rt[1] << std::setw(15) << my_rt[3] / my_rt[1] << std::setw(10) << my_rt[4] / my_rt[1]
+              << std::setw(12) << my_rt[5] / my_rt[1] << std::setw(12) << my_rt[6] / my_rt[1] << "\n";
+
+    mfem::out << std::setprecision(8);
+    mfem::out << "\n";
+  }
+}
diff --git a/src/loMach.hpp b/src/loMach.hpp
index 7de29b8a6..85df0ba28 100644
--- a/src/loMach.hpp
+++ b/src/loMach.hpp
@@ -189,7 +189,7 @@ class LoMachSolver : public TPS::PlasmaSolver {
   int order;
 
   // Timers.
-  StopWatch sw_setup, sw_step;
+  StopWatch sw_setup_, sw_step_, sw_turb_, sw_thermChem_, sw_flow_, sw_press_;
   double tlast_;
 
   // I/O helpers
diff --git a/src/split_flow_base.hpp b/src/split_flow_base.hpp
index 8241ce453..9ef8949f0 100644
--- a/src/split_flow_base.hpp
+++ b/src/split_flow_base.hpp
@@ -134,6 +134,10 @@ class FlowBase {
    * cases where we have an exact solution.
    */
   virtual double computeL2Error() const { return -1.0; }
+
+  /// Get timing
+  virtual double getPressureSolveTimer() { return -1.0; }
+  virtual double getHelmholtzSolveTimer() { return -1.0; }
 };
 
 class ZeroFlow final : public FlowBase {
diff --git a/src/tomboulides.cpp b/src/tomboulides.cpp
index 2688517de..4cbffcb25 100644
--- a/src/tomboulides.cpp
+++ b/src/tomboulides.cpp
@@ -1117,6 +1117,7 @@ void Tomboulides::step() {
 
   // Update the variable coefficient Laplacian to account for change
   // in density
+  sw_press_.Start();
   L_iorho_form_->Update();
   L_iorho_form_->Assemble();
   L_iorho_form_->FormSystemMatrix(pres_ess_tdof_, L_iorho_op_);
@@ -1146,6 +1147,7 @@ void Tomboulides::step() {
   L_iorho_inv_->SetRelTol(pressure_solve_rtol_);
   L_iorho_inv_->SetAbsTol(pressure_solve_atol_);
   L_iorho_inv_->SetMaxIter(pressure_solve_max_iter_);
+  sw_press_.Stop();
 
   // Update density weighted mass
   Array<int> empty;
@@ -1156,6 +1158,7 @@ void Tomboulides::step() {
   Mv_rho_inv_->SetOperator(*Mv_rho_op_);
 
   // Update the Helmholtz operator and inverse
+  sw_helm_.Start();
   Hv_bdfcoeff_.constant = coeff_.bd0 / dt;
   Hv_form_->Update();
   Hv_form_->Assemble();
@@ -1166,6 +1169,7 @@ void Tomboulides::step() {
     // TODO(trevilo): Support partial assembly
     assert(false);
   }
+  sw_helm_.Stop();
 
   //------------------------------------------------------------------------
   // Step 2: Compute vstar / dt (as in eqn 2.3 from Tomboulides)
@@ -1355,6 +1359,7 @@ void Tomboulides::step() {
   resp_vec_.Add(-coeff_.bd0 / dt, u_bdr_vec_);
 
   // Orthogonalize rhs if no Dirichlet BCs on pressure
+  sw_press_.Start();
   if (pres_dbcs_.empty()) {
     Orthogonalize(resp_vec_, pfes_);
   }
@@ -1381,6 +1386,7 @@ void Tomboulides::step() {
     if (rank0_) std::cout << "ERROR: Poisson solve did not converge." << std::endl;
     exit(1);
   }
+  sw_press_.Stop();
 
   // iter_spsolve = SpInv->GetNumIterations();
   // res_spsolve = SpInv->GetFinalNorm();
diff --git a/src/tomboulides.hpp b/src/tomboulides.hpp
index d10e05e9b..31303af4c 100644
--- a/src/tomboulides.hpp
+++ b/src/tomboulides.hpp
@@ -115,6 +115,8 @@ class Tomboulides final : public FlowBase {
   // reference here.
   const temporalSchemeCoefficients &coeff_;
 
+  StopWatch sw_press_, sw_helm_;
+
   std::string ic_string_;
 
   // Object used to build forcing
@@ -378,6 +380,10 @@ class Tomboulides final : public FlowBase {
   /// Evaluate error (only when exact solution is known)
   double computeL2Error() const final;
 
+  /// Get timings
+  double getPressureSolveTimer() { return sw_press_.RealTime(); }
+  double getHelmholtzSolveTimer() { return sw_helm_.RealTime(); }
+
   /// Add a Dirichlet boundary condition to the velocity field
   void addVelDirichletBC(const mfem::Vector &u, mfem::Array<int> &attr);
   void addVelDirichletBC(mfem::VectorCoefficient *coeff, mfem::Array<int> &attr);