diff --git a/src/main_gpumd/electron_stop.cu b/src/main_gpumd/electron_stop.cu
index 43c6f7b7d..3f07d0397 100644
--- a/src/main_gpumd/electron_stop.cu
+++ b/src/main_gpumd/electron_stop.cu
@@ -27,6 +27,7 @@ Apply electron stopping.
 static void __global__ find_stopping_force(
   const int num_atoms,
   const int num_points,
+  const double time_step,
   const double energy_min,
   const double energy_max,
   const double energy_interval_inverse,
@@ -34,7 +35,8 @@ static void __global__ find_stopping_force(
   const int* g_type,
   const double* g_mass,
   const double* g_velocity,
-  double* g_force)
+  double* g_force,
+  double* g_power_loss)
 {
   const int i = blockIdx.x * blockDim.x + threadIdx.x;
   if (i < num_atoms) {
@@ -66,6 +68,8 @@ static void __global__ find_stopping_force(
     g_force[0 * num_atoms + i] = vx * factor;
     g_force[1 * num_atoms + i] = vy * factor;
     g_force[2 * num_atoms + i] = vz * factor;
+
+    g_power_loss[i] = stopping_power * sqrt(v2) * time_step; 
   }
 }
 
@@ -119,7 +123,42 @@ apply_electron_stopping(const int num_atoms, const double* g_stopping_force, dou
   }
 }
 
-void Electron_Stop::compute(Atom& atom)
+__device__ double device_power_loss;
+
+static __global__ void find_power_loss(int num_atoms, double* g_power_loss)
+{
+  //<<<1, 1024>>>
+  int tid = threadIdx.x;
+  int block_size = blockDim.x;
+
+  int number_of_batches = (num_atoms + block_size - 1) / block_size;
+  __shared__ double s_f[1024];
+  double f = 0.0;
+
+  for (int batch = 0; batch < number_of_batches; ++batch) {
+      int idx = tid + batch * block_size;
+      if (idx < num_atoms) {
+          f += g_power_loss[idx];
+      }
+  }
+
+  s_f[tid] = f;
+  __syncthreads();
+
+  for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
+      if (tid < offset) {
+          s_f[tid] += s_f[tid + offset];
+      }
+      __syncthreads(); 
+  }
+
+  if (tid == 0) {
+      device_power_loss = s_f[0];
+  }
+
+}
+
+void Electron_Stop::compute(double time_step, Atom& atom)
 {
   if (!do_electron_stop) {
     return;
@@ -128,6 +167,7 @@ void Electron_Stop::compute(Atom& atom)
   find_stopping_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
     atom.number_of_atoms,
     num_points,
+    time_step,
     energy_min,
     energy_max,
     1.0 / energy_interval,
@@ -135,7 +175,9 @@ void Electron_Stop::compute(Atom& atom)
     atom.type.data(),
     atom.mass.data(),
     atom.velocity_per_atom.data(),
-    stopping_force.data());
+    stopping_force.data(),
+    stopping_loss.data());
+
   CUDA_CHECK_KERNEL
 
   find_force_average<<<3, 1024>>>(atom.number_of_atoms, stopping_force.data());
@@ -144,6 +186,13 @@ void Electron_Stop::compute(Atom& atom)
   apply_electron_stopping<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
     atom.number_of_atoms, stopping_force.data(), atom.force_per_atom.data());
   CUDA_CHECK_KERNEL
+
+  find_power_loss<<<1, 1024>>>(atom.number_of_atoms, stopping_loss.data());
+  CUDA_CHECK_KERNEL
+
+  double power_loss_host;  
+  CHECK(cudaMemcpyFromSymbol(&power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost));
+  stopping_power_loss += power_loss_host;
 }
 
 void Electron_Stop::parse(
@@ -203,7 +252,15 @@ void Electron_Stop::parse(
   stopping_power_gpu.resize(num_points * num_types);
   stopping_power_gpu.copy_from_host(stopping_power_cpu.data());
   stopping_force.resize(num_atoms * 3);
+  stopping_loss.resize(num_atoms);
   do_electron_stop = true;
 }
 
-void Electron_Stop::finalize() { do_electron_stop = false; }
+void Electron_Stop::finalize() 
+{ 
+  if (do_electron_stop) { 
+    printf("Total electron stopping power loss = %g eV.\n", stopping_power_loss);
+  }
+  do_electron_stop = false; 
+  stopping_power_loss = 0.0;
+}
diff --git a/src/main_gpumd/electron_stop.cuh b/src/main_gpumd/electron_stop.cuh
index 653f92bef..912181ce3 100644
--- a/src/main_gpumd/electron_stop.cuh
+++ b/src/main_gpumd/electron_stop.cuh
@@ -24,8 +24,9 @@ class Electron_Stop
 {
 public:
   bool do_electron_stop = false;
+  double stopping_power_loss = 0.0;
   void parse(const char** param, int num_param, const int num_atoms, const int num_types);
-  void compute(Atom& atom);
+  void compute(double time_step, Atom& atom);
   void finalize();
 
 private:
@@ -36,4 +37,5 @@ private:
   std::vector<double> stopping_power_cpu;
   GPU_Vector<double> stopping_power_gpu;
   GPU_Vector<double> stopping_force;
+  GPU_Vector<double> stopping_loss;
 };
diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu
index 20c63acac..982971334 100644
--- a/src/main_gpumd/run.cu
+++ b/src/main_gpumd/run.cu
@@ -241,7 +241,7 @@ void Run::perform_a_run()
     }
 #endif
 
-    electron_stop.compute(atom);
+    electron_stop.compute(time_step, atom);
 
     integrate.compute2(time_step, double(step) / number_of_steps, group, box, atom, thermo);