diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 30f96ada6c..c678272efb 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -4,6 +4,9 @@
 ### Documentation
 - Notebook on [multilevel modeling](https://docs.pymc.io/notebooks/multilevel_modeling.html) has been rewritten to showcase ArviZ and xarray usage for inference result analysis (see [#3963](https://github.com/pymc-devs/pymc3/pull/3963))
 
+### New features
+- Add sampler stats `process_time_diff`, `perf_counter_diff` and `perf_counter_start`, which record wall and CPU times for each NUTS and HMC sample (see [#3986](https://github.com/pymc-devs/pymc3/pull/3986)).
+
 ## PyMC3 3.9.2 (24 June 2020)
 ### Maintenance
 - Warning added in GP module when `input_dim` is lower than the number of columns in `X` to compute the covariance function (see [#3974](https://github.com/pymc-devs/pymc3/pull/3974)).
diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py
index a426f49111..7bbc722dee 100644
--- a/pymc3/step_methods/hmc/base_hmc.py
+++ b/pymc3/step_methods/hmc/base_hmc.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from collections import namedtuple
+import time
 
 import numpy as np
 import logging
@@ -132,6 +133,9 @@ def _hamiltonian_step(self, start, p0, step_size):
 
     def astep(self, q0):
         """Perform a single HMC iteration."""
+        perf_start = time.perf_counter()
+        process_start = time.process_time()
+
         p0 = self.potential.random()
         start = self.integrator.compute_state(q0, p0)
 
@@ -166,6 +170,9 @@ def astep(self, q0):
 
         hmc_step = self._hamiltonian_step(start, p0, step_size)
 
+        perf_end = time.perf_counter()
+        process_end = time.process_time()
+
         self.step_adapt.update(hmc_step.accept_stat, adapt_step)
         self.potential.update(hmc_step.end.q, hmc_step.end.q_grad, self.tune)
         if hmc_step.divergence_info:
@@ -191,7 +198,13 @@ def astep(self, q0):
         if not self.tune:
             self._samples_after_tune += 1
 
-        stats = {"tune": self.tune, "diverging": bool(hmc_step.divergence_info)}
+        stats = {
+            "tune": self.tune,
+            "diverging": bool(hmc_step.divergence_info),
+            "perf_counter_diff": perf_end - perf_start,
+            "process_time_diff": process_end - process_start,
+            "perf_counter_start": perf_start,
+        }
 
         stats.update(hmc_step.stats)
         stats.update(self.step_adapt.stats())
diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py
index 3542f1320d..9c7a533461 100644
--- a/pymc3/step_methods/hmc/hmc.py
+++ b/pymc3/step_methods/hmc/hmc.py
@@ -48,6 +48,9 @@ class HamiltonianMC(BaseHMC):
         'path_length': np.float64,
         'accepted': np.bool,
         'model_logp': np.float64,
+        'process_time_diff': np.float64,
+        'perf_counter_diff': np.float64,
+        'perf_counter_start': np.float64,
     }]
 
     def __init__(self, vars=None, path_length=2., max_steps=1024, **kwargs):
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
index d2409acca2..30b0705cc8 100644
--- a/pymc3/step_methods/hmc/nuts.py
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -72,6 +72,13 @@ class NUTS(BaseHMC):
       samples, the step size is set to this value. This should converge
       during tuning.
     - `model_logp`: The model log-likelihood for this sample.
+    - `process_time_diff`: The time it took to draw the sample, as defined
+      by the Python standard library `time.process_time`. This counts all
+      the CPU time, including worker threads in BLAS and OpenMP.
+    - `perf_counter_diff`: The time it took to draw the sample, as defined
+      by the Python standard library `time.perf_counter` (wall time).
+    - `perf_counter_start`: The value of `time.perf_counter` at the beginning
+      of the computation of the draw.
 
     References
     ----------
@@ -96,6 +103,9 @@ class NUTS(BaseHMC):
             "energy": np.float64,
             "max_energy_error": np.float64,
             "model_logp": np.float64,
+            "process_time_diff": np.float64,
+            "perf_counter_diff": np.float64,
+            "perf_counter_start": np.float64,
         }
     ]
 
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
index 2e4ce427a2..28448b8cad 100644
--- a/pymc3/tests/test_step.py
+++ b/pymc3/tests/test_step.py
@@ -979,7 +979,7 @@ def test_linalg(self, caplog):
 
     def test_sampler_stats(self):
         with Model() as model:
-            x = Normal("x", mu=0, sigma=1)
+            Normal("x", mu=0, sigma=1)
             trace = sample(draws=10, tune=1, chains=1)
 
         # Assert stats exist and have the correct shape.
@@ -995,6 +995,9 @@ def test_sampler_stats(self):
             "step_size_bar",
             "tree_size",
             "tune",
+            "perf_counter_diff",
+            "perf_counter_start",
+            "process_time_diff",
         }
         assert trace.stat_names == expected_stat_names
         for varname in trace.stat_names:
@@ -1002,7 +1005,8 @@ def test_sampler_stats(self):
 
         # Assert model logp is computed correctly: computing post-sampling
        # and tracking while sampling should give same results.
-        model_logp_ = np.array(
-            [model.logp(trace.point(i, chain=c)) for c in trace.chains for i in range(len(trace))]
-        )
+        model_logp_ = np.array([
+            model.logp(trace.point(i, chain=c))
+            for c in trace.chains for i in range(len(trace))
+        ])
         assert (trace.model_logp == model_logp_).all()
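For context, here is a minimal sketch of how the new timing stats could be read back after sampling. The toy model and the printed summary are illustrative, not part of the PR; only `get_sampler_stats` and the three stat names come from the changes above:

```python
import pymc3 as pm

# Toy model; any model sampled with NUTS or HMC records the new stats.
with pm.Model():
    pm.Normal("x", mu=0, sigma=1)
    trace = pm.sample(draws=500, tune=500, chains=1)

# Per-draw wall time, CPU time, and wall-clock start of each draw.
wall = trace.get_sampler_stats("perf_counter_diff")
cpu = trace.get_sampler_stats("process_time_diff")
start = trace.get_sampler_stats("perf_counter_start")

print("mean wall time per draw: %.6f s" % wall.mean())
# A CPU/wall ratio well above 1 suggests BLAS/OpenMP worker threads
# were doing significant work during the draws.
print("CPU/wall ratio: %.2f" % (cpu.sum() / wall.sum()))
```

Because `perf_counter_start` records when each draw began, the per-draw timings can also be lined up across chains, e.g. to spot a chain that stalls mid-run.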