diff --git a/demonstrations/tutorial_spsa.py b/demonstrations/tutorial_spsa.py
index d4dcd6dee3..3c313a49df 100644
--- a/demonstrations/tutorial_spsa.py
+++ b/demonstrations/tutorial_spsa.py
@@ -14,7 +14,7 @@
    tutorial_vqe_qng Accelerating VQEs with quantum natural gradient
    qnspsa Quantum natural SPSA optimizer

-*Authors: Antal Szava & David Wierichs — Posted: 19 March 2021. Last updated: 10 February 2023.*
+*Authors: Antal Szava & David Wierichs — Posted: 19 March 2021. Last updated: 23 February 2023.*

In this tutorial, we investigate using a stochastic optimizer called the
Simultaneous Perturbation Stochastic Approximation (SPSA) algorithm to optimize quantum
@@ -29,7 +29,7 @@
2. The variational quantum eigensolver on a simulated hardware device.

Throughout the demo, we show results obtained with SPSA and with gradient
-descent and also compare the number of device executions required to complete
+descent and also compare the number of executed circuits required to complete
each optimization.

Background
@@ -147,7 +147,7 @@

.. note::

-    Just as with other PennyLane device, the number of samples taken for a device
+    Just as with other PennyLane devices, the number of samples taken for a circuit
    execution can be specified using the ``shots`` keyword argument of the
    device.

@@ -191,51 +191,41 @@ def circuit(param):
##############################################################################
# We will execute a few optimizations in this demo, so let's prepare a convenience
-# function that runs an optimizer instance and records the cost values and the
-# number of device executions along the way. The latter will be an interesting
-# quantity to evaluate the optimization cost on hardware!
+# function that runs an optimizer instance and records the cost values
+# along the way. Together with the number of executed circuits, these will be
+# interesting quantities for evaluating the optimization cost on hardware!

-def run_optimizer(optimizer, cost_function, init_param, num_steps, interval):
+def run_optimizer(opt, cost_function, init_param, num_steps, interval, execs_per_step):
    # Copy the initial parameters to make sure they are never overwritten
    param = init_param.copy()

-    # Obtain the device used in the cost function
-    dev = cost_function.device
-
-    # Initialize the memory for cost values and device executions during the optimization
+    # Initialize the memory for cost values during the optimization
    cost_history = []
-    exec_history = [0]

    # Monitor the initial cost value
    cost_history.append(cost_function(param))
-    execs_per_cost_eval = dev.num_executions
+    exec_history = [0]

-    print(
-        f"\nRunning the {optimizer.__class__.__name__} optimizer for {num_steps} iterations."
-    )
+    print(f"\nRunning the {opt.__class__.__name__} optimizer for {num_steps} iterations.")

    for step in range(num_steps):
-        # Perform an update step
-        param = optimizer.step(cost_function, param)
-
-        # Monitor the device executions, deducting the executions for cost monitoring
-        exec_history.append(dev.num_executions - (step + 1) * execs_per_cost_eval)
-
-        # Monitor the cost value
-        cost_history.append(cost_function(param))
-
        # Print out the status of the optimization
        if step % interval == 0:
            print(
-                f"Iteration = {step:3d}, "
-                f"Device executions = {exec_history[step]:4d}, "
+                f"Step {step:3d}: Circuit executions: {exec_history[step]:4d}, "
                f"Cost = {cost_history[step]}"
            )

+        # Perform an update step
+        param = opt.step(cost_function, param)
+
+        # Monitor the cost value
+        cost_history.append(cost_function(param))
+        exec_history.append((step + 1) * execs_per_step)
+
    print(
-        f"Iteration = {num_steps:3d}, Device executions = {exec_history[-1]:4d}, "
+        f"Step {num_steps:3d}: Circuit executions: {exec_history[-1]:4d}, "
        f"Cost = {cost_history[-1]}"
    )
-
    return cost_history, exec_history


@@ -269,32 +259,30 @@ def run_optimizer(optimizer, cost_function, init_param, num_steps, interval):
num_steps_spsa = 200
opt = qml.SPSAOptimizer(maxiter=num_steps_spsa, c=0.15, a=0.2)
+# We spend 2 circuit evaluations per step:
+execs_per_step = 2
cost_history_spsa, exec_history_spsa = run_optimizer(
-    opt, cost_function, init_param, num_steps_spsa, 20
+    opt, cost_function, init_param, num_steps_spsa, 20, execs_per_step
)

##############################################################################
# Now let's perform the same optimization using gradient descent. We set the
# step size according to a favourable value found after grid search for fast
-# convergence. Note that we also create a new device in order to reset the
-# execution count to 0. With the new device, we recreate the cost function as well.
-
-# Create a new device and a qnode as cost function
-device = qml.device("qiskit.aer", wires=num_wires, shots=1000)
-cost_function = qml.QNode(circuit, device)
+# convergence.

num_steps_grad = 15
opt = qml.GradientDescentOptimizer(stepsize=0.3)
+# We spend 2 circuit evaluations per parameter per step:
+execs_per_step = 2 * np.prod(param_shape)
cost_history_grad, exec_history_grad = run_optimizer(
-    opt, cost_function, init_param, num_steps_grad, 3
+    opt, cost_function, init_param, num_steps_grad, 3, execs_per_step
)

##############################################################################
# SPSA and gradient descent comparison
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
-# At this point, nothing else remains but to check which of these approaches did
-# better!
+# At this point, nothing else remains but to check which of these approaches did better!

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
@@ -302,7 +290,7 @@ def run_optimizer(optimizer, cost_function, init_param, num_steps, interval):
plt.plot(exec_history_grad, cost_history_grad, label="Gradient descent")
plt.plot(exec_history_spsa, cost_history_spsa, label="SPSA")

-plt.xlabel("Device executions", fontsize=14)
+plt.xlabel("Circuit executions", fontsize=14)
plt.ylabel("Cost function value", fontsize=14)
plt.grid()

@@ -316,15 +304,15 @@ def run_optimizer(optimizer, cost_function, init_param, num_steps, interval):
# compared to gradient descent!
#
# Let's take a deeper dive to see how much better it actually is by computing
-# the ratio of required device executions to reach an absolute accuracy of 0.01.
+# the ratio of required circuit executions to reach an absolute accuracy of 0.01.
#

grad_execs_to_prec = exec_history_grad[np.where(np.array(cost_history_grad) < -0.99)[0][0]]
spsa_execs_to_prec = exec_history_spsa[np.where(np.array(cost_history_spsa) < -0.99)[0][0]]

-print(f"Device execution ratio: {np.round(grad_execs_to_prec/spsa_execs_to_prec, 3)}.")
+print(f"Circuit execution ratio: {np.round(grad_execs_to_prec/spsa_execs_to_prec, 3)}.")

##############################################################################
# This means that SPSA found the minimum up to an absolute accuracy of 0.01 while
-# using ten times fewer device executions than gradient descent! That's a huge
+# using several times fewer circuit executions than gradient descent! That's a significant
# saving, especially when running the algorithm on actual quantum hardware.
#
# SPSA and the variational quantum eigensolver
@@ -398,14 +386,18 @@ def circuit(param):
# This random seed was used in the original VQE demo and is known to allow the
# gradient descent algorithm to converge to the global minimum.
np.random.seed(0)
-init_param = np.random.normal(0, np.pi, (2, num_qubits, 3), requires_grad=True)
+param_shape = (2, num_qubits, 3)
+init_param = np.random.normal(0, np.pi, param_shape, requires_grad=True)

# Initialize the optimizer - optimal step size was found through a grid search
opt = qml.GradientDescentOptimizer(stepsize=2.2)
+# We spend 2 * 15 circuit evaluations per parameter per step, as there are
+# 15 Hamiltonian terms.
+execs_per_step = 2 * 15 * np.prod(param_shape)

# Run the optimization
cost_history_grad, exec_history_grad = run_optimizer(
-    opt, cost_function, init_param, num_steps_grad, 3
+    opt, cost_function, init_param, num_steps_grad, 3, execs_per_step
)

final_energy = cost_history_grad[-1]

@@ -426,11 +418,11 @@ def circuit(param):
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
-plt.xlabel("Device executions", fontsize=14)
+plt.xlabel("Circuit executions", fontsize=14)
plt.ylabel("Energy (Ha)", fontsize=14)
plt.grid()

-plt.axhline(y=true_energy, color="black", linestyle="dashed", label="True energy")
+plt.axhline(y=true_energy, color="black", linestyle="--", label="True energy")
plt.legend(fontsize=14)

@@ -448,23 +440,26 @@ def circuit(param):
# ^^^^^^^^^^^^^
#
# Now let's perform the same experiment using SPSA for the VQE optimization.
-# SPSA should use only 2 device executions per term in the expectation value.
+# SPSA should use only 2 circuit executions per term in the expectation value.
# Since there are 15 terms and we choose 160 iterations with two evaluations for
-# each gradient estimate, we expect 4800 total device
-# executions. Again we create a new device and cost function in order to reset
-# the number of executions.
-
-noisy_device = qml.device(
-    "qiskit.aer", wires=num_qubits, shots=1000, noise_model=noise_model
-)
-cost_function = qml.QNode(circuit, noisy_device)
+# each gradient estimate, we expect 4800 total circuit
+# executions.
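+#
+# As a quick sanity check of this estimate, we can spell the counting out in
+# code. This is purely illustrative bookkeeping based on the numbers above, and
+# ``expected_execs`` is not used anywhere else in the demo:
+
+expected_execs = 2 * 15 * 160  # 2 evaluations per step * 15 terms * 160 steps
+print(f"Expected circuit executions for SPSA: {expected_execs}")  # 4800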

num_steps_spsa = 160
opt = qml.SPSAOptimizer(maxiter=num_steps_spsa, c=0.3, a=1.5)
-# Run the SPSA algorithm
+# We spend 2 * 15 circuit evaluations per step, as there are 15 Hamiltonian terms.
+execs_per_step = 2 * 15
+# Run the optimization
cost_history_spsa, exec_history_spsa = run_optimizer(
-    opt, cost_function, init_param, num_steps_spsa, 20
+    opt, cost_function, init_param, num_steps_spsa, 20, execs_per_step
)

final_energy = cost_history_spsa[-1]

@@ -476,15 +471,16 @@ def circuit(param):
##############################################################################
# The SPSA optimization seems to have found a similar energy value.
# We again take a look at how the optimization curves compare, in particular
-# with respect to the device executions spent on the task.
+# with respect to the circuit executions spent on the task.

plt.figure(figsize=(10, 6))
plt.plot(exec_history_grad, cost_history_grad, label="Gradient descent")
plt.plot(exec_history_spsa, cost_history_spsa, label="SPSA")
+plt.axhline(y=true_energy, color="black", linestyle="--", label="True energy")

plt.title("$H_2$ energy from VQE using gradient descent vs. SPSA", fontsize=16)
-plt.xlabel("Device executions", fontsize=14)
+plt.xlabel("Circuit executions", fontsize=14)
plt.ylabel("Energy (Ha)", fontsize=14)
plt.grid()

@@ -504,7 +500,7 @@ def circuit(param):
# ----------
#
# SPSA is a useful optimization technique that may be particularly beneficial on
-# near-term quantum hardware. It uses significantly fewer device executions to achieve
-# comparable results as gradient-based methods, giving it the potential
+# near-term quantum hardware. It uses significantly fewer circuit executions to achieve
+# results comparable to those of gradient-based methods, giving it the potential
# to save time and resources. It can be a good alternative to
# gradient-based methods when the optimization problem involves executing