diff --git a/docs/examples/bayesian_optimisation.py b/docs/examples/bayesian_optimisation.py
index a2361814d..39feba64f 100644
--- a/docs/examples/bayesian_optimisation.py
+++ b/docs/examples/bayesian_optimisation.py
@@ -164,13 +164,21 @@
 # $$f(x) = (6x - 2)^2 \sin(12x - 4)$$
 #
 # treating $f$ as a black-box function. Moreover, we shall restrict the domain of the
-# function to $\mathbf{x} \in [0, 1]$. The global minimum of this function is located at
-# $x = 0.757$, where $f(x) = -6.021$.
+# function to $\mathbf{x} \in [0, 1]$. We shall also *standardise* the output of the
+# function, so that it has a mean of 0 and a standard deviation of 1. This is common
+# practice when using GPs: we are using a zero-mean prior, so ensuring that our data has
+# a mean of zero is consistent with this, and the scale parameters of the covariance
+# function are often initialised, or given priors, under the assumption that the
+# function being modelled has unit variance. For similar reasons, it can also be useful
+# to normalise the inputs to a GP. The global minimum of this (standardised) function is
+# located at $x = 0.757$, where $f(x) = -1.463$.
 
 
 # %%
-def forrester(x: Float[Array, "N 1"]) -> Float[Array, "N 1"]:
-    return (6 * x - 2) ** 2 * jnp.sin(12 * x - 4)
+def standardised_forrester(x: Float[Array, "N 1"]) -> Float[Array, "N 1"]:
+    mean = 0.45321
+    std = 4.4258
+    return ((6 * x - 2) ** 2 * jnp.sin(12 * x - 4) - mean) / std
 
 
 # %% [markdown]
@@ -189,7 +197,7 @@ def forrester(x: Float[Array, "N 1"]) -> Float[Array, "N 1"]:
 initial_x = tfp.mcmc.sample_halton_sequence(
     dim=1, num_results=initial_sample_num, seed=key, dtype=jnp.float64
 ).reshape(-1, 1)
-initial_y = forrester(initial_x)
+initial_y = standardised_forrester(initial_x)
 
 D = gpx.Dataset(X=initial_x, y=initial_y)
 
@@ -204,7 +212,7 @@ def return_optimised_posterior(
     data: gpx.Dataset, prior: gpx.base.Module, key: Array
 ) -> gpx.base.Module:
     likelihood = gpx.likelihoods.Gaussian(
-        num_datapoints=data.n, obs_stddev=jnp.array(1e-3)
+        num_datapoints=data.n, obs_stddev=jnp.array(1e-6)
     )  # Our function is noise-free, so we set the observation noise's standard deviation to a very small value
     likelihood = likelihood.replace_trainable(obs_stddev=False)
 
@@ -214,7 +222,7 @@ def return_optimised_posterior(
     negative_mll(posterior, train_data=data)
     negative_mll = jit(negative_mll)
 
-    opt_posterior, history = gpx.fit(
+    opt_posterior, _ = gpx.fit(
         model=posterior,
         objective=negative_mll,
         train_data=data,
@@ -301,7 +309,7 @@ def optimise_sample(
 
 
 x_star = optimise_sample(approx_sample, key, lower_bound, upper_bound, 100)
-y_star = forrester(x_star)
+y_star = standardised_forrester(x_star)
 
 
 # %% [markdown]
@@ -321,7 +329,7 @@ def plot_bayes_opt(
     queried_x: ScalarFloat,
 ) -> None:
     plt_x = jnp.linspace(0, 1, 1000).reshape(-1, 1)
-    forrester_y = forrester(plt_x)
+    forrester_y = standardised_forrester(plt_x)
     sample_y = sample(plt_x)
 
     latent_dist = posterior.predict(plt_x, train_data=dataset)
@@ -392,7 +400,7 @@ def plot_bayes_opt(
 initial_x = tfp.mcmc.sample_halton_sequence(
     dim=1, num_results=initial_sample_num, seed=key, dtype=jnp.float64
 ).reshape(-1, 1)
-initial_y = forrester(initial_x)
+initial_y = standardised_forrester(initial_x)
 D = gpx.Dataset(X=initial_x, y=initial_y)
 
 for i in range(bo_iters):
@@ -415,7 +423,7 @@ def plot_bayes_opt(
     plot_bayes_opt(opt_posterior, approx_sample, D, x_star)
 
     # Evaluate the black-box function at the best point observed so far, and add it to the dataset
-    y_star = forrester(x_star)
+    y_star = standardised_forrester(x_star)
     print(f"Queried Point: {x_star}, Black-Box Function Value: {y_star}")
     D = D + gpx.Dataset(X=x_star, y=y_star)
 
@@ -435,7 +443,7 @@ def plot_bayes_opt(
 cumulative_best_y = jax.lax.associative_scan(jax.numpy.minimum, D.y)
 ax.plot(fn_evaluations, cumulative_best_y)
 ax.axvline(x=initial_sample_num, linestyle=":")
-ax.axhline(y=-6.0207, linestyle="--", label="True Minimum")
+ax.axhline(y=-1.463, linestyle="--", label="True Minimum")
 ax.set_xlabel("Number of Black-Box Function Evaluations")
 ax.set_ylabel("Best Observed Value")
 ax.legend()
@@ -452,18 +460,23 @@ def plot_bayes_opt(
 #
 # $$f(x_1, x_2) = (4 - 2.1x_1^2 + \frac{x_1^4}{3})x_1^2 + x_1x_2 + (-4 + 4x_2^2)x_2^2$$
 #
-# We'll be evaluating it over the domain $x_1 \in [-2, 2]$ and $x_2 \in [-1, 1]$. The
-# global minima of this function are located at $\mathbf{x} = (0.0898, -0.7126)$ and $\mathbf{x} = (-0.0898, 0.7126)$, where the function takes the value $f(\mathbf{x}) = -1.0316$.
+# We'll be evaluating it over the domain $x_1 \in [-2, 2]$ and $x_2 \in [-1, 1]$, and
+# shall standardise it in the same manner as before. The global minima of this
+# function are located at $\mathbf{x} = (0.0898, -0.7126)$ and
+# $\mathbf{x} = (-0.0898, 0.7126)$, where the standardised function takes the value
+# $f(\mathbf{x}) = -1.8377$.
 
 
 # %%
-def six_hump_camel(x: Float[Array, "N 2"]) -> Float[Array, "N 1"]:
+def standardised_six_hump_camel(x: Float[Array, "N 2"]) -> Float[Array, "N 1"]:
+    mean = 1.12767
+    std = 1.17500
     x1 = x[..., :1]
     x2 = x[..., 1:]
     term1 = (4 - 2.1 * x1**2 + x1**4 / 3) * x1**2
     term2 = x1 * x2
     term3 = (-4 + 4 * x2**2) * x2**2
-    return term1 + term2 + term3
+    return (term1 + term2 + term3 - mean) / std
 
 
 # %% [markdown]
@@ -474,7 +487,7 @@ def six_hump_camel(x: Float[Array, "N 2"]) -> Float[Array, "N 1"]:
 x2 = jnp.linspace(-1, 1, 100)
 x1, x2 = jnp.meshgrid(x1, x2)
 x = jnp.stack([x1.flatten(), x2.flatten()], axis=1)
-y = six_hump_camel(x)
+y = standardised_six_hump_camel(x)
 
 fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
 surf = ax.plot_surface(
@@ -521,7 +534,7 @@ def six_hump_camel(x: Float[Array, "N 2"]) -> Float[Array, "N 1"]:
 lower_bound = jnp.array([-2.0, -1.0])
 upper_bound = jnp.array([2.0, 1.0])
 initial_sample_num = 5
-bo_iters = 11
+bo_iters = 12
 num_experiments = 5
 bo_experiment_results = []
 
@@ -532,7 +545,7 @@ def six_hump_camel(x: Float[Array, "N 2"]) -> Float[Array, "N 1"]:
         dim=2, num_results=initial_sample_num, seed=key, dtype=jnp.float64
     )
     initial_x = jnp.array(lower_bound + (upper_bound - lower_bound) * initial_x)
-    initial_y = six_hump_camel(initial_x)
+    initial_y = standardised_six_hump_camel(initial_x)
     D = gpx.Dataset(X=initial_x, y=initial_y)
 
     for i in range(bo_iters):
@@ -559,7 +572,7 @@ def six_hump_camel(x: Float[Array, "N 2"]) -> Float[Array, "N 1"]:
         )
         # Evaluate the black-box function at the best point observed so far, and add it to the dataset
-        y_star = six_hump_camel(x_star)
+        y_star = standardised_six_hump_camel(x_star)
         print(
             f"BO Iteration: {i + 1}, Queried Point: {x_star}, Black-Box Function Value:"
            f" {y_star}"
         )
@@ -587,7 +600,7 @@ def six_hump_camel(x: Float[Array, "N 2"]) -> Float[Array, "N 1"]:
         minval=lower_bound,
         maxval=upper_bound,
     )
-    final_y = six_hump_camel(final_x)
+    final_y = standardised_six_hump_camel(final_x)
     random_x = jnp.concatenate([initial_x, final_x], axis=0)
     random_y = jnp.concatenate([initial_y, final_y], axis=0)
     random_experiment_results.append(gpx.Dataset(X=random_x, y=random_y))
@@ -628,12 +641,12 @@ def obtain_log_regret_statistics(
 
 
 bo_log_regret_mean, bo_log_regret_std = obtain_log_regret_statistics(
-    bo_experiment_results, -1.031625
+    bo_experiment_results, -1.8377
 )
 (
     random_log_regret_mean,
     random_log_regret_std,
-) = obtain_log_regret_statistics(random_experiment_results, -1.031625)
+) = obtain_log_regret_statistics(random_experiment_results, -1.8377)
 
 # %% [markdown]
 # Now, when we plot the mean and standard deviation of the log regret at each iteration,
@@ -665,7 +678,7 @@ def obtain_log_regret_statistics(
 # %% [markdown]
 # It can also be useful to plot the queried points over the course of a single BO run, in
 # order to gain some insight into how the algorithm queries the search space. Below
-# we do this for the first BO experiment, and can see that the algorithm initially
+# we do this for one of the BO experiments, and can see that the algorithm initially
 # performs some exploration of the search space whilst it is uncertain about the black-box
 # function, but it then hones in on one of the global minima of the function, as we would hope!
 
@@ -684,8 +697,8 @@ def obtain_log_regret_statistics(
 )
 ax.scatter(x_star_two[0][0], x_star_two[0][1], marker="*", color=cols[2], zorder=2)
 ax.scatter(
-    bo_experiment_results[0].X[:, 0],
-    bo_experiment_results[0].X[:, 1],
+    bo_experiment_results[1].X[:, 0],
+    bo_experiment_results[1].X[:, 1],
     marker="x",
     color=cols[1],
    label="Bayesian Optimisation Queries",
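
A note on the standardisation constants introduced above: the diff does not state how `mean = 0.45321` / `std = 4.4258` and `mean = 1.12767` / `std = 1.17500` were obtained, but they are consistent with the mean and standard deviation of the raw Forrester and six-hump camel functions over their stated domains. The sketch below is an illustration only (not part of the change): it approximates those moments on a dense grid and checks that the quoted standardised minima follow from the raw minima given elsewhere in the example (`-6.021` and `-1.0316`).

```python
import jax.numpy as jnp


def forrester(x):
    # Raw (unstandardised) Forrester function on [0, 1].
    return (6 * x - 2) ** 2 * jnp.sin(12 * x - 4)


def six_hump_camel(x1, x2):
    # Raw (unstandardised) six-hump camel function on [-2, 2] x [-1, 1].
    return (4 - 2.1 * x1**2 + x1**4 / 3) * x1**2 + x1 * x2 + (-4 + 4 * x2**2) * x2**2


# Approximate the output mean/std of each function over its domain with a dense grid.
x = jnp.linspace(0.0, 1.0, 10_000)
f = forrester(x)
print(f.mean(), f.std())  # approximately 0.45321 and 4.4258

x1, x2 = jnp.meshgrid(jnp.linspace(-2.0, 2.0, 1_000), jnp.linspace(-1.0, 1.0, 1_000))
g = six_hump_camel(x1, x2)
print(g.mean(), g.std())  # approximately 1.12767 and 1.17500

# The standardised optima quoted in the text then follow from the raw minima:
print((-6.0207 - 0.45321) / 4.4258)  # ≈ -1.463
print((-1.0316 - 1.12767) / 1.17500)  # ≈ -1.8377
```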
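If queried values are to be reported on the original scale of the objective, rather than the standardised scale used for modelling, the transformation can simply be inverted. The helper below is hypothetical (it does not appear in the example) and just applies the constants from `standardised_forrester` in reverse.

```python
def unstandardise_forrester(y_standardised):
    # Invert the standardisation applied in standardised_forrester:
    # y_raw = y_standardised * std + mean.
    return y_standardised * 4.4258 + 0.45321


# For example, the standardised global minimum maps back to the raw Forrester minimum:
print(unstandardise_forrester(-1.463))  # ≈ -6.021
```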