Merge pull request #218 from pybop-team/210-add-likelihood-classes
Adds Base Likelihoods, Maximum Likelihood Example
BradyPlanden authored Mar 8, 2024
2 parents 1e6b07d + 13309fb commit da016b4
Showing 14 changed files with 491 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

## Features

- [#218](https://github.com/pybop-team/PyBOP/pull/218) - Adds a likelihood base class, `GaussianLogLikelihoodKnownSigma`, `GaussianLogLikelihood`, and a `ProbabilityBased` cost function, as well as a maximum likelihood estimation (MLE) example; a minimal usage sketch follows this list.
- [#185](https://github.com/pybop-team/PyBOP/pull/185) - Adds a pull request template, additional nox sessions `quick` for standard tests + docs, `pre-commit` for pre-commit, `test` to run all standard tests, `doctest` for docs.
- [#215](https://github.com/pybop-team/PyBOP/pull/215) - Adds `release_workflow.md` and updates `release_action.yaml`.
- [#204](https://github.com/pybop-team/PyBOP/pull/204) - Splits integration, unit, examples, and plots tests, and updates workflows. Adds pytest `--examples`, `--integration`, `--plots` args. Adds tests for coverage after removal of examples. Adds examples and integrations nox sessions. Adds `pybop.RMSE._evaluateS1()` method.
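
For context, a minimal sketch of the interface this PR adds (it mirrors `examples/scripts/spm_MLE.py` below; the `problem` object is assumed to be an existing `pybop.FittingProblem`):

import pybop

# Wrap a fitting problem in a likelihood, then maximise it with an optimiser.
likelihood = pybop.GaussianLogLikelihoodKnownSigma(problem, sigma=[0.005])
optim = pybop.Optimisation(likelihood, optimiser=pybop.CMAES)
x, final_cost = optim.run()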
1 change: 1 addition & 0 deletions examples/scripts/spm_IRPropMin.py
@@ -17,6 +17,7 @@
    ),
]

# Generate data
sigma = 0.001
t_eval = np.arange(0, 900, 2)
values = model.predict(t_eval=t_eval)
70 changes: 70 additions & 0 deletions examples/scripts/spm_MLE.py
@@ -0,0 +1,70 @@
import numpy as np
import pybop

# Define model
parameter_set = pybop.ParameterSet.pybamm("Chen2020")
model = pybop.lithium_ion.SPM(parameter_set=parameter_set)

# Fitting parameters
parameters = [
    pybop.Parameter(
        "Negative electrode active material volume fraction",
        prior=pybop.Gaussian(0.6, 0.05),
        bounds=[0.5, 0.8],
    ),
    pybop.Parameter(
        "Positive electrode active material volume fraction",
        prior=pybop.Gaussian(0.48, 0.05),
        bounds=[0.4, 0.7],
    ),
]

# Set initial parameter values
parameter_set.update(
    {
        "Negative electrode active material volume fraction": 0.63,
        "Positive electrode active material volume fraction": 0.51,
    }
)
# Generate data
sigma = 0.005
t_eval = np.arange(0, 900, 2)
values = model.predict(t_eval=t_eval)
corrupt_values = values["Voltage [V]"].data + np.random.normal(0, sigma, len(t_eval))

# Form dataset
dataset = pybop.Dataset(
    {
        "Time [s]": t_eval,
        "Current function [A]": values["Current [A]"].data,
        "Voltage [V]": corrupt_values,
    }
)

# Generate problem, cost function, and optimisation class
problem = pybop.FittingProblem(model, parameters, dataset)
likelihood = pybop.GaussianLogLikelihoodKnownSigma(problem, sigma=[sigma])  # known noise std, one entry per output
optim = pybop.Optimisation(likelihood, optimiser=pybop.CMAES)
optim.set_max_unchanged_iterations(20)
optim.set_min_iterations(20)
optim.set_max_iterations(100)

# Run the optimisation
x, final_cost = optim.run()
print("Estimated parameters:", x)

# Plot the timeseries output
pybop.quick_plot(x[0:2], likelihood, title="Optimised Comparison")

# Plot convergence
pybop.plot_convergence(optim)

# Plot the parameter traces
pybop.plot_parameters(optim)

# Plot the cost landscape
pybop.plot_cost2d(likelihood, steps=15)

# Plot the cost landscape with optimisation path and updated bounds
bounds = np.array([[0.55, 0.77], [0.48, 0.68]])
pybop.plot_cost2d(likelihood, optim=optim, bounds=bounds, steps=15)
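
A note on what this script optimises: `GaussianLogLikelihoodKnownSigma` scores the residuals under i.i.d. Gaussian noise with the supplied sigma,

log L(theta) = -(n/2) * log(2*pi*sigma^2) - (1/(2*sigma^2)) * sum_i (y_i - yhat_i(theta))^2,

so with sigma fixed, maximising the likelihood selects the same parameters as minimising the sum of squared errors; the constant offset and the multiplier on the squared residuals are precomputed in `pybop/costs/_likelihoods.py` below.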
4 changes: 2 additions & 2 deletions examples/standalone/cost.py
@@ -18,7 +18,7 @@ class StandaloneCost(pybop.BaseCost):
        BaseCost interface.
    x0 : array-like
        The initial guess for the optimization problem, set to [4.2].
    n_parameters : int
    _n_parameters : int
        The number of parameters in the model, which is 1 in this case.
    bounds : dict
        A dictionary containing the lower and upper bounds for the parameter,
@@ -40,7 +40,7 @@ def __init__(self, problem=None):
        super().__init__(problem)

        self.x0 = np.array([4.2])
        self.n_parameters = len(self.x0)
        self._n_parameters = len(self.x0)

        self.bounds = dict(
            lower=[-1],
14 changes: 10 additions & 4 deletions pybop/__init__.py
@@ -23,6 +23,11 @@
# Absolute path to the pybop repo
script_path = path.dirname(__file__)

#
# Problem class
#
from ._problem import BaseProblem, FittingProblem, DesignProblem

#
# Cost function class
#
@@ -37,6 +42,11 @@
    GravimetricEnergyDensity,
    VolumetricEnergyDensity,
)
from .costs._likelihoods import (
    BaseLikelihood,
    GaussianLogLikelihood,
    GaussianLogLikelihoodKnownSigma,
)

#
# Dataset class
@@ -84,10 +94,6 @@
from .parameters.parameter_set import ParameterSet
from .parameters.priors import Gaussian, Uniform, Exponential

#
# Problem class
#
from ._problem import FittingProblem, DesignProblem

#
# Observer classes
38 changes: 29 additions & 9 deletions pybop/_optimisation.py
@@ -29,7 +29,7 @@ class Optimisation:
        Initial parameter values for the optimization.
    bounds : dict
        Dictionary containing the parameter bounds with keys 'lower' and 'upper'.
    n_parameters : int
    _n_parameters : int
        Number of parameters in the optimization problem.
    sigma0 : float or sequence
        Initial step size or standard deviation for the optimiser.
@@ -40,19 +40,20 @@
    def __init__(
        self,
        cost,
        x0=None,
        optimiser=None,
        sigma0=None,
        verbose=False,
        physical_viability=True,
        allow_infeasible_solutions=True,
    ):
        self.cost = cost
        self.x0 = x0 or cost.x0
        self.optimiser = optimiser
        self.verbose = verbose
        self.x0 = cost.x0
        self.bounds = cost.bounds
        self.sigma0 = sigma0 or cost.sigma0
        self.n_parameters = cost.n_parameters
        self._n_parameters = cost._n_parameters
        self.physical_viability = physical_viability
        self.allow_infeasible_solutions = allow_infeasible_solutions
        self.log = []
@@ -74,12 +75,10 @@ def __init__(
        self._transformation = None

        # Check if minimising or maximising
        self._minimising = not isinstance(cost, pints.LogPDF)
        if self._minimising:
            self._function = self.cost
        else:
            self._function = pints.ProbabilityBasedError(cost)
        del cost
        if isinstance(cost, pybop.BaseLikelihood):
            self.cost._minimising = False
        self._minimising = self.cost._minimising
        self._function = self.cost

        # Construct Optimiser
        self.pints = True
@@ -122,6 +121,10 @@ def __init__(
        self._max_iterations = None
        self.set_max_iterations()

        # Minimum iterations
        self._min_iterations = None
        self.set_min_iterations()

        # Maximum unchanged iterations
        self._unchanged_threshold = 1  # smallest significant f change
        self._unchanged_max_iterations = None
@@ -289,6 +292,7 @@ def _run_pints(self):
            halt = (
                self._unchanged_max_iterations is not None
                and unchanged_iterations >= self._unchanged_max_iterations
                and (self._min_iterations is None or iteration >= self._min_iterations)
            )
            if running and halt:
                running = False
@@ -448,6 +452,22 @@ def set_max_iterations(self, iterations=1000):
            raise ValueError("Maximum number of iterations cannot be negative.")
        self._max_iterations = iterations

    def set_min_iterations(self, iterations=2):
        """
        Set the minimum number of iterations to run before any stopping
        criterion can halt the optimisation.

        Parameters
        ----------
        iterations : int, optional
            The minimum number of iterations to run (default is 2).
            Set to `None` to remove this constraint.
        """
        if iterations is not None:
            iterations = int(iterations)
            if iterations < 0:
                raise ValueError("Minimum number of iterations cannot be negative.")
        self._min_iterations = iterations

    def set_max_unchanged_iterations(self, iterations=5, threshold=1e-5):
        """
        Set the maximum number of iterations without significant change as a stopping criterion.
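
A minimal sketch of how the new minimum-iterations criterion interacts with the other stopping controls (method names as in this file; `likelihood` as in the MLE example above):

optim = pybop.Optimisation(likelihood, optimiser=pybop.CMAES)
optim.set_min_iterations(20)            # never halt before iteration 20
optim.set_max_unchanged_iterations(20)  # halt once f has stalled for 20 iterations
optim.set_max_iterations(100)           # hard cap regardless of progress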
164 changes: 164 additions & 0 deletions pybop/costs/_likelihoods.py
@@ -0,0 +1,164 @@
import numpy as np
from pybop.costs.base_cost import BaseCost


class BaseLikelihood(BaseCost):
    """
    Base class for likelihoods
    """

    def __init__(self, problem, sigma=None):
        super().__init__(problem, sigma)
        self._n_times = problem.n_time_data

    def set_sigma(self, sigma):
        """
        Setter for sigma parameter
        """
        if not isinstance(sigma, np.ndarray):
            try:
                sigma = np.array(sigma)
            except Exception:
                raise ValueError("Sigma must be convertible to a numpy array")

        if np.any(sigma <= 0):
            raise ValueError("Sigma must be positive")
        self.sigma0 = sigma

    def get_sigma(self):
        """
        Getter for sigma parameter
        """
        return self.sigma0

    def get_n_parameters(self):
        """
        Returns the number of parameters
        """
        return self._n_parameters


class GaussianLogLikelihoodKnownSigma(BaseLikelihood):
    """
    This class represents a Gaussian log-likelihood with a known sigma,
    which assumes that the data follow a Gaussian distribution and computes
    the log-likelihood of the observed data under this assumption.

    Attributes:
        _offset (float): Precomputed constant offset of the log-likelihood.
        _multip (float): Precomputed multiplier on the sum of squared residuals.
    """

    def __init__(self, problem, sigma):
        super().__init__(problem, sigma)
        if sigma is not None:
            self.set_sigma(sigma)
        # Constant offset: -n/2 * log(2 * pi * sigma^2)
        self._offset = -0.5 * self._n_times * np.log(2 * np.pi * self.sigma0**2)
        self._multip = -1 / (2.0 * self.sigma0**2)
        self.sigma2 = self.sigma0**-2
        self._dl = np.ones(self._n_parameters)

    def _evaluate(self, x, grad=None):
        """
        Calls the problem.evaluate method and calculates
        the log-likelihood
        """
        e = self._target - self.problem.evaluate(x)
        return np.sum(self._offset + self._multip * np.sum(e**2, axis=0))

    def _evaluateS1(self, x, grad=None):
        """
        Calls the problem.evaluateS1 method and calculates
        the log-likelihood and its gradient
        """
        y, dy = self.problem.evaluateS1(x)
        if len(y) < len(self._target):
            # Failed simulation: return -inf with a placeholder gradient
            likelihood = -np.float64(np.inf)
            dl = self._dl * np.ones(self._n_parameters)
        else:
            dy = dy.reshape(
                (
                    self._n_times,
                    self.n_outputs,
                    self._n_parameters,
                )
            )
            e = self._target - y
            likelihood = np.sum(self._offset + self._multip * np.sum(e**2, axis=0))
            # Gradient w.r.t. theta: dl_j = (1 / sigma^2) * sum_i e_i * dy_i/dtheta_j
            dl = np.sum((self.sigma2 * np.sum((e.T * dy.T), axis=2)), axis=1)

        return likelihood, dl


class GaussianLogLikelihood(BaseLikelihood):
    """
    This class represents a Gaussian log-likelihood, which assumes that the
    data follow a Gaussian distribution and computes the log-likelihood of
    the observed data under this assumption.

    Attributes:
        _logpi (float): Precomputed offset value for the log-likelihood function.
    """

    def __init__(self, problem):
        super().__init__(problem)
        self._logpi = -0.5 * self._n_times * np.log(2 * np.pi)
        self._dl = np.ones(self._n_parameters + self.n_outputs)

    def _evaluate(self, x, grad=None):
        """
        Evaluates the Gaussian log-likelihood for the given parameters.

        Args:
            x (array_like): The parameters for which to evaluate the log-likelihood.
                The last `self.n_outputs` elements are assumed to be the
                standard deviations of the Gaussian distributions.

        Returns:
            float: The log-likelihood value, or -inf if any standard deviation
                is non-positive.
        """
        sigma = np.asarray(x[-self.n_outputs :])

        if np.any(sigma <= 0):
            return -np.inf

        e = self._target - self.problem.evaluate(x[: -self.n_outputs])
        return np.sum(
            self._logpi
            - self._n_times * np.log(sigma)
            - np.sum(e**2, axis=0) / (2.0 * sigma**2)
        )

    def _evaluateS1(self, x, grad=None):
        """
        Calls the problem.evaluateS1 method and calculates
        the log-likelihood and its gradient
        """
        sigma = np.asarray(x[-self.n_outputs :])

        if np.any(sigma <= 0):
            return -np.inf, self._dl

        y, dy = self.problem.evaluateS1(x[: -self.n_outputs])
        if len(y) < len(self._target):
            likelihood = -np.float64(np.inf)
            dl = self._dl
        else:
            dy = dy.reshape(
                (
                    self._n_times,
                    self.n_outputs,
                    self._n_parameters,
                )
            )
            e = self._target - y
            likelihood = self._evaluate(x)
            dl = np.sum((sigma ** (-2.0) * np.sum((e.T * dy.T), axis=2)), axis=1)

            # Add the gradient with respect to sigma to dl
            dsigma = -self._n_times / sigma + sigma ** (-3.0) * np.sum(e**2, axis=0)
            dl = np.concatenate((dl, dsigma))

        return likelihood, dl
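
To make the calling convention of `GaussianLogLikelihood` concrete: when sigma is unknown it is estimated alongside the model parameters, so the optimiser's vector gains one trailing entry per output. A minimal sketch, assuming `problem` is the two-parameter, single-output fitting problem from the example script and that `BaseCost` exposes the usual `__call__` dispatch to `_evaluate`:

likelihood = pybop.GaussianLogLikelihood(problem)
x = [0.63, 0.51, 0.005]  # [theta_1, theta_2, sigma] for a single-output problem
log_l = likelihood(x)    # assumes BaseCost.__call__ forwards to _evaluate
# Non-positive sigma entries return -inf, steering the optimiser back to
# the feasible region.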
