Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/likelihood check #987

Merged
merged 22 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions autofit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .graphical.declarative.factor.hierarchical import HierarchicalFactor
from .graphical.laplace import LaplaceOptimiser
from .non_linear.grid.grid_list import GridList
from .non_linear.samples.summary import SamplesSummary
from .non_linear.samples import SamplesMCMC
from .non_linear.samples import SamplesNest
from .non_linear.samples import Samples
Expand Down Expand Up @@ -82,6 +83,7 @@
from .non_linear.search.optimize.lbfgs.search import LBFGS
from .non_linear.search.optimize.pyswarms.search.globe import PySwarmsGlobal
from .non_linear.search.optimize.pyswarms.search.local import PySwarmsLocal
from .non_linear.paths.abstract import AbstractPaths
from .non_linear.paths import DirectoryPaths
from .non_linear.paths import DatabasePaths
from .non_linear.result import Result
Expand Down
1 change: 1 addition & 0 deletions autofit/config/general.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ profiling:
should_profile: false # If True, the ``profile_log_likelihood_function()`` function of an analysis class is called throughout a model-fit, profiling run times.
repeats: 1 # The number of repeat function calls used to measure run-times when profiling.
test:
check_likelihood_function: true # if True, when a search is resumed the likelihood of a previous sample is recalculated to ensure it is consistent with the previous run.
exception_override: false
lh_timeout_seconds: # If a float is input, the log_likelihood_function call is timed out after this many seconds, to diagnose infinite loops. Default is None, meaning no timeout.
parallel_profile: false
5 changes: 4 additions & 1 deletion autofit/graphical/declarative/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ def __init__(
results
Results from hierarchical factor optimisations
"""
super().__init__()
super().__init__(
samples_summary=None,
paths=None
)
self.results = results

@property
Expand Down
1 change: 1 addition & 0 deletions autofit/messages/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ def transform(self, p):

def inv_transform(self, x):
    """
    Map unconstrained values `x` back to the constrained space.

    Computes exp(x) normalised by (sum(exp(x), axis=self.axis) + 1), i.e. a
    softmax with an implicit extra category, so the outputs along `self.axis`
    sum to less than one.

    Parameters
    ----------
    x
        Array of unconstrained values to transform back.

    Returns
    -------
    The transformed array, same shape as `x`.
    """
    expx = np.exp(x)
    # Removed stray debug `print(expx)` left in by the diff — it spams
    # stdout on every likelihood evaluation.
    return expx / (expx.sum(axis=self.axis, keepdims=True) + 1)

def jacobian(self, p):
Expand Down
1 change: 1 addition & 0 deletions autofit/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from autofit.non_linear.mock.mock_result import MockResultGrid
from autofit.non_linear.mock.mock_search import MockSearch
from autofit.non_linear.mock.mock_search import MockOptimizer
from autofit.non_linear.mock.mock_samples_summary import MockSamplesSummary
from autofit.non_linear.mock.mock_samples import MockSamples
from autofit.non_linear.mock.mock_samples import MockSamplesNest

Expand Down
14 changes: 12 additions & 2 deletions autofit/non_linear/analysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from autofit.non_linear.paths.abstract import AbstractPaths
from autofit.non_linear.paths.database import DatabasePaths
from autofit.non_linear.paths.null import NullPaths
from autofit.non_linear.samples.summary import SamplesSummary
from autofit.non_linear.samples.pdf import SamplesPDF
from autofit.non_linear.result import Result

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -171,11 +173,19 @@ def modify_after_fit(
"""
return self

def make_result(
    self,
    samples_summary: SamplesSummary,
    paths: AbstractPaths,
    samples: Optional[SamplesPDF] = None,
    search_internal: Optional[object] = None,
) -> Result:
    """
    Return a `Result` object wrapping the output of the non-linear search.

    Parameters
    ----------
    samples_summary
        Summary of the samples (e.g. the maximum log likelihood model) of the search.
    paths
        The paths object handling where the search's results are stored.
    samples
        The full samples of the non-linear search, if available.
    search_internal
        The internal representation of the search, if retained.

    Returns
    -------
    A `Result` combining the samples summary, paths, samples and the analysis's
    latent variables.
    """
    return Result(
        samples_summary=samples_summary,
        paths=paths,
        samples=samples,
        # Passed exactly once: the flattened diff showed this keyword
        # duplicated, which is a SyntaxError in Python.
        latent_variables=self.latent_variables,
        search_internal=search_internal,
    )

def profile_log_likelihood_function(self, paths: AbstractPaths, instance):
Expand Down
107 changes: 89 additions & 18 deletions autofit/non_linear/fitness.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import numpy as np
import os
from typing import Optional

from autoconf import conf

from autofit import exc

from autofit.mapper.prior_model.abstract import AbstractPriorModel
from autofit.non_linear.paths.abstract import AbstractPaths
from autofit.non_linear.analysis import Analysis

from timeout_decorator import timeout

from autofit import jax_wrapper
Expand All @@ -23,41 +29,42 @@ def get_timeout_seconds():
class Fitness:
def __init__(
self,
model,
analysis,
model : AbstractPriorModel,
analysis : Analysis,
paths : Optional[AbstractPaths] = None,
fom_is_log_likelihood: bool = True,
resample_figure_of_merit: float = -np.inf,
convert_to_chi_squared: bool = False,
):
"""
Interfaces with any non-linear in order to fit a model to the data and return a log likelihood via
an `Analysis` class.
Interfaces with any non-linear search to fit the model to the data and return a log likelihood via
the analysis.

The interface of a non-linear search and a fitness function can be summarized as follows:
The interface of a non-linear search and fitness function is summarized as follows:

1) The non-linear search chooses a new set of parameters for the model, which are passed to the fitness
1) The non-linear search samples a new set of model parameters, which are passed to the fitness
function's `__call__` method.

2) The parameter values (typically a list) are mapped to an instance of the model (via its priors if
appropriate for the non-linear search).
2) The list of parameter values are mapped to an instance of the model.

3) The instance is passed to the analysis class's log likelihood function, which fits the model to the
data and returns the log likelihood.

4) A final figure-of-merit is computed and returned to the non-linear search, which is either the log
likelihood or log posterior depending on the type of non-linear search.
likelihood or log posterior (e.g. adding the log prior to the log likelihood).

Certain searches (commonly nested samplers) require the parameters to be mapped from unit values to physical
values, which is performed internally by the fitness object in step 2.

It is common for nested sampling algorithms to require that the figure of merit returned is a log likelihood
as priors are often built into the mapping of values from a unit hyper-cube. Optimizers and MCMC methods
typically require that the figure of merit returned is a log posterior, with the prior terms added via this
fitness function. This is not a strict rule, but is a good default.
Certain searches require the returned figure of merit to be a log posterior (often MCMC methods) whereas
others require it to be a log likelihood (often nested samplers, which account for priors internally) in step 4.
Which value is returned is controlled by the `fom_is_log_likelihood` bool.

Some methods also require a chi-squared value to be computed (which is minimized), which is the log likelihood
multiplied by -2.0. The `Fitness` class can also compute this value, if the `convert_to_chi_squared` bool is
`True`.
Some searches require a chi-squared value (which they minimize), given by the log likelihood multiplied
by -2.0. This is returned by the fitness if the `convert_to_chi_squared` bool is `True`.

If a model-fit raises an exception or returns a `np.nan`, a `resample_figure_of_merit` value is returned. The
appropriate value depends on the non-linear search, but is typically either `None`, `-np.inf` or `1.0e99`.
If a model-fit raises an exception or returns a `np.nan`, a `resample_figure_of_merit` value is returned
instead. The appropriate value depends on the search, but is typically either `None`, `-np.inf` or `1.0e99`.
All values indicate to the non-linear search that the model-fit should be resampled or ignored.

Parameters
Expand All @@ -68,6 +75,9 @@ def __init__(
model
The model that is fitted to the data, which is used by the non-linear search to create instances of
the model that are fitted to the data via the log likelihood function.
paths
The paths of the search, which if the search is being resumed from an old run is used to check that
the likelihood function has not changed from the previous run.
fom_is_log_likelihood
If `True`, the figure of merit returned by the fitness function is the log likelihood. If `False`, the
figure of merit is the log posterior.
Expand All @@ -80,11 +90,15 @@ def __init__(

self.analysis = analysis
self.model = model
self.paths = paths
self.fom_is_log_likelihood = fom_is_log_likelihood
self.resample_figure_of_merit = resample_figure_of_merit
self.convert_to_chi_squared = convert_to_chi_squared
self._log_likelihood_function = None

if self.paths is not None:
self.check_log_likelihood(fitness=self)

def __getstate__(self):
state = self.__dict__.copy()
del state["_log_likelihood_function"]
Expand Down Expand Up @@ -144,3 +158,60 @@ def __call__(self, parameters, *kwargs):
figure_of_merit *= -2.0

return figure_of_merit

def check_log_likelihood(self, fitness):
    """
    Sanity-check that the likelihood function of a resumed run matches the previous run.

    Changes to the PyAutoGalaxy source code may inadvertently change the numerics of how a log likelihood is
    computed. Equally, one may set off a model-fit that resumes from previous results, but change the settings of
    the pixelization or inversion in a way that changes the log likelihood function.

    This function performs an optional sanity check, which raises an exception if the log likelihood calculation
    changes, to ensure a model-fit is not resumed with a different likelihood calculation to the previous run.

    If the model-fit has not been performed before (i.e. it is not a resume), no samples summary exists on disk
    and the check is skipped.

    If the model-fit is a resume, the maximum log likelihood sample of the previous run is loaded and its
    likelihood recomputed with the current likelihood function. If the two likelihoods do not agree, and
    therefore the log likelihood function has changed, an exception is raised and code execution terminated.

    Parameters
    ----------
    fitness
        The fitness object whose `__call__` method maps a list of parameter values to a figure of merit
        (in practice this method is invoked with ``fitness=self``).

    Raises
    ------
    exc.SearchException
        If the recomputed figure of merit does not match the stored maximum log likelihood.
    """

    # Skip the check entirely when running under the test suite.
    if os.environ.get("PYAUTOFIT_TEST_MODE") == "1":
        return

    # The check can be disabled via the `general.yaml` config (`test -> check_likelihood_function`).
    if not conf.instance["general"]["test"]["check_likelihood_function"]:
        return

    # No stored summary means this is a fresh run, so there is nothing to compare against.
    try:
        samples_summary = self.paths.load_samples_summary()
    except FileNotFoundError:
        return

    max_log_likelihood_sample = samples_summary.max_log_likelihood_sample
    log_likelihood_old = samples_summary.max_log_likelihood_sample.log_likelihood

    # Map the stored maximum likelihood sample back to parameter values of the current model.
    parameters = max_log_likelihood_sample.parameter_lists_for_model(model=self.model)

    # NOTE(review): `fitness(...)` may return a log posterior or chi-squared depending on
    # `fom_is_log_likelihood` / `convert_to_chi_squared` — assumes its configuration here
    # matches the stored log likelihood; verify against the calling search.
    log_likelihood_new = fitness(parameters=parameters)

    if not np.isclose(log_likelihood_old, log_likelihood_new):
        raise exc.SearchException(
            f"""
            Figure of merit sanity check failed.

            This means that the existing results of a model fit used a different
            likelihood function compared to the one implemented now.
            Old Figure of Merit = {log_likelihood_old}
            New Figure of Merit = {log_likelihood_new}
            """
        )


30 changes: 24 additions & 6 deletions autofit/non_linear/mock/mock_result.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,54 @@
from typing import Union

from autofit.mapper.model import ModelInstance
from autofit.mapper.model_mapper import ModelMapper
from autofit.non_linear.result import Result

from autofit.non_linear.mock.mock_samples_summary import MockSamplesSummary
from autofit.non_linear.mock.mock_samples import MockSamples


class MockResult(Result):
def __init__(
self,
samples_summary : MockSamplesSummary = None,
paths=None,
samples=None,
instance=None,
analysis=None,
search=None,
model=None,
):
super().__init__(samples, search_internal=None)

super().__init__(
samples_summary=samples_summary,
paths=paths,
samples=samples,
search_internal=None
)

self._instance = instance or ModelInstance()
self._samples = samples or MockSamples(
max_log_likelihood_instance=self.instance, model=model or ModelMapper()
# max_log_likelihood_instance=self.instance,
model=model or ModelMapper()
)

self.prior_means = None
self.analysis = analysis
self.search = search
self.model = model

def model_absolute(self, absolute):
return self.model
def model_absolute(self, a):
try:
return self.samples_summary.model_absolute(a)
except AttributeError:
return self.model

def model_relative(self, relative):
return self.model
def model_relative(self, r):
try:
return self.samples_summary.model_relative(r)
except AttributeError:
return self.model

@property
def last(self):
Expand Down
28 changes: 1 addition & 27 deletions autofit/non_linear/mock/mock_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def __init__(
model=None,
sample_list=None,
samples_info=None,
max_log_likelihood_instance=None,
log_likelihood_list=None,
prior_means=None,
**kwargs,
Expand All @@ -34,19 +33,11 @@ def __init__(
**kwargs,
)

self._max_log_likelihood_instance = max_log_likelihood_instance
self._prior_means = prior_means

@property
def default_sample_list(self):
    """
    A default list of three mock samples, used when no explicit `sample_list` is given.

    Returns
    -------
    Three `Sample` objects with log likelihoods 0, 1 and 2 respectively, each with
    zero log prior and zero weight.
    """
    # The flattened diff fused the removed `_log_likelihood_list` branch with the
    # new comprehension, leaving invalid Python; this is the post-diff version.
    return [
        Sample(log_likelihood=log_likelihood, log_prior=0.0, weight=0.0)
        for log_likelihood in range(3)
    ]

@property
Expand All @@ -56,23 +47,6 @@ def log_likelihood_list(self):

return self._log_likelihood_list

def max_log_likelihood(self, as_instance: bool = True):
    """
    Return the maximum log likelihood model, preferring the parent implementation.

    When no mock override (`_max_log_likelihood_instance`) was supplied, the parent
    class's computation is attempted first; `KeyError`/`AttributeError` from an
    incompletely-configured mock are swallowed and the override path is used instead.

    Parameters
    ----------
    as_instance
        If `True`, return the model as an instance; if `False`, return the raw
        parameter values of the first sample.

    Returns
    -------
    The maximum log likelihood instance (may be `None` if neither the override nor
    the parent computation is available), or a list of parameter values.
    """
    if self._max_log_likelihood_instance is None:
        try:
            return super().max_log_likelihood(as_instance=as_instance)
        except (KeyError, AttributeError):
            # Mock samples may lack the data the real implementation needs;
            # fall through to the override below.
            pass

    if as_instance:
        return self._max_log_likelihood_instance
    # NOTE(review): assumes the first sample holds the max-likelihood parameters
    # in its kwargs — confirm this ordering invariant for MockSamples.
    return list(self.sample_list[0].kwargs.values())

@property
def prior_means(self):
    """The prior means, deferring to the parent class when no override was supplied."""
    stored = self._prior_means
    # An explicit mock value takes precedence over the real computation.
    if stored is not None:
        return stored
    return super().prior_means

@property
def unconverged_sample_size(self):
Expand Down
Loading
Loading