Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: cache yield_stdev using spec and interpcodes of model #322

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 46 additions & 2 deletions src/cabinetry/model_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Provides utilities for pyhf models."""

import json
import logging
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union

Expand Down Expand Up @@ -155,6 +156,38 @@ def prefit_uncertainties(model: pyhf.pdf.Model) -> np.ndarray:
return np.asarray(pre_fit_unc)


def _hashable_model_key(
    model: pyhf.pdf.Model,
) -> Tuple[str, Tuple[Tuple[str, str], ...]]:
    """Build a hashable key identifying a model by its spec and interpolation codes.

    A `pyhf.pdf.Model` can be hashed directly, but its `__hash__` is the one
    inherited from `object`, which means two copies of the same model hash
    differently. The key produced here is identical for such copies, while
    models describing different likelihoods receive different keys.

    Note: Only the spec and the interpolation codes enter the key. Any other
    `Model` configuration (e.g. `poi_name`, overridden parameter bounds, etc.)
    has no effect on it.

    Args:
        model (pyhf.pdf.Model): model for which to build the key

    Returns:
        Tuple[str, Tuple[Tuple[str, str], ...]]: the JSON-serialized spec
        together with (modifier type, interpcode) pairs ordered by modifier type
    """
    modifier_settings = model.config.modifier_settings
    # walk the modifier types in sorted order so the key does not depend on the
    # ordering of the settings dictionary
    interpcodes = tuple(
        (mod_type, modifier_settings[mod_type]["interpcode"])
        for mod_type in sorted(modifier_settings)
    )
    # serialize with sorted keys: specs that differ only in key ordering describe
    # equivalent models and must therefore map to the same string
    spec_str = json.dumps(model.spec, sort_keys=True)
    return spec_str, interpcodes


def yield_stdev(
model: pyhf.pdf.Model,
parameters: np.ndarray,
Expand Down Expand Up @@ -183,7 +216,13 @@ def yield_stdev(
"""
# check whether results are already stored in cache
cached_results = _YIELD_STDEV_CACHE.get(
(model, tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes()), None
(
_hashable_model_key(model),
tuple(parameters),
tuple(uncertainty),
corr_mat.data.tobytes(),
),
None,
)
if cached_results is not None:
# return results from cache
Expand Down Expand Up @@ -284,7 +323,12 @@ def yield_stdev(
# save to cache
_YIELD_STDEV_CACHE.update(
{
(model, tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes()): (
(
_hashable_model_key(model),
tuple(parameters),
tuple(uncertainty),
corr_mat.data.tobytes(),
): (
total_stdev_per_bin,
total_stdev_per_channel,
)
Expand Down
5 changes: 4 additions & 1 deletion tests/test_model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,10 @@ def test_yield_stdev(example_spec, example_spec_multibin):
assert np.allclose(total_stdev_chan[i_reg], expected_stdev_chan[i_reg])
# also look up cache directly
from_cache = model_utils._YIELD_STDEV_CACHE[
model, tuple(parameters), tuple(uncertainty), corr_mat.tobytes()
model_utils._hashable_model_key(model),
tuple(parameters),
tuple(uncertainty),
corr_mat.tobytes(),
]
for i_reg in range(2):
assert np.allclose(from_cache[0][i_reg], expected_stdev_bin[i_reg])
Expand Down