Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Keep staterror from affecting multiple samples #1965

Merged
merged 10 commits into from
Aug 28, 2022
26 changes: 10 additions & 16 deletions src/pyhf/modifiers/staterror.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,12 @@ def finalize(self):
],
axis=0,
)
# here relerrs still has all the bins, while the staterror are usually per-channel
# so we need to pick out the masks for this modifier to extract the
# modifier configuration (sigmas, etc..)
# so loop over samples and extract the first mask
# while making sure any subsequent mask is consistent
relerrs = default_backend.sqrt(relerrs)

masks = {}
for modifier_data in self.builder_data[modname].values():
mask_this_sample = default_backend.astensor(
Expand All @@ -113,12 +117,14 @@ def finalize(self):
else:
assert (mask_this_sample == masks[modname]).all()

for modifier_data in self.builder_data[modname].values():
modifier_data['data']['mask'] = masks[modname]
# extract sigmas using this modifiers mask
sigmas = relerrs[masks[modname]]

# list of bools, consistent with other modifiers (no numpy.bool_)
fixed = default_backend.tolist(sigmas == 0)
# ensures non-Nan constraint term, but in a future PR we need to remove constraints for these
# FIXME: sigmas that are zero will be fixed to 1.0 arbitrarily to ensure
# non-Nan constraint term, but in a future PR need to remove constraints
# for these
sigmas[fixed] = 1.0
self.required_parsets.setdefault(parname, [required_parset(sigmas, fixed)])
return self.builder_data
Expand All @@ -145,18 +151,6 @@ def __init__(self, modifiers, pdfconfig, builder_data, batch_size=None):
[[builder_data[m][s]['data']['mask']] for s in pdfconfig.samples]
for m in keys
]
self.__staterror_uncrt = default_backend.astensor(
matthewfeickert marked this conversation as resolved.
Show resolved Hide resolved
[
[
[
builder_data[m][s]['data']['uncrt'],
builder_data[m][s]['data']['nom_data'],
]
for s in pdfconfig.samples
]
for m in keys
]
)
global_concatenated_bin_indices = [
[[j for c in pdfconfig.channels for j in range(pdfconfig.channel_nbins[c])]]
]
Expand Down
52 changes: 52 additions & 0 deletions tests/test_modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,55 @@ def test_invalid_bin_wise_modifier(datadir, patch_file):

with pytest.raises(pyhf.exceptions.InvalidModifier):
pyhf.Model(bad_spec)


def test_issue1720_staterror_builder_mask(datadir):
with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file:
spec = json.load(spec_file)

spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror"
config = pyhf.pdf._ModelConfig(spec)
builder = pyhf.modifiers.staterror.staterror_builder(config)

channel = spec["channels"][0]
sig_sample = channel["samples"][0]
bkg_sample = channel["samples"][1]
modifier = bkg_sample["modifiers"][0]

assert channel["name"] == "channel"
assert sig_sample["name"] == "signal"
assert bkg_sample["name"] == "bkg"
assert modifier["type"] == "staterror"

builder.append("staterror/NP", "channel", "bkg", modifier, bkg_sample)
collected_bkg = builder.collect(modifier, bkg_sample["data"])
assert collected_bkg == {"mask": [True], "nom_data": [1], "uncrt": [1.5]}

builder.append("staterror/NP", "channel", "signal", None, sig_sample)
collected_sig = builder.collect(None, sig_sample["data"])
assert collected_sig == {"mask": [False], "nom_data": [5], "uncrt": [0.0]}

finalized = builder.finalize()
assert finalized["staterror/NP"]["bkg"]["data"]["mask"].tolist() == [True]
assert finalized["staterror/NP"]["signal"]["data"]["mask"].tolist() == [False]


@pytest.mark.parametrize(
"inits",
[[-2.0], [-1.0], [0.0], [1.0], [2.0]],
)
def test_issue1720_greedy_staterror(datadir, inits):
"""
Test that the staterror does not affect more samples than shapesys equivalently.
"""
with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file:
spec = json.load(spec_file)

model_shapesys = pyhf.Workspace(spec).model()
expected_shapesys = model_shapesys.expected_actualdata(inits)

spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror"
model_staterror = pyhf.Workspace(spec).model()
expected_staterror = model_staterror.expected_actualdata(inits)

assert expected_staterror == expected_shapesys
49 changes: 49 additions & 0 deletions tests/test_modifiers/issue1720_greedy_staterror.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"channels": [
{
"name": "channel",
"samples": [
{
"data": [
5
],
"modifiers": [],
"name": "signal"
},
{
"data": [
1
],
"modifiers": [
{
"data": [
1.5
],
"name": "NP",
"type": "shapesys"
}
],
"name": "bkg"
}
]
}
],
"measurements": [
{
"config": {
"parameters": [],
"poi": "NP"
},
"name": ""
}
],
"observations": [
{
"data": [
0
],
"name": "channel"
}
],
"version": "1.0.0"
}
2 changes: 1 addition & 1 deletion tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def test_testpoi(tmpdir, script_runner):
command = f'pyhf xml2json validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {temp.strpath:s}'
ret = script_runner.run(*shlex.split(command))

pois = [1.0, 0.5, 0.0]
pois = [1.0, 0.5, 0.001]
matthewfeickert marked this conversation as resolved.
Show resolved Hide resolved
results_exp = []
results_obs = []
for test_poi in pois:
Expand Down