From 669ba455543f7315a33bac5913b4fbd68f4999be Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 9 Dec 2020 12:10:49 +0000 Subject: [PATCH 1/8] add test for _choose_chains --- pymc3/tests/test_sampling.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index a167b4f759..5ed9734afa 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -30,6 +30,7 @@ import pymc3 as pm +from pymc3.backends.ndarray import NDArray from pymc3.exceptions import IncorrectArgumentsError, SamplingError from pymc3.tests.helpers import SeededTest from pymc3.tests.models import simple_init @@ -299,6 +300,31 @@ def test_partial_trace_sample(): trace = pm.sample(trace=[a]) +@pytest.mark.parametrize( + "points_0, points_1, expected_length, expected_n_traces", + [ + ([1, 4, 2, 8, 5, 7], [3, 1, 4, 1], 4, 2), + ([1, 4, 2, 8, 5, 7], [3, 1], 6, 1), + ], +) +def test_choose_chains(points_0, points_1, expected_length, expected_n_traces): + trace_0_points = tuple({"a": i} for i in points_0) + trace_1_points = tuple({"a": i} for i in points_1) + with pm.Model() as model: + a = pm.Normal("a", mu=0, sigma=1) + trace_0 = NDArray(model) + trace_1 = NDArray(model) + trace_0.setup(6, 1) + trace_1.setup(4, 1) + for point in trace_0_points: + trace_0.record(point) + for point in trace_1_points: + trace_1.record(point) + traces, length = pm.sampling._choose_chains([trace_0, trace_1], tune=1) + assert length == expected_length + assert expected_n_traces == len(traces) + + @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") class TestNamedSampling(SeededTest): def test_shared_named(self): From 2523001d390ceb9c942d2763a7aca1199460f783 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 14:54:08 +0000 Subject: [PATCH 2/8] fix bug - choose overall maximum --- pymc3/sampling.py | 27 +++++++++++++-------------- pymc3/tests/test_sampling.py | 28 +++++++++++++++------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 4ae11ccf8f..020c3a4f87 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1508,6 +1508,14 @@ def _mp_sample( def _choose_chains(traces, tune): + """ + Pick traces such that the effective sample size is maximised. + + We get here after a ``KeyboardInterrupt``, and so the different + traces have different lengths. We therefore pick the number of + traces such that (number of traces) * (length of shortest trace) + is maximised. + """ if tune is None: tune = 0 @@ -1518,22 +1526,13 @@ def _choose_chains(traces, tune): if not sum(lengths): raise ValueError("Not enough samples to build a trace.") - idxs = np.argsort(lengths)[::-1] - l_sort = np.array(lengths)[idxs] + idxs = np.argsort(lengths) + l_sort = np.sort(lengths) - final_length = l_sort[0] - last_total = 0 - for i, length in enumerate(l_sort): - total = (i + 1) * length - if total < last_total: - use_until = i - break - last_total = total - final_length = length - else: - use_until = len(lengths) + use_until = np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1]) + final_length = l_sort[use_until] - return [traces[idx] for idx in idxs[:use_until]], final_length + tune + return [traces[idx] for idx in idxs[use_until:]], final_length + tune def stop_tuning(step): diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 5ed9734afa..cb5f9806ac 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -301,26 +301,28 @@ def test_partial_trace_sample(): @pytest.mark.parametrize( - "points_0, points_1, expected_length, expected_n_traces", + "n_points, tune, expected_length, expected_n_traces", [ - ([1, 4, 2, 8, 5, 7], [3, 1, 4, 1], 4, 2), - ([1, 4, 2, 8, 5, 7], [3, 1], 6, 1), + ((5, 2, 2), 0, 2, 3), + ((6, 1, 1), 1, 6, 1), ], ) -def test_choose_chains(points_0, points_1, expected_length, expected_n_traces): - trace_0_points = tuple({"a": i} for i in points_0) - trace_1_points = tuple({"a": i} for i in points_1) +def test_choose_chains(n_points, tune, expected_length, expected_n_traces): with pm.Model() as model: a = pm.Normal("a", mu=0, sigma=1) trace_0 = NDArray(model) trace_1 = NDArray(model) - trace_0.setup(6, 1) - trace_1.setup(4, 1) - for point in trace_0_points: - trace_0.record(point) - for point in trace_1_points: - trace_1.record(point) - traces, length = pm.sampling._choose_chains([trace_0, trace_1], tune=1) + trace_2 = NDArray(model) + trace_0.setup(n_points[0], 1) + trace_1.setup(n_points[1], 1) + trace_2.setup(n_points[2], 1) + for _ in range(n_points[0]): + trace_0.record({"a": 0}) + for _ in range(n_points[1]): + trace_1.record({"a": 0}) + for _ in range(n_points[2]): + trace_2.record({"a": 0}) + traces, length = pm.sampling._choose_chains([trace_0, trace_1, trace_2], tune=tune) assert length == expected_length assert expected_n_traces == len(traces) From 3281215e507b427344569b76d18dee54c8fb0319 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 14:58:34 +0000 Subject: [PATCH 3/8] update release notes --- RELEASE-NOTES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index e6c404520b..92d6669bee 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,5 +1,9 @@ # Release Notes +## PyMC3 3.10.1 (on deck) +### Maintenance +- Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had smaller effective size than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318) + ## PyMC3 3.10.0 (7 December 2020) This is a major release with many exciting new features. The biggest change is that we now rely on our own fork of [Theano-PyMC](https://github.com/pymc-devs/Theano-PyMC). This is in line with our [big announcement about our commitment to PyMC3 and Theano](https://pymc-devs.medium.com/the-future-of-pymc3-or-theano-is-dead-long-live-theano-d8005f8a0e9b). From 67a795f47112bf2e26355ce66552bb69f1b6ddda Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 14:59:38 +0000 Subject: [PATCH 4/8] :memo: --- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 92d6669bee..5c1f6c1d95 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -2,7 +2,7 @@ ## PyMC3 3.10.1 (on deck) ### Maintenance -- Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had smaller effective size than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318) +- Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318) ## PyMC3 3.10.0 (7 December 2020) From 541b88882d365e7ff723e905b5c39c15a051a5f9 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 15:04:08 +0000 Subject: [PATCH 5/8] :art: --- RELEASE-NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 5c1f6c1d95..ef1d6c4558 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,6 +1,7 @@ # Release Notes ## PyMC3 3.10.1 (on deck) + ### Maintenance - Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318) From b42241e24640cedbaddfb9763f39f8e58d5be2cd Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 15:04:41 +0000 Subject: [PATCH 6/8] minimise diff --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 020c3a4f87..7e82bc8b7c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1527,7 +1527,7 @@ def _choose_chains(traces, tune): raise ValueError("Not enough samples to build a trace.") idxs = np.argsort(lengths) - l_sort = np.sort(lengths) + l_sort = lengths[idxs] use_until = np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1]) final_length = l_sort[use_until] From 5478a73ad5a9f50f16611ff21966cc38c0cd726d Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 15:05:45 +0000 Subject: [PATCH 7/8] minimise diff --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 7e82bc8b7c..0995daefe1 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1527,7 +1527,7 @@ def _choose_chains(traces, tune): raise ValueError("Not enough samples to build a trace.") idxs = np.argsort(lengths) - l_sort = lengths[idxs] + l_sort = np.array(lengths)[idxs] use_until = np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1]) final_length = l_sort[use_until] From 6d264f261d2b7f0cdc6a95a54347eb1c610317df Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 12 Dec 2020 15:14:31 +0000 Subject: [PATCH 8/8] Update pymc3/sampling.py --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 0995daefe1..47cbcac381 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1509,7 +1509,7 @@ def _mp_sample( def _choose_chains(traces, tune): """ - Pick traces such that the effective sample size is maximised. + Filter and slice traces such that (n_traces * len(shortest_trace)) is maximized. We get here after a ``KeyboardInterrupt``, and so the different traces have different lengths. We therefore pick the number of