From 669ba455543f7315a33bac5913b4fbd68f4999be Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.e.gorelli@gmail.com>
Date: Wed, 9 Dec 2020 12:10:49 +0000
Subject: [PATCH 1/8] add test for _choose_chains

---
 pymc3/tests/test_sampling.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index a167b4f759..5ed9734afa 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -30,6 +30,7 @@
 
 import pymc3 as pm
 
+from pymc3.backends.ndarray import NDArray
 from pymc3.exceptions import IncorrectArgumentsError, SamplingError
 from pymc3.tests.helpers import SeededTest
 from pymc3.tests.models import simple_init
@@ -299,6 +300,31 @@ def test_partial_trace_sample():
         trace = pm.sample(trace=[a])
 
 
+@pytest.mark.parametrize(
+    "points_0, points_1, expected_length, expected_n_traces",
+    [
+        ([1, 4, 2, 8, 5, 7], [3, 1, 4, 1], 4, 2),
+        ([1, 4, 2, 8, 5, 7], [3, 1], 6, 1),
+    ],
+)
+def test_choose_chains(points_0, points_1, expected_length, expected_n_traces):
+    trace_0_points = tuple({"a": i} for i in points_0)
+    trace_1_points = tuple({"a": i} for i in points_1)
+    with pm.Model() as model:
+        a = pm.Normal("a", mu=0, sigma=1)
+        trace_0 = NDArray(model)
+        trace_1 = NDArray(model)
+        trace_0.setup(6, 1)
+        trace_1.setup(4, 1)
+        for point in trace_0_points:
+            trace_0.record(point)
+        for point in trace_1_points:
+            trace_1.record(point)
+        traces, length = pm.sampling._choose_chains([trace_0, trace_1], tune=1)
+    assert length == expected_length
+    assert expected_n_traces == len(traces)
+
+
 @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
 class TestNamedSampling(SeededTest):
     def test_shared_named(self):

From 2523001d390ceb9c942d2763a7aca1199460f783 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Wed, 9 Dec 2020 14:54:08 +0000
Subject: [PATCH 2/8] fix bug - choose overall maximum

---
 pymc3/sampling.py            | 27 +++++++++++++--------------
 pymc3/tests/test_sampling.py | 28 +++++++++++++++-------------
 2 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 4ae11ccf8f..020c3a4f87 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -1508,6 +1508,14 @@ def _mp_sample(
 
 
 def _choose_chains(traces, tune):
+    """
+    Pick traces such that the effective sample size is maximised.
+
+    We get here after a ``KeyboardInterrupt``, and so the different
+    traces have different lengths. We therefore pick the number of
+    traces such that (number of traces) * (length of shortest trace)
+    is maximised.
+    """
     if tune is None:
         tune = 0
 
@@ -1518,22 +1526,13 @@ def _choose_chains(traces, tune):
     if not sum(lengths):
         raise ValueError("Not enough samples to build a trace.")
 
-    idxs = np.argsort(lengths)[::-1]
-    l_sort = np.array(lengths)[idxs]
+    idxs = np.argsort(lengths)
+    l_sort = np.sort(lengths)
 
-    final_length = l_sort[0]
-    last_total = 0
-    for i, length in enumerate(l_sort):
-        total = (i + 1) * length
-        if total < last_total:
-            use_until = i
-            break
-        last_total = total
-        final_length = length
-    else:
-        use_until = len(lengths)
+    use_until = np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1])
+    final_length = l_sort[use_until]
 
-    return [traces[idx] for idx in idxs[:use_until]], final_length + tune
+    return [traces[idx] for idx in idxs[use_until:]], final_length + tune
 
 
 def stop_tuning(step):
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 5ed9734afa..cb5f9806ac 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -301,26 +301,28 @@ def test_partial_trace_sample():
 
 
 @pytest.mark.parametrize(
-    "points_0, points_1, expected_length, expected_n_traces",
+    "n_points, tune, expected_length, expected_n_traces",
     [
-        ([1, 4, 2, 8, 5, 7], [3, 1, 4, 1], 4, 2),
-        ([1, 4, 2, 8, 5, 7], [3, 1], 6, 1),
+        ((5, 2, 2), 0, 2, 3),
+        ((6, 1, 1), 1, 6, 1),
     ],
 )
-def test_choose_chains(points_0, points_1, expected_length, expected_n_traces):
-    trace_0_points = tuple({"a": i} for i in points_0)
-    trace_1_points = tuple({"a": i} for i in points_1)
+def test_choose_chains(n_points, tune, expected_length, expected_n_traces):
     with pm.Model() as model:
         a = pm.Normal("a", mu=0, sigma=1)
         trace_0 = NDArray(model)
         trace_1 = NDArray(model)
-        trace_0.setup(6, 1)
-        trace_1.setup(4, 1)
-        for point in trace_0_points:
-            trace_0.record(point)
-        for point in trace_1_points:
-            trace_1.record(point)
-        traces, length = pm.sampling._choose_chains([trace_0, trace_1], tune=1)
+        trace_2 = NDArray(model)
+        trace_0.setup(n_points[0], 1)
+        trace_1.setup(n_points[1], 1)
+        trace_2.setup(n_points[2], 1)
+        for _ in range(n_points[0]):
+            trace_0.record({"a": 0})
+        for _ in range(n_points[1]):
+            trace_1.record({"a": 0})
+        for _ in range(n_points[2]):
+            trace_2.record({"a": 0})
+        traces, length = pm.sampling._choose_chains([trace_0, trace_1, trace_2], tune=tune)
     assert length == expected_length
     assert expected_n_traces == len(traces)
 

From 3281215e507b427344569b76d18dee54c8fb0319 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Wed, 9 Dec 2020 14:58:34 +0000
Subject: [PATCH 3/8] update release notes

---
 RELEASE-NOTES.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index e6c404520b..92d6669bee 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,5 +1,9 @@
 # Release Notes
 
+## PyMC3 3.10.1 (on deck)
+### Maintenance
+- Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had smaller effective size than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)
+
 ## PyMC3 3.10.0 (7 December 2020)
 
 This is a major release with many exciting new features. The biggest change is that we now rely on our own fork of [Theano-PyMC](https://github.com/pymc-devs/Theano-PyMC). This is in line with our [big announcement about our commitment to PyMC3 and Theano](https://pymc-devs.medium.com/the-future-of-pymc3-or-theano-is-dead-long-live-theano-d8005f8a0e9b).

From 67a795f47112bf2e26355ce66552bb69f1b6ddda Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Wed, 9 Dec 2020 14:59:38 +0000
Subject: [PATCH 4/8] :memo:

---
 RELEASE-NOTES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 92d6669bee..5c1f6c1d95 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -2,7 +2,7 @@
 
 ## PyMC3 3.10.1 (on deck)
 ### Maintenance
-- Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had smaller effective size than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)
+- Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)
 
 ## PyMC3 3.10.0 (7 December 2020)
 

From 541b88882d365e7ff723e905b5c39c15a051a5f9 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Wed, 9 Dec 2020 15:04:08 +0000
Subject: [PATCH 5/8] :art:

---
 RELEASE-NOTES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 5c1f6c1d95..ef1d6c4558 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,6 +1,7 @@
 # Release Notes
 
 ## PyMC3 3.10.1 (on deck)
+
 ### Maintenance
 - Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)
 

From b42241e24640cedbaddfb9763f39f8e58d5be2cd Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Wed, 9 Dec 2020 15:04:41 +0000
Subject: [PATCH 6/8] minimise diff

---
 pymc3/sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 020c3a4f87..7e82bc8b7c 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -1527,7 +1527,7 @@ def _choose_chains(traces, tune):
         raise ValueError("Not enough samples to build a trace.")
 
     idxs = np.argsort(lengths)
-    l_sort = np.sort(lengths)
+    l_sort = lengths[idxs]
 
     use_until = np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1])
     final_length = l_sort[use_until]

From 5478a73ad5a9f50f16611ff21966cc38c0cd726d Mon Sep 17 00:00:00 2001
From: MarcoGorelli <m.e.gorelli@gmail.com>
Date: Wed, 9 Dec 2020 15:05:45 +0000
Subject: [PATCH 7/8] minimise diff

---
 pymc3/sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 7e82bc8b7c..0995daefe1 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -1527,7 +1527,7 @@ def _choose_chains(traces, tune):
         raise ValueError("Not enough samples to build a trace.")
 
     idxs = np.argsort(lengths)
-    l_sort = lengths[idxs]
+    l_sort = np.array(lengths)[idxs]
 
     use_until = np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1])
     final_length = l_sort[use_until]

From 6d264f261d2b7f0cdc6a95a54347eb1c610317df Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.e.gorelli@gmail.com>
Date: Sat, 12 Dec 2020 15:14:31 +0000
Subject: [PATCH 8/8] Update pymc3/sampling.py

---
 pymc3/sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 0995daefe1..47cbcac381 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -1509,7 +1509,7 @@ def _mp_sample(
 
 def _choose_chains(traces, tune):
     """
-    Pick traces such that the effective sample size is maximised.
+    Filter and slice traces such that (n_traces * len(shortest_trace)) is maximized.
 
     We get here after a ``KeyboardInterrupt``, and so the different
     traces have different lengths. We therefore pick the number of