Skip to content

Commit

Permalink
Add max_subset_rank (#253)
Browse files Browse the repository at this point in the history
  • Loading branch information
jnothman authored Dec 28, 2023
1 parent 16ff930 commit a608725
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ What's new in version 0.9
introduced in version 0.7. (:issue:`248`)
- Ability to disable totals plot with `totals_plot_elements=0`. (:issue:`246`)
- Ability to set totals y axis label (:issue:`243`)
- Added ``max_subset_rank`` to show only the N most populous subsets
  (subsets tied at the cutoff are all included).

What's new in version 0.8
-------------------------
Expand Down
16 changes: 16 additions & 0 deletions upsetplot/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def _process_data(
sum_over,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
reverse=False,
Expand All @@ -41,6 +42,7 @@ def _process_data(
sum_over=sum_over,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
include_empty_subsets=include_empty_subsets,
Expand Down Expand Up @@ -200,6 +202,11 @@ class UpSet:
a size greater than this threshold will be omitted from plotting.
.. versionadded:: 0.5
max_subset_rank : int, optional
Limit to the top N ranked subsets in descending order of size.
All tied subsets are included.
.. versionadded:: 0.9
min_degree : int, optional
Minimum degree of a subset to be shown in the plot.
Expand Down Expand Up @@ -270,6 +277,7 @@ def __init__(
sum_over=None,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
facecolor="auto",
Expand Down Expand Up @@ -324,6 +332,7 @@ def __init__(
sum_over=sum_over,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
reverse=not self._horizontal,
Expand All @@ -345,6 +354,7 @@ def style_subsets(
absent=None,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
facecolor=None,
Expand All @@ -371,6 +381,11 @@ def style_subsets(
Minimum size of a subset to be styled.
max_subset_size : int, optional
Maximum size of a subset to be styled.
max_subset_rank : int, optional
Limit to the top N ranked subsets in descending order of size.
All tied subsets are included.
.. versionadded:: 0.9
min_degree : int, optional
Minimum degree of a subset to be styled.
max_degree : int, optional
Expand Down Expand Up @@ -405,6 +420,7 @@ def style_subsets(
absent=absent,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
)
Expand Down
31 changes: 29 additions & 2 deletions upsetplot/reformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,25 @@ def _scalar_to_list(val):


def _get_subset_mask(
agg, min_subset_size, max_subset_size, min_degree, max_degree, present, absent
agg,
min_subset_size,
max_subset_size,
max_subset_rank,
min_degree,
max_degree,
present,
absent,
):
"""Get a mask over subsets based on size, degree or category presence"""
subset_mask = True
if min_subset_size is not None:
subset_mask = np.logical_and(subset_mask, agg >= min_subset_size)
if max_subset_size is not None:
subset_mask = np.logical_and(subset_mask, agg <= max_subset_size)
if max_subset_rank is not None:
subset_mask = np.logical_and(
subset_mask, agg.rank(method="min", ascending=False) <= max_subset_rank
)
if (min_degree is not None and min_degree >= 0) or max_degree is not None:
degree = agg.index.to_frame().sum(axis=1)
if min_degree is not None:
Expand All @@ -121,12 +132,21 @@ def _get_subset_mask(


def _filter_subsets(
df, agg, min_subset_size, max_subset_size, min_degree, max_degree, present, absent
df,
agg,
min_subset_size,
max_subset_size,
max_subset_rank,
min_degree,
max_degree,
present,
absent,
):
subset_mask = _get_subset_mask(
agg,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
present=present,
Expand Down Expand Up @@ -189,6 +209,7 @@ def query(
absent=None,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
sort_by="degree",
Expand Down Expand Up @@ -221,6 +242,11 @@ def query(
Size may be a sum of values, see `subset_size`.
max_subset_size : int, optional
Maximum size of a subset to be reported.
max_subset_rank : int, optional
Limit to the top N ranked subsets in descending order of size.
All tied subsets are included.
.. versionadded:: 0.9
min_degree : int, optional
Minimum degree of a subset to be reported.
max_degree : int, optional
Expand Down Expand Up @@ -348,6 +374,7 @@ def query(
agg,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
present=present,
Expand Down
31 changes: 31 additions & 0 deletions upsetplot/tests/test_upsetplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,14 @@ def test_index_must_be_bool(x):
(True, True, True): 990,
},
),
(
{"max_subset_rank": 3},
{
(True, False, False): 884,
(True, True, False): 1547,
(True, True, True): 990,
},
),
(
{"min_subset_size": 800, "max_subset_size": 990},
{
Expand Down Expand Up @@ -822,6 +830,29 @@ def test_filter_subsets(filter_params, expected, sort_by):
assert upset_full.total == pytest.approx(upset_filtered.total)


def test_filter_subsets_max_subset_rank_tie():
    """Check how ``max_subset_rank`` treats subsets of equal size.

    Builds a tiny sample so that subset counts are small, then raises
    ``max_subset_rank`` step by step and compares each filtered result with
    the previous one:

    * when rows are added, the newly admitted subsets must be strictly
      smaller than everything kept before (the "non-tie" case);
    * when no rows are added and the result is still not the full set,
      the smallest kept size must occur more than once, i.e. a tie
      consumed the extra rank (the "tie" case).

    The final assertions require that both cases were actually observed and
    that the largest rank tried yields every subset.
    """
    data = generate_samples(seed=0, n_samples=5, n_categories=3)
    # Both flags must start False; otherwise the closing assertions are
    # vacuous and do not prove the corresponding branch was exercised.
    tested_non_tie = False
    tested_tie = False
    full = UpSet(data, subset_size="count").intersections
    prev = None
    for max_rank in range(1, 5):
        cur = UpSet(data, subset_size="count", max_subset_rank=max_rank).intersections
        if prev is not None:
            if cur.shape[0] > prev.shape[0]:
                # check we add rows only when they are new
                assert cur.min() < prev.min()
                tested_non_tie = True
            elif cur.shape[0] != full.shape[0]:
                # Same row count as the previous rank but not yet everything:
                # the smallest kept size must be shared by several subsets.
                assert (cur == cur.min()).sum() > 1
                tested_tie = True

        prev = cur
    assert tested_non_tie
    # If this fails, the sample data has no tie at a cutoff and must change.
    assert tested_tie
    assert cur.shape[0] == full.shape[0]


@pytest.mark.parametrize(
"x",
[
Expand Down

0 comments on commit a608725

Please sign in to comment.