Skip to content

Commit

Permalink
Merge pull request #37 from ebmdatalab/fix-deciles
Browse files Browse the repository at this point in the history
Fix percentiles
  • Loading branch information
inglesp authored Oct 20, 2023
2 parents 3c1c7fe + 3ec111a commit 63ac7bb
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 11 deletions.
2 changes: 1 addition & 1 deletion ebmdatalab/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Package for ebmdatalab jupyter notebook stuff
"""
-__version__ = "0.0.29"
+__version__ = "0.0.30"
8 changes: 4 additions & 4 deletions ebmdatalab/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ def add_percentiles(df, period_column=None, column=None, show_outer_percentiles=
     Adds `percentile` column.
     """
-    deciles = np.arange(0.1, 1, 0.1)
-    bottom_percentiles = np.arange(0.01, 0.1, 0.01)
-    top_percentiles = np.arange(0.91, 1, 0.01)
+    deciles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+    bottom_percentiles = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09]
+    top_percentiles = [0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]
     if show_outer_percentiles:
         quantiles = np.concatenate((deciles, bottom_percentiles, top_percentiles))
     else:
         quantiles = deciles
     df = df.groupby(period_column)[column].quantile(quantiles).reset_index()
     df = df.rename(index=str, columns={"level_1": "percentile"})
     # create integer range of percentiles
-    df["percentile"] = df["percentile"].apply(lambda x: int(x * 100))
+    df["percentile"] = (df["percentile"] * 100).astype(int)
     return df


Expand Down
21 changes: 15 additions & 6 deletions ebmdatalab/tests/test_charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,21 @@


 def test_add_percentiles():
-    df = pd.DataFrame(np.random.rand(1000, 1), columns=["val"])
-    months = pd.date_range("2018-01-01", periods=12, freq="M")
-    df["month"] = np.random.choice(months, len(df))
-    df = charts.add_percentiles(df, period_column="month", column="val")
-    # This is a statistically-likely test, so might fail!
-    assert (df[df.percentile == 99].val > 0.75).all()
+    rows = [["2023-09-01", n * 5] for n in range(1001)]
+    df = pd.DataFrame(rows, columns=["month", "val"])
+    dfp = (
+        charts.add_percentiles(df, period_column="month", column="val")
+        .sort_values("percentile")
+        .reset_index(drop=True)
+    )
+    percentiles = [
+        1, 2, 3, 4, 5, 6, 7, 8, 9,
+        10, 20, 30, 40, 50, 60, 70, 80, 90,
+        91, 92, 93, 94, 95, 96, 97, 98, 99,
+    ]
+    expected_rows = [["2023-09-01", percentile, percentile * 50.0] for percentile in percentiles]
+    expected = pd.DataFrame(expected_rows, columns=["month", "percentile", "val"])
+    pd.testing.assert_frame_equal(dfp, expected)


def test_deciles_chart():
Expand Down

0 comments on commit 63ac7bb

Please sign in to comment.