From 4a2e6a3e6f5e47f2f2eb2a2339550279eb9ee7da Mon Sep 17 00:00:00 2001 From: George Hickman Date: Tue, 14 Nov 2023 15:19:19 +0000 Subject: [PATCH] Extract throughput filter into a function This makes it easier to deal with still-open PRs and keep all the logic in one place. --- metrics/github/backfill.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/metrics/github/backfill.py b/metrics/github/backfill.py index 8f5b2503..277e1a72 100644 --- a/metrics/github/backfill.py +++ b/metrics/github/backfill.py @@ -60,22 +60,28 @@ def get_prs(db): def open_prs(prs, org, start, days_threshold): dates = list(iter_days(start, date.today())) + today = date.today() + threshold = timedelta(days=days_threshold) + + def open_on_day(date, pr, today): + """ + Filter function for PRs + + Checks whether a PR is open today and if it's been open for greater or + equal to the threshold of days. + """ + closed = date_from_iso(pr["closed"]) or today + opened = date_from_iso(pr["created"]) + + open_today = (opened <= date) and (closed >= day) + if not open_today: + return False + + return (closed - opened) >= threshold + with TimescaleDBWriter(GitHubPullRequests) as writer: for day in dates: - prs_on_day = [ - pr - for pr in prs - if date_from_iso(pr["created"]) <= day - and date_from_iso(pr["closed"]) >= day - ] - - # remove PRs which have been open = timedelta(days=days_threshold) - ] + prs_on_day = [pr for pr in prs if open_on_day(day, pr, today)] name = f"queue_older_than_{days_threshold}_days"