diff --git a/metrics/github/backfill.py b/metrics/github/backfill.py index 02edcee7..8f5b2503 100644 --- a/metrics/github/backfill.py +++ b/metrics/github/backfill.py @@ -9,7 +9,7 @@ from ..logs import setup_logging from ..timescaledb import TimescaleDBWriter from ..timescaledb.tables import GitHubPullRequests -from ..tools.dates import date_from_iso, datetime_from_iso, iter_days +from ..tools.dates import date_from_iso, iter_days from .prs import process_prs @@ -57,17 +57,18 @@ def get_prs(db): return list(cur.execute(sql)) -def pr_queue(prs, org, start, days_threshold=None): - dates = iter_days(start, date.today(), step=timedelta(days=1)) - for day in dates: - prs_on_day = [ - pr - for pr in prs - if date_from_iso(pr["created"]) <= day - and date_from_iso(pr["closed"]) >= day - ] +def open_prs(prs, org, start, days_threshold): + dates = list(iter_days(start, date.today())) + + with TimescaleDBWriter(GitHubPullRequests) as writer: + for day in dates: + prs_on_day = [ + pr + for pr in prs + if date_from_iso(pr["created"]) <= day + and date_from_iso(pr["closed"]) >= day + ] - if days_threshold is not None: # remove PRs which have been open = timedelta(days=days_threshold) ] - suffix = f"_older_than_{days_threshold}_days" if days_threshold else "" - key = f"queue{suffix}" + name = f"queue_older_than_{days_threshold}_days" - log.info("%s | %s | %s | Processing %s PRs", key, day, org, len(prs_on_day)) - with TimescaleDBWriter(GitHubPullRequests) as writer: - process_prs(writer, prs_on_day, day, f"queue{suffix}") + log.info( + "%s | %s | %s | Processing %s PRs", name, day, org, len(prs_on_day) + ) + process_prs(writer, prs_on_day, day, name=name) def pr_throughput(prs, org, start): @@ -120,12 +121,9 @@ def backfill(ctx, org, pull_data, db_path): org_prs = [pr for pr in prs if pr["org"] == org] log.info("Backfilling with %s PRs for %s", len(org_prs), org) - start_date = min([pr["created"] for pr in org_prs]) - start_date = datetime_from_iso(start_date).date() - - pr_queue(org_prs, org, start_date) + start_date = date_from_iso(min([pr["created"] for pr in org_prs])) - for day in [2, 10, 30, 60]: - pr_queue(org_prs, org, start_date, days_threshold=day) + for day in [2, 7, 10, 30, 60]: + open_prs(org_prs, org, start_date, days_threshold=day) pr_throughput(org_prs, org, start_date) diff --git a/metrics/github/cli.py b/metrics/github/cli.py index d2b5162a..a5da0011 100644 --- a/metrics/github/cli.py +++ b/metrics/github/cli.py @@ -25,26 +25,25 @@ def github(ctx, token): @github.command() @click.argument("org") @click.argument("date", type=click.DateTime()) -@click.option("--days-threshold", type=int) +@click.argument("days-threshold", type=int) @click.pass_context -def pr_queue(ctx, org, date, days_threshold): - """The number of PRs open on the given date""" +def open_prs(ctx, org, date, days_threshold): + """The number of PRs open for DAYS_THRESHOLD or longer on the given date""" date = date.date() prs = api.prs_open_on_date(org, date) - if days_threshold is not None: - # remove PRs which have been open = timedelta(days=days_threshold) - ] + # remove PRs which have been open = timedelta(days=days_threshold) + ] - suffix = f"_older_than_{days_threshold}_days" if days_threshold else "" - - log.info("%s | %s | Processing %s PRs", date, org, len(prs)) - with TimescaleDBWriter("github_pull_requests", f"queue{suffix}") as writer: - process_prs(writer, prs, date) + log.info("%s | %s | Processing %s PRs", date, org, len(open_prs)) + with TimescaleDBWriter(GitHubPullRequests) as writer: + process_prs( + writer, open_prs, date, name=f"queue_older_than_{days_threshold}_days" + ) @github.command()