diff --git a/metrics/github/api.py b/metrics/github/api.py index 23f03154..eceebeb6 100644 --- a/metrics/github/api.py +++ b/metrics/github/api.py @@ -1,12 +1,12 @@ import json import os import textwrap -from datetime import date, timedelta +from datetime import date import requests import structlog -from ..tools.dates import datetime_from_iso +from ..tools.dates import date_from_iso log = structlog.get_logger() @@ -136,18 +136,17 @@ def _iter_pull_requests(org, date_range): results = list(_iter_query_results(query, searchQuery=search_query)) for pr in results: yield { - "created": datetime_from_iso(pr["createdAt"]).date(), - "closed": datetime_from_iso(pr["closedAt"]).date(), + "created": date_from_iso(pr["createdAt"]), + "closed": date_from_iso(pr["closedAt"]), "author": pr["author"]["login"], "repo": pr["repository"]["name"], "org": pr["repository"]["owner"]["login"], } -def prs_open_on_date(org, date): - start = date.isoformat() - end = (date + timedelta(days=1)).isoformat() - +def prs_open_in_range(org, start, end): + start = start.isoformat() + end = end.isoformat() date_range = f"created:<={start} closed:>={end}" return list(_iter_pull_requests(org, date_range)) diff --git a/metrics/github/backfill.py b/metrics/github/backfill.py index 78737692..a3d07667 100644 --- a/metrics/github/backfill.py +++ b/metrics/github/backfill.py @@ -9,7 +9,7 @@ from ..logs import setup_logging from ..timescaledb import TimescaleDBWriter from ..timescaledb.tables import GitHubPullRequests -from ..tools.dates import date_from_iso, iter_days +from ..tools.dates import date_from_iso, iter_days, previous_weekday from .prs import process_prs @@ -57,13 +57,15 @@ def get_prs(db): return list(cur.execute(sql)) -def open_prs(prs, org, start, days_threshold): - dates = list(iter_days(start, date.today())) +def open_prs(prs, org, days_threshold): + earliest = date_from_iso(min([pr["created"] for pr in prs])) + start = previous_weekday(earliest, 0) # Monday + mondays = list(iter_days(start, 
date.today(), step=timedelta(days=7))) today = date.today() threshold = timedelta(days=days_threshold) - def open_on_day(date, pr, today): + def open_on_day(pr, start, end): """ Filter function for PRs @@ -73,25 +75,31 @@ def open_on_day(date, pr, today): closed = date_from_iso(pr["closed"]) or today opened = date_from_iso(pr["created"]) - open_today = (opened <= date) and (closed >= day) + open_today = (opened <= start) and (closed >= end) if not open_today: return False return (closed - opened) >= threshold with TimescaleDBWriter(GitHubPullRequests) as writer: - for day in dates: - prs_on_day = [pr for pr in prs if open_on_day(day, pr, today)] + for start in mondays: + end = start + timedelta(days=6) + prs_on_day = [pr for pr in prs if open_on_day(pr, start, end)] name = f"queue_older_than_{days_threshold}_days" log.info( - "%s | %s | %s | Processing %s PRs", name, day, org, len(prs_on_day) + "%s | %s | Processing %s PRs from week starting %s", + name, + org, + len(prs_on_day), + start, ) - process_prs(writer, prs_on_day, day, name=name) + process_prs(writer, prs_on_day, start, name=name) -def pr_throughput(prs, org, start): +def pr_throughput(prs, org): + start = date_from_iso(min([pr["created"] for pr in prs])) days = list(iter_days(start, date.today())) with TimescaleDBWriter(GitHubPullRequests) as writer: @@ -125,9 +133,6 @@ def backfill(ctx, org, pull_data, db_path): org_prs = [pr for pr in prs if pr["org"] == org] log.info("Backfilling with %s PRs for %s", len(org_prs), org) - start_date = date_from_iso(min([pr["created"] for pr in org_prs])) - for day in [2, 7, 10, 30, 60]: - open_prs(org_prs, org, start_date, days_threshold=day) - - pr_throughput(org_prs, org, start_date) + open_prs(org_prs, org, days_threshold=7) + pr_throughput(org_prs, org) diff --git a/metrics/github/cli.py b/metrics/github/cli.py index a5da0011..acb5f99b 100644 --- a/metrics/github/cli.py +++ b/metrics/github/cli.py @@ -5,6 +5,7 @@ from ..timescaledb import TimescaleDBWriter from 
..timescaledb.tables import GitHubPullRequests +from ..tools.dates import previous_weekday from . import api from .backfill import backfill from .prs import process_prs @@ -25,12 +26,24 @@ def github(ctx, token): @github.command() @click.argument("org") @click.argument("date", type=click.DateTime()) -@click.argument("days-threshold", type=int) +@click.option("--days-threshold", type=int, default=7) @click.pass_context def open_prs(ctx, org, date, days_threshold): - """The number of PRs open for DAYS_THRESHOLD or longer on the given date""" + """ + How many open PRs were there this week? + + The number of PRs open for DAYS_THRESHOLD (defaults to 7 days) in the + previous week to the given date. + + Week here is defined as the dates covering the most recent Monday to Sunday + (inclusive) before the given date, e.g. if the given date is a Tuesday this + command will step back a week+1 day to collect a full week's worth of data. + """ date = date.date() - prs = api.prs_open_on_date(org, date) + + end = previous_weekday(date, 6) # Most recent Sunday + start = end - timedelta(days=6) # Monday before that Sunday + prs = api.prs_open_in_range(org, start, end) # remove PRs which have been open