Skip to content

Commit

Permalink
Replace pr-queue with open-prs command
Browse files Browse the repository at this point in the history
This lets us get a count of open PRs on a given day, with a threshold
for how many days they've been open.

We want this command to only care about a 7-day threshold, but we're
keeping the other thresholds in the backfill command so we can compare
and contrast in Grafana.

Note: A zero-day threshold (i.e. all PRs open on a day) here is the same
data that PR throughput is tracking, but each command is scoped to what's
needed for its metric, so we're OK with some duplication, particularly
while we're still scoping out what's needed with the stakeholders.
  • Loading branch information
ghickman committed Nov 14, 2023
1 parent c9276f0 commit 989113f
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 37 deletions.
42 changes: 20 additions & 22 deletions metrics/github/backfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..logs import setup_logging
from ..timescaledb import TimescaleDBWriter
from ..timescaledb.tables import GitHubPullRequests
from ..tools.dates import date_from_iso, datetime_from_iso, iter_days
from ..tools.dates import date_from_iso, iter_days
from .prs import process_prs


Expand Down Expand Up @@ -57,17 +57,18 @@ def get_prs(db):
return list(cur.execute(sql))


def pr_queue(prs, org, start, days_threshold=None):
dates = iter_days(start, date.today(), step=timedelta(days=1))
for day in dates:
prs_on_day = [
pr
for pr in prs
if date_from_iso(pr["created"]) <= day
and date_from_iso(pr["closed"]) >= day
]
def open_prs(prs, org, start, days_threshold):
dates = list(iter_days(start, date.today()))

with TimescaleDBWriter(GitHubPullRequests) as writer:
for day in dates:
prs_on_day = [
pr
for pr in prs
if date_from_iso(pr["created"]) <= day
and date_from_iso(pr["closed"]) >= day
]

if days_threshold is not None:
# remove PRs which have been open <days_threshold days
prs_on_day = [
pr
Expand All @@ -76,12 +77,12 @@ def pr_queue(prs, org, start, days_threshold=None):
>= timedelta(days=days_threshold)
]

suffix = f"_older_than_{days_threshold}_days" if days_threshold else ""
key = f"queue{suffix}"
name = f"queue_older_than_{days_threshold}_days"

log.info("%s | %s | %s | Processing %s PRs", key, day, org, len(prs_on_day))
with TimescaleDBWriter(GitHubPullRequests) as writer:
process_prs(writer, prs_on_day, day, f"queue{suffix}")
log.info(
"%s | %s | %s | Processing %s PRs", name, day, org, len(prs_on_day)
)
process_prs(writer, prs_on_day, day, name=name)


def pr_throughput(prs, org, start):
Expand Down Expand Up @@ -120,12 +121,9 @@ def backfill(ctx, org, pull_data, db_path):

org_prs = [pr for pr in prs if pr["org"] == org]
log.info("Backfilling with %s PRs for %s", len(org_prs), org)
start_date = min([pr["created"] for pr in org_prs])
start_date = datetime_from_iso(start_date).date()

pr_queue(org_prs, org, start_date)
start_date = date_from_iso(min([pr["created"] for pr in org_prs]))

for day in [2, 10, 30, 60]:
pr_queue(org_prs, org, start_date, days_threshold=day)
for day in [2, 7, 10, 30, 60]:
open_prs(org_prs, org, start_date, days_threshold=day)

pr_throughput(org_prs, org, start_date)
29 changes: 14 additions & 15 deletions metrics/github/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,25 @@ def github(ctx, token):
@github.command()
@click.argument("org")
@click.argument("date", type=click.DateTime())
@click.option("--days-threshold", type=int)
@click.argument("days-threshold", type=int)
@click.pass_context
def pr_queue(ctx, org, date, days_threshold):
"""The number of PRs open on the given date"""
def open_prs(ctx, org, date, days_threshold):
"""The number of PRs open for DAYS_THRESHOLD or longer on the given date"""
date = date.date()
prs = api.prs_open_on_date(org, date)

if days_threshold is not None:
# remove PRs which have been open <days_threshold days
prs = [
pr
for pr in prs
if (pr["closed"] - pr["created"]) >= timedelta(days=days_threshold)
]
# remove PRs which have been open <days_threshold days
open_prs = [
pr
for pr in prs
if (pr["closed"] - pr["created"]) >= timedelta(days=days_threshold)
]

suffix = f"_older_than_{days_threshold}_days" if days_threshold else ""

log.info("%s | %s | Processing %s PRs", date, org, len(prs))
with TimescaleDBWriter("github_pull_requests", f"queue{suffix}") as writer:
process_prs(writer, prs, date)
log.info("%s | %s | Processing %s PRs", date, org, len(open_prs))
with TimescaleDBWriter(GitHubPullRequests) as writer:
process_prs(
writer, open_prs, date, name=f"queue_older_than_{days_threshold}_days"
)


@github.command()
Expand Down

0 comments on commit 989113f

Please sign in to comment.