Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework the open PRs data to measure at a specific point in time #54

Merged
merged 1 commit into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions metrics/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import requests
import structlog

from ..tools.dates import date_from_iso
from ..tools.dates import date_from_iso, datetime_from_iso


log = structlog.get_logger()
Expand Down Expand Up @@ -139,9 +139,12 @@ def iter_repo_prs(org, repo):
"org": org,
"repo": repo,
"author": pr["author"]["login"],
"created": date_from_iso(pr["createdAt"]),
"closed": date_from_iso(pr["closedAt"]),
"closed_at": datetime_from_iso(pr["closedAt"]),
"created": date_from_iso(pr["createdAt"]),
"created_at": datetime_from_iso(pr["createdAt"]),
"merged": date_from_iso(pr["mergedAt"]),
"merged_at": datetime_from_iso(pr["mergedAt"]),
}


Expand Down
42 changes: 26 additions & 16 deletions metrics/github/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date, timedelta
from datetime import UTC, date, datetime, time, timedelta

import click
import structlog
Expand All @@ -16,44 +16,54 @@


def open_prs(prs, org, days_threshold):
"""
How many PRs were open at a given sample point?

We're using Monday morning here to match how the values in throughput are
bucketed with TimescaleDB's time_bucket() function.

So we start with the Monday before the earliest PR, then iterate from that
Monday to today's date, filtering the list of PRs down to just those open on
the given Monday morning.
"""
earliest = min([pr["created"] for pr in prs])
start = previous_weekday(earliest, 0) # Monday
mondays = list(iter_days(start, date.today(), step=timedelta(days=7)))

today = date.today()
the_future = datetime(9999, 1, 1, tzinfo=UTC)
threshold = timedelta(days=days_threshold)

def open_on_day(pr, start, end):
def is_open(pr, dt):
"""
Filter function for PRs

Checks whether a PR is open today and if it's been open for greater or
equal to the threshold of days.
Checks whether a PR was open at the given datetime, and if it has been
open long enough.
"""
closed = pr["closed"] or today
opened = pr["created"]
closed = pr["closed_at"] or the_future
opened = pr["created_at"]

open_today = (opened <= start) and (closed >= end)
if not open_today:
open_now = opened < dt < closed
if not open_now:
return False

return (closed - opened) >= threshold

with TimescaleDBWriter(GitHubPullRequests) as writer:
for start in mondays:
end = start + timedelta(days=6)
prs_on_day = [pr for pr in prs if open_on_day(pr, start, end)]
for monday in mondays:
dt = datetime.combine(monday, time(), tzinfo=UTC)
prs_open = [pr for pr in prs if is_open(pr, dt)]

name = f"queue_older_than_{days_threshold}_days"

log.info(
"%s | %s | Processing %s PRs from week starting %s",
"%s | %s | Processing %s PRs open at %s",
name,
org,
len(prs_on_day),
start,
len(prs_open),
dt,
)
process_prs(writer, prs_on_day, start, name=name)
process_prs(writer, prs_open, monday, name=name)


def pr_throughput(prs, org):
Expand Down