Skip to content

Commit

Permalink
Rework open PRs to look at PRs from a week prior to the given date
Browse files Browse the repository at this point in the history
  • Loading branch information
ghickman committed Nov 16, 2023
1 parent 27833ea commit e840ee6
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 27 deletions.
15 changes: 7 additions & 8 deletions metrics/github/api.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import json
import os
import textwrap
from datetime import date, timedelta
from datetime import date

import requests
import structlog

from ..tools.dates import datetime_from_iso
from ..tools.dates import date_from_iso


log = structlog.get_logger()
Expand Down Expand Up @@ -136,18 +136,17 @@ def _iter_pull_requests(org, date_range):
results = list(_iter_query_results(query, searchQuery=search_query))
for pr in results:
yield {
"created": datetime_from_iso(pr["createdAt"]).date(),
"closed": datetime_from_iso(pr["closedAt"]).date(),
"created": date_from_iso(pr["createdAt"]),
"closed": date_from_iso(pr["closedAt"]),
"author": pr["author"]["login"],
"repo": pr["repository"]["name"],
"org": pr["repository"]["owner"]["login"],
}


def prs_open_on_date(org, date):
start = date.isoformat()
end = (date + timedelta(days=1)).isoformat()

def prs_open_in_range(org, start, end):
start = start.isoformat()
end = end.isoformat()
date_range = f"created:<={start} closed:>={end}"

return list(_iter_pull_requests(org, date_range))
Expand Down
35 changes: 20 additions & 15 deletions metrics/github/backfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..logs import setup_logging
from ..timescaledb import TimescaleDBWriter
from ..timescaledb.tables import GitHubPullRequests
from ..tools.dates import date_from_iso, iter_days
from ..tools.dates import date_from_iso, iter_days, previous_weekday
from .prs import process_prs


Expand Down Expand Up @@ -57,13 +57,15 @@ def get_prs(db):
return list(cur.execute(sql))


def open_prs(prs, org, start, days_threshold):
dates = list(iter_days(start, date.today()))
def open_prs(prs, org, days_threshold):
earliest = date_from_iso(min([pr["created"] for pr in prs]))
start = previous_weekday(earliest, 0) # Monday
mondays = list(iter_days(start, date.today(), step=timedelta(days=7)))

today = date.today()
threshold = timedelta(days=days_threshold)

def open_on_day(date, pr, today):
def open_on_day(pr, start, end):
"""
Filter function for PRs
Expand All @@ -73,25 +75,31 @@ def open_on_day(date, pr, today):
closed = date_from_iso(pr["closed"]) or today
opened = date_from_iso(pr["created"])

open_today = (opened <= date) and (closed >= day)
open_today = (opened <= start) and (closed >= end)
if not open_today:
return False

return (closed - opened) >= threshold

with TimescaleDBWriter(GitHubPullRequests) as writer:
for day in dates:
prs_on_day = [pr for pr in prs if open_on_day(day, pr, today)]
for start in mondays:
end = start + timedelta(days=6)
prs_on_day = [pr for pr in prs if open_on_day(pr, start, end)]

name = f"queue_older_than_{days_threshold}_days"

log.info(
"%s | %s | %s | Processing %s PRs", name, day, org, len(prs_on_day)
"%s | %s | Processing %s PRs from week starting %s",
name,
org,
len(prs_on_day),
start,
)
process_prs(writer, prs_on_day, day, name=name)
process_prs(writer, prs_on_day, start, name=name)


def pr_throughput(prs, org, start):
def pr_throughput(prs, org):
start = date_from_iso(min([pr["created"] for pr in prs]))
days = list(iter_days(start, date.today()))

with TimescaleDBWriter(GitHubPullRequests) as writer:
Expand Down Expand Up @@ -125,9 +133,6 @@ def backfill(ctx, org, pull_data, db_path):

org_prs = [pr for pr in prs if pr["org"] == org]
log.info("Backfilling with %s PRs for %s", len(org_prs), org)
start_date = date_from_iso(min([pr["created"] for pr in org_prs]))

for day in [2, 7, 10, 30, 60]:
open_prs(org_prs, org, start_date, days_threshold=day)

pr_throughput(org_prs, org, start_date)
open_prs(org_prs, org, days_threshold=7)
pr_throughput(org_prs, org)
19 changes: 16 additions & 3 deletions metrics/github/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ..timescaledb import TimescaleDBWriter
from ..timescaledb.tables import GitHubPullRequests
from ..tools.dates import previous_weekday
from . import api
from .backfill import backfill
from .prs import process_prs
Expand All @@ -25,12 +26,24 @@ def github(ctx, token):
@github.command()
@click.argument("org")
@click.argument("date", type=click.DateTime())
@click.argument("days-threshold", type=int)
@click.argument("--days-threshold", type=int, default=7)
@click.pass_context
def open_prs(ctx, org, date, days_threshold):
"""The number of PRs open for DAYS_THRESHOLD or longer on the given date"""
"""
How many open PRs were there this week?
The number of PRs open for DAYS_THRESHOLD (defaults to 7 days) in the
previous week to the given date.
Week here is defined as the dates covering the most recent Monday to Sunday
(inclusive) before the given date, eg if the given date is a Tuesday this
command will step back a week+1 day to collect a full weeks worth of data.
"""
date = date.date()
prs = api.prs_open_on_date(org, date)

end = previous_weekday(date, 6) # Most recent Sunday
start = end - timedelta(days=6) # Monday before that Sunday
prs = api.prs_open_in_range(org, start, end)

# remove PRs which have been open <days_threshold days
open_prs = [
Expand Down
18 changes: 18 additions & 0 deletions metrics/tools/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,21 @@ def iter_days(start, end, step=DELTA):
while start <= end:
yield start
start += step


def previous_weekday(d, weekday):
    """
    Get the date of the most recent given weekday, on or before a date

    Starting at the given date, step backwards until the given weekday is
    found, returning the date for that weekday.  If the given date already
    falls on that weekday it is returned unchanged.

    For example, when giving the date 2023-11-16 (a Thursday) and asking for
    the previous Sunday, the returned date would be 2023-11-12.

    Args:
        d: the date to start from.
        weekday: the target day of the week, using the same numbering as
            date.weekday(): Monday is 0 and Sunday is 6.

    Returns:
        The latest date on or before d which falls on the given weekday.

    Raises:
        ValueError: if weekday is not in the range 0-6.
    """
    # date.weekday() only ever returns 0-6, so any other value could never be
    # matched; reject it up front rather than walking back through every
    # representable date.
    if weekday not in range(7):
        raise ValueError(
            f"weekday must be between 0 (Monday) and 6 (Sunday), got {weekday!r}"
        )

    # The modulo keeps the offset in 0-6: zero when d is already the target
    # weekday, otherwise the number of days back to its latest occurrence.
    days_back = (d.weekday() - weekday) % 7
    return d - timedelta(days=days_back)
35 changes: 34 additions & 1 deletion tests/metrics/tools/test_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,23 @@

import pytest

from metrics.tools.dates import date_from_iso, datetime_from_iso, iter_days
from metrics.tools.dates import (
date_from_iso,
datetime_from_iso,
iter_days,
previous_weekday,
)


# TODO: remove when we switch to 3.12, this has been added to the calendar
# module in stdlib
#
# Weekday numbers for the tests below, using the numbering that
# date.weekday() returns: Monday is 0 through to Sunday as 6.
MONDAY = 0
TUESDAY = 1
WEDNESDAY = 2
THURSDAY = 3
FRIDAY = 4
SATURDAY = 5
SUNDAY = 6


@pytest.mark.parametrize(
Expand Down Expand Up @@ -48,3 +64,20 @@ def test_iter_days_with_empty_values():

with pytest.raises(TypeError):
list(iter_days(date(2020, 7, 8), date(2022, 7, 8), None))


@pytest.mark.parametrize(
    "d,weekday,expected",
    [
        # 2023-11-16 is a Thursday: Monday to Wednesday resolve to earlier
        # days in the same week, Thursday resolves to the date itself, and
        # Friday to Sunday resolve to days in the week before.
        pytest.param(date(2023, 11, 16), MONDAY, date(2023, 11, 13), id="monday"),
        pytest.param(date(2023, 11, 16), TUESDAY, date(2023, 11, 14), id="tuesday"),
        pytest.param(
            date(2023, 11, 16), WEDNESDAY, date(2023, 11, 15), id="wednesday"
        ),
        pytest.param(
            date(2023, 11, 16), THURSDAY, date(2023, 11, 16), id="thursday"
        ),
        pytest.param(date(2023, 11, 16), FRIDAY, date(2023, 11, 10), id="friday"),
        pytest.param(
            date(2023, 11, 16), SATURDAY, date(2023, 11, 11), id="saturday"
        ),
        pytest.param(date(2023, 11, 16), SUNDAY, date(2023, 11, 12), id="sunday"),
    ],
)
def test_previous_weekday(d, weekday, expected):
    """previous_weekday returns the expected date for each day of the week."""
    found = previous_weekday(d, weekday)

    assert found == expected

0 comments on commit e840ee6

Please sign in to comment.