Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework open prs #27

Merged
merged 3 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions metrics/github/api.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import json
import os
import textwrap
from datetime import date, timedelta
from datetime import date

import requests
import structlog

from ..tools.dates import datetime_from_iso
from ..tools.dates import date_from_iso


log = structlog.get_logger()
Expand Down Expand Up @@ -136,18 +136,17 @@ def _iter_pull_requests(org, date_range):
results = list(_iter_query_results(query, searchQuery=search_query))
for pr in results:
yield {
"created": datetime_from_iso(pr["createdAt"]).date(),
"closed": datetime_from_iso(pr["closedAt"]).date(),
"created": date_from_iso(pr["createdAt"]),
"closed": date_from_iso(pr["closedAt"]),
"author": pr["author"]["login"],
"repo": pr["repository"]["name"],
"org": pr["repository"]["owner"]["login"],
}


def prs_open_on_date(org, date):
start = date.isoformat()
end = (date + timedelta(days=1)).isoformat()

def prs_open_in_range(org, start, end):
    """
    Fetch the PRs for an org matching a created/closed date range

    The search qualifier built here asks for PRs created on or before `start`
    and closed on or after `end`, with both dates rendered in ISO format.  The
    matching PRs are returned as a list.
    """
    created_by = start.isoformat()
    closed_from = end.isoformat()
    qualifiers = f"created:<={created_by} closed:>={closed_from}"

    return [*_iter_pull_requests(org, qualifiers)]
Expand Down
35 changes: 20 additions & 15 deletions metrics/github/backfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..logs import setup_logging
from ..timescaledb import TimescaleDBWriter
from ..timescaledb.tables import GitHubPullRequests
from ..tools.dates import date_from_iso, iter_days
from ..tools.dates import date_from_iso, iter_days, previous_weekday
from .prs import process_prs


Expand Down Expand Up @@ -57,13 +57,15 @@ def get_prs(db):
return list(cur.execute(sql))


def open_prs(prs, org, start, days_threshold):
dates = list(iter_days(start, date.today()))
def open_prs(prs, org, days_threshold):
earliest = date_from_iso(min([pr["created"] for pr in prs]))
start = previous_weekday(earliest, 0) # Monday
mondays = list(iter_days(start, date.today(), step=timedelta(days=7)))

today = date.today()
threshold = timedelta(days=days_threshold)

def open_on_day(date, pr, today):
def open_on_day(pr, start, end):
"""
Filter function for PRs

Expand All @@ -73,25 +75,31 @@ def open_on_day(date, pr, today):
closed = date_from_iso(pr["closed"]) or today
opened = date_from_iso(pr["created"])

open_today = (opened <= date) and (closed >= day)
open_today = (opened <= start) and (closed >= end)
if not open_today:
return False

return (closed - opened) >= threshold

with TimescaleDBWriter(GitHubPullRequests) as writer:
for day in dates:
prs_on_day = [pr for pr in prs if open_on_day(day, pr, today)]
for start in mondays:
end = start + timedelta(days=6)
prs_on_day = [pr for pr in prs if open_on_day(pr, start, end)]

name = f"queue_older_than_{days_threshold}_days"

log.info(
"%s | %s | %s | Processing %s PRs", name, day, org, len(prs_on_day)
"%s | %s | Processing %s PRs from week starting %s",
name,
org,
len(prs_on_day),
start,
)
process_prs(writer, prs_on_day, day, name=name)
process_prs(writer, prs_on_day, start, name=name)


def pr_throughput(prs, org, start):
def pr_throughput(prs, org):
start = date_from_iso(min([pr["created"] for pr in prs]))
days = list(iter_days(start, date.today()))

with TimescaleDBWriter(GitHubPullRequests) as writer:
Expand Down Expand Up @@ -125,9 +133,6 @@ def backfill(ctx, org, pull_data, db_path):

org_prs = [pr for pr in prs if pr["org"] == org]
log.info("Backfilling with %s PRs for %s", len(org_prs), org)
start_date = date_from_iso(min([pr["created"] for pr in org_prs]))

for day in [2, 7, 10, 30, 60]:
open_prs(org_prs, org, start_date, days_threshold=day)

pr_throughput(org_prs, org, start_date)
open_prs(org_prs, org, days_threshold=7)
pr_throughput(org_prs, org)
19 changes: 16 additions & 3 deletions metrics/github/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ..timescaledb import TimescaleDBWriter
from ..timescaledb.tables import GitHubPullRequests
from ..tools.dates import previous_weekday
from . import api
from .backfill import backfill
from .prs import process_prs
Expand All @@ -25,12 +26,24 @@ def github(ctx, token):
@github.command()
@click.argument("org")
@click.argument("date", type=click.DateTime())
@click.argument("days-threshold", type=int)
@click.argument("--days-threshold", type=int, default=7)
@click.pass_context
def open_prs(ctx, org, date, days_threshold):
"""The number of PRs open for DAYS_THRESHOLD or longer on the given date"""
"""
How many open PRs were there this week?

The number of PRs open for DAYS_THRESHOLD (defaults to 7 days) in the
previous week to the given date.

Week here is defined as the dates covering the most recent Monday to Sunday
(inclusive) before the given date, eg if the given date is a Tuesday this
command will step back a week+1 day to collect a full weeks worth of data.
"""
date = date.date()
prs = api.prs_open_on_date(org, date)

end = previous_weekday(date, 6) # Most recent Sunday
start = end - timedelta(days=6) # Monday before that Sunday
prs = api.prs_open_in_range(org, start, end)

# remove PRs which have been open <days_threshold days
open_prs = [
Expand Down
18 changes: 18 additions & 0 deletions metrics/tools/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,21 @@ def iter_days(start, end, step=DELTA):
while start <= end:
yield start
start += step


def previous_weekday(d, weekday):
    """
    Get the date of the most recent given weekday, on or before a date

    Weekdays are numbered as by `date.weekday()`: Monday is 0 and Sunday is 6.
    If the given date already falls on the requested weekday it is returned
    unchanged, otherwise the closest earlier date on that weekday is returned.

    For example, when giving the date 2023-11-16 (a Thursday) and asking for
    the previous Sunday, the returned date would be 2023-11-12.

    Raises ValueError when weekday is not in the range 0-6, as no date can
    fall on such a weekday.
    """
    if weekday not in range(7):
        raise ValueError(f"weekday must be between 0 and 6, got {weekday!r}")

    # Number of days since the requested weekday last occurred; the modulo
    # wraps a negative difference round into the previous week.
    days_back = (d.weekday() - weekday) % 7

    return d - timedelta(days=days_back)
Empty file added tests/metrics/__init__.py
Empty file.
Empty file added tests/metrics/tools/__init__.py
Empty file.
83 changes: 83 additions & 0 deletions tests/metrics/tools/test_dates.py
ghickman marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from datetime import date, datetime

import pytest

from metrics.tools.dates import (
date_from_iso,
datetime_from_iso,
iter_days,
previous_weekday,
)


# TODO: remove these once we switch to 3.12, as they have been added to the
# calendar module in stdlib
# Numbered to match date.weekday(): Monday is 0 through to Sunday as 6.
MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY = range(7)


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        (None, None),
        ("2020-07-08", date(2020, 7, 8)),
        ("2020-07-08T09:12", date(2020, 7, 8)),
    ],
)
def test_date_from_iso(value, expected):
    """None maps to None; ISO strings map to a date, with or without a time."""
    result = date_from_iso(value)

    assert result == expected


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        (None, None),
        ("2020-07-08", datetime(2020, 7, 8, 0, 0, 0)),
        ("2020-07-08T09:12", datetime(2020, 7, 8, 9, 12, 0)),
    ],
)
def test_datetime_from_iso(value, expected):
    """None maps to None; a date-only ISO string maps to midnight that day."""
    result = datetime_from_iso(value)

    assert result == expected


def test_iter_days():
    """Both the start and the end date are included in the yielded days."""
    first = date(2020, 7, 8)
    last = date(2020, 7, 10)
    expected = [date(2020, 7, 8), date(2020, 7, 9), date(2020, 7, 10)]

    assert list(iter_days(first, last)) == expected


def test_iter_days_with_empty_values():
    """A None start, end or step makes iterating the days raise TypeError."""
    day = date(2020, 7, 8)
    bad_arguments = [
        (None, day),
        (day, None),
        (day, date(2022, 7, 8), None),
    ]

    for args in bad_arguments:
        with pytest.raises(TypeError):
            list(iter_days(*args))


@pytest.mark.parametrize(
    ("d", "weekday", "expected"),
    [
        (date(2023, 11, 16), MONDAY, date(2023, 11, 13)),
        (date(2023, 11, 16), TUESDAY, date(2023, 11, 14)),
        (date(2023, 11, 16), WEDNESDAY, date(2023, 11, 15)),
        (date(2023, 11, 16), THURSDAY, date(2023, 11, 16)),
        (date(2023, 11, 16), FRIDAY, date(2023, 11, 10)),
        (date(2023, 11, 16), SATURDAY, date(2023, 11, 11)),
        (date(2023, 11, 16), SUNDAY, date(2023, 11, 12)),
    ],
    ids=["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"],
)
def test_previous_weekday(d, weekday, expected):
    """
    Check each weekday as seen from Thursday 2023-11-16

    Monday to Thursday resolve within the same week (Thursday to the date
    itself), while Friday to Sunday resolve to the week before.
    """
    result = previous_weekday(d, weekday)

    assert result == expected