Skip to content

Commit

Permalink
Use separate queues to fetch fresh vs. old PRs.
Browse files Browse the repository at this point in the history
Closes #15.
  • Loading branch information
JoshRosen committed Aug 31, 2014
1 parent 1e737c5 commit b5b44b8
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 2 deletions.
9 changes: 8 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
from dateutil.parser import parse as parse_datetime
from dateutil import tz
from datetime import datetime
import logging
import os
@@ -27,8 +29,13 @@ def fetch_and_process(url):
response = raw_request(url, oauth_token=app.config['GITHUB_OAUTH_KEY'])
link_header = parse_link_header(response.headers.get('Link', ''))
prs = json.loads(response.content)
now = datetime.utcnow()
for pr in prs:
taskqueue.add(url="/tasks/update-issue/%i" % pr['number'])
updated_at = \
parse_datetime(pr['updated_at']).astimezone(tz.tzutc()).replace(tzinfo=None)
is_fresh = (now - updated_at).total_seconds() < app.config['FRESHNESS_THRESHOLD']
queue_name = ("fresh-prs" if is_fresh else "old-prs")
taskqueue.add(url="/tasks/update-issue/%i" % pr['number'], queue_name=queue_name)
for link in link_header.links:
if link.rel == 'next':
fetch_and_process(link.href)
Expand Down
11 changes: 10 additions & 1 deletion queue.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
queue:
- name: default
# Queue used for fetching recently-updated PRs.
# Has a faster processing rate so that the site stays up-to-date.
- name: fresh-prs
rate: 100/m
bucket_size: 100
max_concurrent_requests: 100
# Queue used for fetching older PRs.
# Has a lower processing rate so that bulk-loading doesn't exhaust
# the GitHub API call rate-limit.
- name: old-prs
rate: 2500/h # GitHub's default limit is 5000 requests per hour
bucket_size: 20
max_concurrent_requests: 20
6 changes: 6 additions & 0 deletions settings.cfg.template
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
# OAuth key used for issuing requests against GitHub API
GITHUB_OAUTH_KEY = ''

# Threshold (in seconds) used for classifying "fresh" pull requests.
# If a pull request has been updated within the last FRESHNESS_THRESHOLD
# seconds, then updates for it are enqueued on the "fresh-prs" task queue,
# which has a higher processing rate; otherwise they go to the "old-prs" queue.
# The default below is 24 hours.
FRESHNESS_THRESHOLD = 60 * 60 * 24

0 comments on commit b5b44b8

Please sign in to comment.