-
Notifications
You must be signed in to change notification settings - Fork 4
/
step1_search_by_location.py
executable file
·105 lines (81 loc) · 2.87 KB
/
step1_search_by_location.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: ai ts=4 sts=4 et sw=4 nu
import os
import json
import logging
import copy
import requests
from requests.auth import HTTPBasicAuth
# Country definitions keyed by country code; each value is a dict that the
# main loop reads 'name', 'names' and 'patterns' from.
# Loaded via a context manager so the file handle is closed deterministically.
with open('africa_data.json') as _countries_file:
    countries = json.load(_countries_file)

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

# Personal access token read from the environment (None when unset).
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')

# Path of the JSON file the collected users are dumped to.
output = 'step1.json'

# Durations in seconds.
oneMinute = 60
oneHour = oneMinute * 60

# NOTE(review): the names below are not referenced in the visible part of
# this script; presumably rate-limit bookkeeping -- confirm before removing.
minRemainingToStop = 30
reqs = 0
reqsLimit = None
reqsRemaining = None

# Extra HTTP headers sent with every search request.
headers = {}

# HTTP basic auth with the token as user name and "x-oauth-basic" as password.
TOKEN_AUTH = HTTPBasicAuth(GITHUB_TOKEN, "x-oauth-basic")

# Accumulator filled by addTo() with every user record found.
allUsers = []
def addTo(searchTerm, allUsers, countryStub, city=None):
    """Search GitHub users by location and append the matches to allUsers.

    Every user dict returned for ``searchTerm`` is updated in place with a
    ``country`` key (set to ``countryStub``) and a ``city`` key (set to
    ``city``), then appended to ``allUsers``.

    Args:
        searchTerm: location string interpolated into the search URL.
        allUsers: list that receives the tagged user dicts (mutated).
        countryStub: country dict stored under each user's ``country`` key.
        city: optional city dict stored under each user's ``city`` key.

    Returns:
        None. Results are delivered by mutating ``allUsers``.
    """

    def usersFrom(location):
        """Return the list of user dicts found for ``location``.

        Walks pages 1-10 sorted by join date ascending, then pages 1-10
        descending, because no more than 10 pages can be queried for one
        ordering. Stops at the first empty page or unparsable response and
        returns whatever was collected so far.
        """
        users = []
        for order in ('asc', 'desc'):
            for page in range(1, 11):
                req = requests.get(
                    'https://api.github.com/legacy/user/search/location:%s'
                    % location,
                    headers=headers,
                    params={'start_page': page,
                            'sort': 'joined',
                            'order': order},
                    auth=TOKEN_AUTH)
                try:
                    page_users = json.loads(req.content).get('users')
                    if not len(page_users):
                        # Empty page: nothing more to fetch for this term.
                        return users
                    users += page_users
                except (ValueError, AttributeError, TypeError):
                    # ValueError: body is not valid JSON.
                    # AttributeError: parsed JSON is not an object.
                    # TypeError: 'users' is missing (None) or not a list.
                    # Deliberately narrower than a bare except so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    logger.warning("Failed to parse JSON:")
                    logger.warning(req.content)
                    return users
        return users

    jsonUsers = usersFrom(searchTerm)
    if not jsonUsers:
        return

    for user in jsonUsers:
        logger.info("FOUND -- %s -- %s",
                    user.get('username'), user.get('location'))
        user.update({'country': countryStub,
                     'city': city})
        allUsers.append(user)
# Driver: for every country, search by each of its city patterns and by each
# of its country names, collecting the tagged users into allUsers.
for countryCode, country in countries.items():
    logger.info("COUNTRY: %s", country.get('name'))

    # Shallow copy of the country record, tagged with its code and stripped
    # of the city list, to attach to every user found for this country.
    countryStub = copy.copy(country)
    countryStub.update({'code': countryCode})
    # pop() instead of del: a country without 'patterns' must not raise,
    # consistent with the .get('patterns', []) lookup just below.
    countryStub.pop('patterns', None)

    for city in country.get('patterns', []):
        logger.info("SEARCHING for city -- %s", city.get('name'))
        # A city without explicit search patterns is searched by its name.
        for searchName in city.get('patterns', [city.get('name')]):
            addTo(searchName, allUsers, countryStub, city)

    for name in country.get('names', []):
        logger.info("SEARCHING for country -- %s", name)
        addTo(name, allUsers, countryStub, None)

logger.info("Found %d records", len(allUsers))

# Context manager guarantees the output is flushed and closed.
with open(output, 'w') as out_file:
    json.dump(allUsers, out_file, indent=4)

# The same account can be found through several search terms.
logger.info("UNIQUE user accounts: %d",
            len({u.get('username') for u in allUsers}))