-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathget_verified.py
65 lines (48 loc) · 2.06 KB
/
get_verified.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
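Retrieves the list of verified Twitter usernames (for preprocessing) by paging through the accounts followed by @verified and writing one JSON record per line to data/verified_users.jl.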
Based on https://raw.githubusercontent.com/twitterdev/Twitter-API-v2-sample-code/main/Follows-Lookup/followers_lookup.py
"""
import os
import json
import time
import requests
from datetime import datetime
# To set your environment variables in your terminal run the following line:
# export 'BEARER_TOKEN'='<your_bearer_token>'
bearer_token = os.getenv("BEARER_TOKEN")


def bearer_oauth(r):
    """Attach bearer-token credentials to an outgoing request.

    Sets the ``Authorization`` header from the module-level ``bearer_token``
    and a fixed ``User-Agent`` header on ``r``, then returns the same object.
    """
    r.headers.update(
        {
            "Authorization": f"Bearer {bearer_token}",
            "User-Agent": "v2FollowersLookupPython",
        }
    )
    return r
if __name__ == "__main__":
    # Pages through the accounts followed by @verified and writes each user
    # record as one JSON line, retrying failed requests after a pause.

    # With no token the Authorization header would be sent as "Bearer None",
    # which cannot authenticate; the retry loop below would then never end.
    if not bearer_token:
        raise SystemExit("BEARER_TOKEN environment variable is not set.")

    wait_secs = 60  # pause between successful pages
    wait_secs_on_error = 120  # pause before retrying after a failure
    request_timeout_secs = 30  # bound each request so a stalled connection cannot hang the run
    endpoint = "https://api.twitter.com/2/users/63796828/following"  # @verified
    params = {"user.fields": "created_at,verified,public_metrics", "max_results": 500}
    users = set()  # usernames seen so far; used only for the progress count

    with open('data/verified_users.jl', 'w', encoding='utf-8') as out_f:
        while True:
            try:
                response = requests.request(
                    "GET",
                    endpoint,
                    auth=bearer_oauth,
                    params=params,
                    timeout=request_timeout_secs,
                )
            except requests.exceptions.RequestException as err:
                # Only network-level failures are retried; Ctrl-C and
                # programming errors propagate instead of being swallowed.
                print(datetime.now(), "Request failed: {}".format(err))
                print(datetime.now(), "[Request Error] Sleeping %d secs ..." % wait_secs_on_error)
                time.sleep(wait_secs_on_error)
                continue

            if response.status_code != 200:
                print(datetime.now(), "Request returned an error: {} {}".format(response.status_code, response.text))
                print(datetime.now(), "[Bad Code] Sleeping %d secs ..." % wait_secs_on_error)
                time.sleep(wait_secs_on_error)
                continue

            try:
                json_response = response.json()
            except ValueError as err:
                # A 200 with an unparsable body is retried like any other
                # failure rather than aborting and losing the collected pages.
                print(datetime.now(), "Response body is not valid JSON: {}".format(err))
                print(datetime.now(), "[Bad Body] Sleeping %d secs ..." % wait_secs_on_error)
                time.sleep(wait_secs_on_error)
                continue

            # A page without a 'data' key is treated as containing no users.
            for user_info in json_response.get('data', []):
                out_f.write(json.dumps(user_info) + '\n')
                users.add(user_info['username'])
            # Flush per page so progress survives an interruption during the
            # long sleeps between requests.
            out_f.flush()
            print(datetime.now(), 'Retrieved %d users.' % len(users))

            next_token = json_response.get('meta', {}).get('next_token')
            if next_token is None:
                break  # end of list
            params['pagination_token'] = next_token
            time.sleep(wait_secs)