-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathfb_scrapper.py
67 lines (54 loc) · 1.96 KB
/
fb_scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import shelve
from fb_posts import FB_SCRAPE
from pg_db import save_scrape_PS, get_time
import time
import os
#import fb_pages
# Entry point for scraping.
# Call with a page/group id and from_time: 0 to scrape all the way back, 1 to scrape since the last scrape.
def scrape_groups_pages(page_id, from_time, useKafka, useES):
    """Scrape a page or group feed and return a confirmation message.

    Thin wrapper around ``scrape``; every argument is forwarded unchanged.

    Args:
        page_id: Id of the page or group to scrape.
        from_time: Start-time selector forwarded to ``scrape``.
        useKafka: Forwarded to ``scrape``.
        useES: Forwarded to ``scrape``.

    Returns:
        A human-readable string naming the start time and the page id.
    """
    scrape(page_id, from_time, useKafka, useES)
    # The space before "for" keeps the start time and the following word
    # from running together in the message.
    return "Successfully scraped from " + str(from_time) + " for page id " + str(page_id)
# Get the timestamp to scrape from
def get_tstamp(page_id, tstamp, path):
    """Resolve the timestamp a scrape should start from.

    Args:
        page_id: Page id handed to ``get_time`` when ``tstamp`` equals 1.
        tstamp: 1 to use the value returned by ``get_time(page_id)``, 0 to
            use the fixed timestamp -2180131200, or any other value to be
            used as the start timestamp as-is.
        path: Unused; kept so existing callers keep working.

    Returns:
        The resolved start timestamp.
    """
    # Compare by value: ``is`` against an int literal only works through
    # CPython's small-integer caching and emits a SyntaxWarning on 3.8+.
    if tstamp == 1:
        return get_time(page_id)
    if tstamp == 0:
        # NOTE(review): presumably chosen to predate every post so the whole
        # feed is scraped - confirm.
        return -2180131200
    return tstamp
# Save the time the page was last scraped
def save_shelve(page_id, path):
    """Record the current time as the last-scrape time for ``page_id``.

    The timestamp (whole seconds from ``time.time()``) is passed to
    ``save_scrape_PS`` and also stored under ``page_id`` in the shelve file
    at ``path``.

    Args:
        page_id: Key under which the timestamp is stored.
        path: Filename of the shelve database to update.

    Returns:
        The fixed confirmation string "shelve successfully saved at time".
    """
    timestamp = int(time.time())
    # NOTE(review): 123031 and 30 are opaque positional arguments to
    # save_scrape_PS - confirm their meaning against pg_db.
    save_scrape_PS(page_id, timestamp, 123031, 30)
    store = shelve.open(path)
    try:
        store[page_id] = timestamp
    finally:
        # Always release the underlying database file, even if the write fails.
        store.close()
    return "shelve successfully saved at time"
#
def get_access(path):
    """Build the ``<app_id>|<app_secret>`` access token.

    The app id is read from the first line of the file at ``path`` and the
    app secret from the second line. If the file cannot be opened or read,
    the ``FB_ID`` and ``FB_KEY`` environment variables are used instead.

    Args:
        path: Path of the credentials file.

    Returns:
        The access token string ``app_id + "|" + app_secret``.

    Raises:
        KeyError: If the file is unavailable and ``FB_ID`` or ``FB_KEY`` is
            not set in the environment.
    """
    try:
        with open(path, 'r') as f:
            # Strip surrounding whitespace from both lines so a trailing
            # newline (or "\r" from a CRLF file) never ends up in the token.
            app_id = f.readline().strip()
            app_secret = f.readline().strip()
    except (IOError, OSError):
        # Only a missing/unreadable file triggers the fallback; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        print("app.txt not found will now try getting data from environment variables")
        return os.environ['FB_ID'] + "|" + os.environ['FB_KEY']
    print(app_id)
    # The assembled token is deliberately not printed: it contains the app
    # secret and would leak into stdout/logs.
    return app_id + "|" + app_secret
def scrape(page_id, tstamp, useKafka, useES):
    """Run one scrape of a page feed, optionally followed by its comments.

    Credentials come from ``get_access('app.txt')`` and the start time from
    ``get_tstamp``. The new last-scrape time is stored via ``save_shelve``
    before the scraper runs. Comments are scraped only when the ``COMMENTS``
    environment variable is set.

    Args:
        page_id: Id of the page or group to scrape.
        tstamp: Start-time selector handed to ``get_tstamp``.
        useKafka: First argument passed to ``FB_SCRAPE``.
        useES: Second argument passed to ``FB_SCRAPE``.

    Returns:
        The fixed string "results saved".
    """
    token = get_access('app.txt')
    start_from = get_tstamp(page_id, tstamp, "save_times")
    # Stored before scraping starts; the start time above was read first.
    save_shelve(page_id, 'save_times')

    fb = FB_SCRAPE(useKafka, useES, False, False)
    fb.scrapeFacebookPageFeedStatus2(page_id, token, start_from)

    # Environment values are always strings, so presence is the only check.
    if "COMMENTS" in os.environ:
        fb.scrapeComments()
    return "results saved"
#if __name__ == '__main__':
#group_id = "115285708497149"
#scrape(group_id, 0,scrapeFacebookPageFeedStatus2)