forked from MiguelSR/metal-scraper
-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
37 lines (28 loc) · 1.08 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy.signalmanager import dispatcher
from metal_scraper.spiders.steelspider import SteelSpider
from metal_scraper.spiders.ironspider import run
import logging
# Cap chardet's charset-prober logger at INFO so its DEBUG records are dropped.
_chardet_logger = logging.getLogger("chardet.charsetprober")
_chardet_logger.setLevel(logging.INFO)

# Logger used by this runner script; DEBUG keeps the per-item batch traces.
log = logging.getLogger('runner')
log.setLevel(logging.DEBUG)
def spider_results(spider, batch_size=100):
    """Run ``spider`` to completion, handing scraped items to ``run`` in batches.

    Each scraped item is appended to an in-memory buffer. When an item
    arrives and the buffer already holds ``batch_size`` items, the buffer is
    first passed to ``run`` and emptied, then the new item is buffered.
    Items still buffered when the crawl ends are NOT passed to ``run`` here;
    they are returned so the caller can process the final partial batch.

    Args:
        spider: Spider handed to ``CrawlerProcess.crawl``.
        batch_size: Number of buffered items that triggers a flush to
            ``run``. Must be at least 1. Defaults to 100 (presumably one
            page of results; a smaller value such as 10 is handy for
            testing).

    Returns:
        list: Items scraped since the last flush (possibly empty).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    results = []

    # NOTE: keep these parameter names as-is; the signal arguments are
    # delivered by keyword (item, response, spider, plus signal and sender).
    def crawler_results(signal, sender, item, response, spider):
        log.debug('len(results): %d', len(results))
        if len(results) >= batch_size:
            log.debug("dumping results")
            run(results)
            results.clear()
        results.append(item)

    # item_scraped is the documented signal name; item_passed was only a
    # backward-compatibility alias for the same signal.
    dispatcher.connect(crawler_results, signal=signals.item_scraped)

    process = CrawlerProcess(get_project_settings())
    process.crawl(spider)
    # The script blocks here until the crawling is finished.
    process.start()
    return results
if __name__ == '__main__':
    # spider_results() returns whatever is still buffered when the crawl
    # ends, so that remainder is handed to run() here.
    leftover_items = spider_results(SteelSpider)
    run(leftover_items)