-
Notifications
You must be signed in to change notification settings - Fork 0
/
billboardSpider.py
33 lines (26 loc) · 1.02 KB
/
billboardSpider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import scrapy
from scrapy.crawler import CrawlerProcess
class BillboardItem(scrapy.Item):
    """Container for a single chart entry produced by the spider."""

    # Song title as extracted from the chart entry.
    name = scrapy.Field()
    # Performing artist text as extracted from the chart entry.
    artist = scrapy.Field()
class BillboardSpider(scrapy.Spider):
    """Scrape song titles and artists from the Billboard Hot 100 chart page.

    Every chart entry found on the start page is emitted as a
    ``BillboardItem``. The feed settings in ``custom_settings`` direct the
    scraped items to a JSON file.
    """

    name = 'billboard'
    start_urls = ['https://www.billboard.com/charts/hot-100']
    # Kept as a public attribute for backward compatibility; the value is
    # wired into the settings below so that it is actually sent.
    headers = {
        'user-agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
    }
    custom_settings = {
        # NOTE(review): Scrapy 2.1+ documents the FEEDS setting as the
        # replacement for FEED_FORMAT/FEED_URI - confirm the installed
        # version before migrating.
        'FEED_FORMAT': 'json',
        'FEED_URI': 'Resources/web scraping/billboard/result.json',
        # Scrapy does not read an arbitrary ``headers`` attribute on a
        # spider, so the user agent must be supplied through settings.
        'USER_AGENT': headers['user-agent'],
    }

    def parse(self, response, **kwargs):
        """Yield one ``BillboardItem`` per chart entry in ``response``.

        Args:
            response: The downloaded chart page.
            **kwargs: Extra callback keyword arguments; unused here.

        Yields:
            BillboardItem: An item with ``name`` and ``artist`` set to the
            first text node found under the matching element, or ``None``
            when nothing matches.
        """
        for entry in response.css('.chart-element__information'):
            # Build a fresh item for every entry: re-yielding one shared,
            # mutated instance would make all yielded references point at
            # the same object holding only the last entry's values.
            item = BillboardItem()
            item['name'] = entry.css('.text--truncate.color--primary').css('::text').extract_first()
            item['artist'] = entry.css('.text--truncate.color--secondary').css('::text').extract_first()
            yield item
# Script entry point: run the spider in-process when executed directly.
if __name__ == '__main__':
    crawler_process = CrawlerProcess()
    # Schedule the spider, then start the crawl.
    crawler_process.crawl(BillboardSpider)
    crawler_process.start()