forked from guoguo12/billboard-charts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
billboard.py
executable file
·290 lines (249 loc) · 10.9 KB
/
billboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#!/usr/bin/env python
import datetime
import json
import sys

try:
    # Python 3 location of urlparse.
    from urllib.parse import urlparse
except ImportError:
    # Python 2 fallback.
    from urlparse import urlparse

from bs4 import BeautifulSoup
import requests
"""billboard.py: Unofficial Python API for accessing ranking charts from Billboard.com."""
# Package metadata.
__author__ = "Allen Guo"
__license__ = "MIT"
__maintainer__ = "Allen Guo"
__email__ = "[email protected]"

# HTTP headers sent with every request issued by this module; the
# User-Agent identifies the library and points at its repository.
HEADERS = {
    'User-Agent':
        'billboard.py (https://github.com/guoguo12/billboard-charts)',
}

# True when running under a Python 2 interpreter.
IS_PY2 = sys.version_info[0] == 2
class ChartEntry:
    """A single row (typically one track) of a Billboard chart.

    Attributes:
        title: The title of the track.
        artist: The name of the track artist, as formatted on Billboard.com.
            Multiple and/or featured artists are all included in this one
            string.
        peakPos: The track's peak position on the chart, as an int.
        lastPos: The track's position on the previous week's chart, as an
            int. This value is 0 if the track has never been on the chart
            before.
        weeks: The number of weeks the track has been on the chart.
            This value is 1 if the track is new on the chart.
        rank: The track's current position on the chart.
        change: A string indicating how the track's position has changed
            since the previous week. See web documentation for details.
        spotifyID: The Spotify ID of the track, or an empty string if it was
            not provided. This can be used to access more information about
            the track via the Spotify Web API.
        spotifyLink: The Spotify embed URL of the track, generated from the
            spotifyID. Will be an empty string if no such ID was provided.
        videoLink: The video URL of the track. Will be an empty string if no
            such URL was provided.
        billboardArtistID: The id parsed from the url for the artist page,
            or an empty string if not found.
    """

    def __init__(self, title, artist, peakPos, lastPos, weeks, rank, change, spotifyID, spotifyLink, videoLink, billboardArtistID):
        """Stores the given values as same-named attributes on the entry."""
        self.title, self.artist = title, artist
        self.peakPos, self.lastPos = peakPos, lastPos
        self.weeks, self.rank = weeks, rank
        self.change = change
        # spotifyLink is stored before spotifyID, as in the attribute order
        # this class has always had.
        self.spotifyLink, self.spotifyID = spotifyLink, spotifyID
        self.videoLink = videoLink
        self.billboardArtistID = billboardArtistID

    def __repr__(self):
        """Returns a string of the form 'TITLE by ARTIST'.

        On Python 2 the text is encoded with the stdout encoding (falling
        back to UTF-8) so that a byte string is returned.
        """
        text = u"'%s' by %s" % (self.title, self.artist)
        if sys.version_info.major >= 3:
            return text
        encoding = getattr(sys.stdout, 'encoding', '') or 'utf8'
        return text.encode(encoding)

    def to_JSON(self):
        """Returns the entry as a JSON string (sorted keys, 4-space indent).

        This is useful for caching.
        """
        def attributes_of(obj):
            # json cannot serialize the entry itself, so hand it the
            # instance's attribute dictionary instead.
            return obj.__dict__

        return json.dumps(self, default=attributes_of,
                          sort_keys=True, indent=4)
class ChartData:
    """Represents a particular Billboard chart for a particular date.

    Attributes:
        name: The chart name, e.g. 'hot-100'.
        date: The chart date string, or None if the latest chart was
            requested and nothing has been fetched yet. After a fetch this
            is overwritten with the value of the page's <time> element, if
            one is present.
        latest: True if no date was given to the constructor.
        previousDate: The date string taken from the page's "Previous Week"
            link, or None if no such link has been seen.
        entries: A list of ChartEntry objects, in page order.
    """

    def __init__(self, name, date=None, fetch=True, all=False, quantize=True):
        """Constructs a new ChartData instance.

        By default, this constructor will download the requested data from
        Billboard.com by calling fetchEntries().

        Args:
            name: The chart name, e.g. 'hot-100' or 'pop-songs'.
                You can browse the Charts section of Billboard.com to find
                valid chart names; the URL of a chart will look like
                "http://www.billboard.com/charts/CHART-NAME".
            date: The chart date as a string, in YYYY-MM-DD format (a
                datetime.date is also accepted when quantize is True).
                By default, the latest chart is fetched.
                If this argument is invalid and the date is not quantized
                (see below), no exception will be raised; instead, the chart
                will contain no entries.
            fetch: A boolean indicating whether to fetch the chart data from
                Billboard.com immediately (at instantiation time).
                If False, the chart data can be populated at a later time
                using the fetchEntries() method.
            all: Deprecated; has no effect.
            quantize: A boolean indicating whether or not to round the date
                argument forward to the Saturday that dates its chart (see
                _quantize_date()).
        """
        self.name = name
        self.previousDate = None
        if date:
            self.date = self._quantize_date(date) if quantize else date
            self.latest = False
        else:
            self.date = None
            self.latest = True
        self.entries = []
        if fetch:
            self.fetchEntries(all=all)

    def __repr__(self):
        """Returns the chart as a human-readable string (typically multi-line).

        The first line is a header, the second an underline of the same
        length, followed by one 'RANK. ENTRY (CHANGE)' line per entry.
        """
        if self.latest:
            header = '%s chart (current)' % self.name
        else:
            header = '%s chart from %s' % (self.name, self.date)
        lines = [header, '-' * len(header)]
        lines.extend('%s. %s (%s)' % (entry.rank, str(entry), entry.change)
                     for entry in self.entries)
        return '\n'.join(lines)

    def __getitem__(self, key):
        """Returns the (key + 1)-th chart entry; i.e., chart[0] refers to the
        song at the No. 1 (top) position on the chart.
        """
        return self.entries[key]

    def __len__(self):
        """Returns the number of entries in the chart.

        A length of zero may indicate a failed/bad request.
        """
        return len(self.entries)

    def _quantize_date(self, date):
        """Rounds the passed date forward to a Saturday, since Billboard
        charts are always dated by Saturday.

        A Saturday maps to itself; any other day maps to the next Saturday.
        This behavior is consistent with the website, even though charts
        are released 11 days in advance.
        E.g., entering 2016-07-19 corresponds to the chart dated 2016-07-23.

        Args:
            date: The chart date as a string in YYYY-MM-DD format, or as a
                datetime.date (a datetime.datetime is reduced to its date).

        Returns:
            The quantized date as a 'YYYY-MM-DD' string. A string is returned
            for every input, including dates that already fall on a Saturday.

        Raises:
            AssertionError: If date is neither a str nor a datetime.date.
            ValueError: If a string date cannot be parsed into a valid date.
        """
        assert isinstance(date, (str, datetime.date)), \
            'Provided date must be str or datetime.date'
        if isinstance(date, str):
            year, month, day = map(int, date.split('-'))
            passedDate = datetime.date(year, month, day)
        elif isinstance(date, datetime.datetime):
            # datetime is a subclass of date; drop the time of day so that
            # the result is a pure date string usable in a chart URL.
            passedDate = date.date()
        else:
            passedDate = date
        # weekday() numbers Monday as 0, so Saturday is 5. The modulo yields
        # 0 on a Saturday and 6 on a Sunday (the following Saturday).
        daysUntilSaturday = (5 - passedDate.weekday()) % 7
        return str(passedDate + datetime.timedelta(days=daysUntilSaturday))

    def to_JSON(self):
        """Returns the chart as a JSON string (sorted keys, 4-space indent).

        Nested entries are serialized through their attribute dictionaries.
        This is useful for caching.
        """
        return json.dumps(self, default=lambda o: o.__dict__,
                          sort_keys=True, indent=4)

    def fetchEntries(self, all=False):
        """GETs the corresponding chart data from Billboard.com, then parses
        the data. Makes use of BeautifulSoup.

        On return, self.entries holds exactly the entries found on the
        downloaded page; entries from an earlier fetch are replaced rather
        than appended to. self.previousDate and self.date are updated when
        the page provides them.

        Args:
            all: Deprecated; has no effect.
        """
        if self.latest:
            url = 'http://www.billboard.com/charts/%s' % (self.name)
        else:
            url = 'http://www.billboard.com/charts/%s/%s' % (
                self.name, self.date)

        html = downloadHTML(url)
        soup = BeautifulSoup(html, 'html.parser')

        prevLink = soup.find('a', {'title': 'Previous Week'})
        if prevLink:
            # Extract the previous date from the link.
            # eg, /charts/country-songs/2016-02-13
            prevHref = prevLink.get('href')
            if prevHref:
                self.previousDate = prevHref.split('/')[-1]
                if IS_PY2 and not isinstance(self.previousDate, str):
                    self.previousDate = self.previousDate.encode('utf8')

        currentTime = soup.find('time')
        if currentTime:
            # Take the chart's own date from the <time> element's
            # 'datetime' attribute.
            self.date = currentTime.get('datetime')

        entries = [self._parse_entry(entrySoup) for entrySoup in
                   soup.find_all('article', {'class': 'chart-row'})]

        # Hot Shot Debut is the top-ranked new entry, or the first "New"
        # entry we find.
        for entry in entries:
            if entry.change == "New":
                entry.change = "Hot Shot Debut"
                break

        # Assign rather than extend so that repeated fetches do not
        # accumulate duplicate entries.
        self.entries = entries

    def _parse_entry(self, entrySoup):
        """Builds a ChartEntry from one 'chart-row' <article> element."""
        # Grab title and artist
        basicInfoSoup = entrySoup.find('div', 'chart-row__title')
        title = basicInfoSoup.find(class_='chart-row__song').string.strip()
        artistTag = basicInfoSoup.find(class_='chart-row__artist')
        artist = artistTag.string.strip()

        # Parse the Billboard artist id from the artist link, when the tag
        # has an href whose path looks like /artist/<id>/...
        billboardArtistID = ''
        href = artistTag.get('href')
        if href:
            path = urlparse(href).path
            if path.startswith('/artist/'):
                billboardArtistID = path.split('/')[2]

        def getRowValue(rowName):
            selector = 'div.chart-row__' + rowName + ' .chart-row__value'
            return entrySoup.select_one(selector).string.strip()

        # Grab week data (peak rank, last week's rank, total weeks on
        # chart). The page shows '--' when there is no last-week rank.
        peakPos = int(getRowValue('top-spot'))
        lastPos = getRowValue('last-week')
        lastPos = 0 if lastPos == '--' else int(lastPos)
        weeks = int(getRowValue('weeks-on-chart'))

        # Get current rank
        rank = int(
            entrySoup.select_one('.chart-row__current-week').string.strip())
        change = self._format_change(lastPos, rank, weeks)

        # Get spotify link for this track
        spotifyID = entrySoup.get('data-spotifyid')
        if spotifyID:
            spotifyLink = "https://embed.spotify.com/?uri=spotify:track:" + \
                spotifyID
        else:
            spotifyID = ''
            spotifyLink = ''

        videoElement = entrySoup.find('a', 'chart-row__link--video')
        videoLink = videoElement.get('data-href') if videoElement else ''

        return ChartEntry(title, artist, peakPos,
                          lastPos, weeks, rank, change,
                          spotifyID, spotifyLink, videoLink,
                          billboardArtistID)

    def _format_change(self, lastPos, rank, weeks):
        """Describes the week-over-week movement of an entry as a string.

        Returns "New" or "Re-Entry" when there is no last-week position
        (lastPos == 0), otherwise the signed difference lastPos - rank,
        with an explicit '+' for upward moves.
        """
        if lastPos == 0:
            # Not on last week's chart: if it has been on the chart before
            # it is a re-entry, otherwise it is new.
            return "Re-Entry" if weeks > 1 else "New"
        change = lastPos - rank
        if change > 0:
            return "+" + str(change)
        return str(change)
def downloadHTML(url, timeout=25):
    """Downloads and returns the webpage with the given URL.

    Args:
        url: The page URL; must start with 'http://'.
        timeout: Number of seconds to wait for the server before giving up,
            passed through to requests.get(). Without a timeout a stalled
            connection would block the caller indefinitely.

    Returns:
        The response body as text if the server answered with status 200;
        otherwise an empty string.

    Raises:
        AssertionError: If url does not start with 'http://'.
        requests.exceptions.RequestException: Errors raised by requests.get()
            (including timeouts) are not caught here and propagate to the
            caller.
    """
    assert url.startswith('http://')
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return ''