Skip to content

Commit

Permalink
updates to script to exclude non-songs
Browse files Browse the repository at this point in the history
  • Loading branch information
shaynak committed Dec 23, 2020
1 parent f115cd0 commit a0f3b4e
Show file tree
Hide file tree
Showing 2 changed files with 10,737 additions and 13,081 deletions.
11 changes: 10 additions & 1 deletion scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def sort_songs_by_album(genius, songs, existing_songs=[]):
if 'album' in song_data and song_data['lyrics_state'] == 'complete':
album_name = song_data['album']['name'].strip() if song_data['album'] else None
lyrics = genius.lyrics(song_data['url'])
if lyrics and album_name:
if lyrics and album_name and is_song(lyrics):
s = Song(genius, song_data, lyrics)
if album_name not in songs_by_album:
songs_by_album[album_name] = []
Expand Down Expand Up @@ -84,5 +84,14 @@ def albums_to_songs_csv(songs_by_album, existing_df=None):
song_df = pd.concat([existing_df, song_df])
song_df.to_csv(CSV_PATH, index=False)

def is_song(lyrics):
    """Return True if *lyrics* opens with a recognised song-section header.

    The text is treated as a song only when its very first characters are
    one of the bracketed section markers ``[Intro``, ``[Verse`` or
    ``[Chorus``. The closing bracket is deliberately left out of the
    prefixes so numbered or annotated headers such as ``[Verse 1]`` or
    ``[Chorus: Artist]`` still match.

    The match is case-sensitive and anchored at index 0: leading whitespace
    or any other header (e.g. ``[Bridge]``) yields False.

    Args:
        lyrics: The lyrics text to inspect; expected to be a ``str``.

    Returns:
        True if the text starts with one of the known headers, else False.
    """
    # str.startswith accepts a tuple and checks every prefix in one call.
    section_prefixes = ('[Intro', '[Verse', '[Chorus')
    return lyrics.startswith(section_prefixes)

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
Loading

0 comments on commit a0f3b4e

Please sign in to comment.