Skip to content
This repository has been archived by the owner on May 27, 2024. It is now read-only.

Muser Data App #2

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@

# Built application files
*.apk
*.ap_

# Environment files
*.env

# Files for the ART/Dalvik VM
*.dex

# Java class files
*.class

# Generated files
bin/
gen/
out/

# Gradle files
.gradle/
build/

# Local configuration file (sdk path, etc)
local.properties

# Proguard folder generated by Eclipse
proguard/

# Log Files
*.log

# Android Studio Navigation editor temp files
.navigation/

# Android Studio captures folder
captures/

# IntelliJ
*.iml
.idea/

# Keystore information
keystore.properties

# External native build folder generated in Android Studio 2.2 and later
.externalNativeBuild

# Google Services (e.g. APIs or Firebase)
google-services.json

# macOS Finder metadata (the Windows thumbnail db would be Thumbs.db)
.DS_Store

# Crashlytics
/app/crashlytics.properties

# Private API keys
/private.properties

# Onesky ruby files
/onesky_download.rb
/onesky_upload.rb

# Gradle GUI config
gradle-app.setting

# Private deployment keys
/deployment_keys.json
/secrets.tar
# Created by .ignore support plugin (hsz.mobi)
### Java template
# Compiled class file
*.class

# Log file
*.log

# BlueJ files
*.ctxt

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries


# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/
*.iml

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
target/
dependency-reduced-pom.xml

# Output data
*.csv
*.kml
*.kmz
*.zip
*.xls
*.xlsx
10 changes: 10 additions & 0 deletions AI/ETL.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import os
# NOTE(review): hard-coded absolute path to one developer's machine — this
# breaks on any other checkout. TODO: derive from __file__ or configuration.
os.chdir(r'C:\Users\sriva\Desktop\edu.usf.sas.pal.muser\SpotifyDataExtractor')

class ETL:
    """Thin wrapper that runs the final-table build stored procedure."""

    def __init__(self, conn):
        # Connection/engine object exposing a begin() transaction context.
        self.conn = conn

    def build_final_table(self):
        """Run dbo.BLD_SPOTIFY_DATA inside a single transaction scope."""
        with self.conn.begin() as transaction:
            transaction.execute('exec dbo.BLD_SPOTIFY_DATA')
1 change: 1 addition & 0 deletions AI/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Binary file added AI/__pycache__/ETL.cpython-37.pyc
Binary file not shown.
Binary file added AI/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file added AI/__pycache__/models.cpython-37.pyc
Binary file not shown.
Binary file added AI/__pycache__/muserdatabuilder.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file added AI/d2v.model
Binary file not shown.
56 changes: 56 additions & 0 deletions AI/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pandas as pd
from textslack.textslack import TextSlack
from gensim.models import doc2vec
import os

# NOTE(review): hard-coded developer path; breaks elsewhere — TODO parameterize.
os.chdir(r'C:\Users\sriva\Desktop\edu.usf.sas.pal.muser\SpotifyDataExtractor')

class NLPModel:
    """Doc2Vec-based fallback recommender over the SPOTIFY_DATA table.

    Builds a document embedding for every known track (album + name + artist)
    and resolves a free-text query to the most similar track's feature columns.
    """

    def __init__(self, sp, conn, max_epochs=100, vec_size=50, alpha=0.025):
        # sp: spotipy client (kept for interface parity with the data builders).
        self.sp = sp
        self.conn = conn
        self.slack = TextSlack(variety='BrE', lang='english')
        self.max_epochs = max_epochs
        self.vec_size = vec_size
        self.alpha = alpha
        # Full track table; row order defines the tag -> row mapping used below.
        self.df = pd.read_sql_table('SPOTIFY_DATA', con=self.conn)

    def _create_tagged_document(self, list_of_list_of_words):
        """Yield TaggedDocument(words, [row_index]) for each token list."""
        for i, list_of_words in enumerate(list_of_list_of_words):
            yield doc2vec.TaggedDocument(list_of_words, [i])

    def _training_data(self):
        """Return one cleaned token list per row from album + name + artist."""
        key_features = (self.df['album'] + ' ' + self.df['name'] + ' ' + self.df['artist']).tolist()
        cleaned_key_features = self.slack.transform(key_features)
        return [sent.split() for sent in cleaned_key_features]

    def build_model(self):
        """Train a Doc2Vec model on the track corpus and save it as d2v.model.

        FIX: the previous implementation called model.train() inside a
        `for epoch in range(max_epochs)` loop while manually decrementing
        `model.alpha`; each train() call itself ran `model.iter` internal
        passes, so the corpus was actually trained max_epochs * iter times
        with a mangled learning-rate schedule — a documented gensim
        anti-pattern. Train once and let gensim decay alpha from `alpha`
        down to `min_alpha` over `max_epochs` passes.
        """
        train_data = list(self._create_tagged_document(self._training_data()))
        model = doc2vec.Doc2Vec(size=self.vec_size,
                                alpha=self.alpha,
                                min_alpha=0.00025,
                                min_count=1,
                                dm=1)
        model.build_vocab(train_data)
        model.train(train_data,
                    total_examples=model.corpus_count,
                    epochs=self.max_epochs)
        model.save('d2v.model')
        print("Model Saved")

    def most_similar_doc(self, target):
        """Return feature columns of the stored track most similar to *target*.

        target: free-text "album track artist" string.
        Returns: pandas Series of columns 6..-2 for the best-matching row.
        """
        model = doc2vec.Doc2Vec.load('d2v.model')
        # Fixed seed so repeated inference of the same query is deterministic.
        model.random.seed(95)
        cleaned_target = self.slack.transform(target).split()
        pred_vector = model.infer_vector(cleaned_target)
        sim_vector = model.docvecs.most_similar([pred_vector])
        pred_index = sim_vector[0][0]
        return self.df.loc[pred_index, self.df.columns[6:-1]]
61 changes: 61 additions & 0 deletions AI/muserdatabuilder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import pandas as pd
import os
from AI.models import NLPModel

# NOTE(review): hard-coded developer path; breaks elsewhere — TODO parameterize.
os.chdir(r'C:\Users\sriva\Desktop\edu.usf.sas.pal.muser\SpotifyDataExtractor')

class MuserDataBuilder:
    """Enriches music-analysis.csv with Spotify audio features per track."""

    # Columns filled from the Spotify audio-features payload; popularity is
    # handled separately because its source differs per branch.
    _FEATURE_KEYS = ('acousticness', 'danceability', 'energy',
                     'instrumentalness', 'liveness', 'loudness',
                     'speechiness', 'tempo', 'valence')

    def __init__(self, sp, conn):
        self.sp = sp      # spotipy client
        self.conn = conn  # DB connection, passed through to the NLP fallback
        self.df = pd.read_csv('music-analysis.csv')
        # Lazily built: NLPModel.__init__ reads an entire SQL table, so the
        # original code's per-miss construction inside the loop was very costly.
        self._nlp_model = None

    def _fallback_model(self):
        """Build the Doc2Vec fallback model once and reuse it across misses."""
        if self._nlp_model is None:
            self._nlp_model = NLPModel(self.sp, self.conn)
        return self._nlp_model

    def build_muser_data(self):
        """Populate the feature columns for every row.

        Queries Spotify for an exact album/track/artist match first; when no
        match exists, borrows features from the most similar known track via
        the Doc2Vec model. Writes the enriched frame back to music-analysis.csv.
        """
        # Initialise all feature columns empty. (The original's
        # `'' * self.df.shape[0]` was a no-op multiplication — the empty
        # string broadcasts across the column either way.)
        for col in self._FEATURE_KEYS + ('popularity',):
            self.df[col] = ''

        for idx in self.df.index:
            album = self.df.loc[idx, 'song_album_name']
            track = self.df.loc[idx, 'song_name']
            artist = self.df.loc[idx, 'song_artist_name']
            query = 'album:{} track:{} artist:{}'.format(album, track, artist)
            spotify_search = self.sp.search(query, limit=1, offset=0, type='track', market=None)
            items = spotify_search['tracks']['items']
            if items:
                track_uri = items[0]['uri']
                audio_features = self.sp.audio_features(track_uri)[0]
                for key in self._FEATURE_KEYS:
                    self.df.loc[idx, key] = audio_features[key]
                # Popularity lives on the track object, not in audio features.
                self.df.loc[idx, 'popularity'] = self.sp.track(track_uri)['popularity']
            else:
                # No exact Spotify hit: take features from the nearest
                # neighbour found by the (lazily built, shared) Doc2Vec model.
                target = album + ' ' + track + ' ' + artist
                audio_features = self._fallback_model().most_similar_doc(target)
                for key in self._FEATURE_KEYS:
                    self.df.loc[idx, key] = audio_features[key]
                self.df.loc[idx, 'popularity'] = audio_features['popularity']

        # index=False: the original wrote the pandas index as an extra unnamed
        # column into the very file it reads, corrupting it on each run cycle.
        self.df.to_csv('music-analysis.csv', index=False)


Loading