-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsegment_withPagination.py
189 lines (174 loc) · 8.2 KB
/
segment_withPagination.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import datetime
import json
import logging
import math
import os
import random
import sys
import time
import urllib
import urllib2

import jinja2
import soundcloud
import webapp2
from google.appengine.api import memcache
from google.appengine.ext import db

from secret import client_id, client_secret
# Templates are loaded from the directory that contains this module.
template_dir = os.path.dirname(__file__)

# Jinja2 environment shared by the handlers below (autoescape enabled).
jinja_env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(template_dir),
    autoescape=True,
)

# SoundCloud endpoints this module was written against (credentials elided;
# the real client id is imported from `secret`):
#   explore:  https://api-v2.soundcloud.com/explore/techno?limit=100&linked_partitioning=1
#   stream:   https://api.soundcloud.com/tracks/189594894/stream?client_id=<client_id>&allows_redirect=false#t=50
#   comments: https://api.soundcloud.com/tracks/189594894/comments?client_id=<client_id>
client = soundcloud.Client(client_id=client_id, client_secret=client_secret)

# Width, in seconds, of the sliding window used when searching a track's
# per-second comment histogram for its most-commented segment.
segmentLen = 3
class Handler(webapp2.RequestHandler):
    """Base request handler with response-writing and Jinja2 rendering helpers."""

    def write(self, *a, **kw):
        """Forward every argument to the response's output writer."""
        out = self.response.out
        out.write(*a, **kw)

    def render_str(self, template, **params):
        """Return `template` (looked up in `jinja_env`) rendered with `params`."""
        return jinja_env.get_template(template).render(params)

    def render(self, template, **kw):
        """Render `template` with `kw` and write the result to the response."""
        rendered = self.render_str(template, **kw)
        self.write(rendered)
class SegmentHandler(Handler):
    """Handler whose GET response is a fixed greeting."""

    def get(self):
        """Write the literal greeting to the response body."""
        greeting = 'hello world'
        self.write(greeting)
class ReqJSON(db.Model):
    """Datastore entity holding the JSON-encoded filtered track list for one genre page."""

    # Genre the stored track list belongs to.
    genre = db.StringProperty(required=True)
    # The filtered track list, serialized with json.dumps.
    json_str = db.TextProperty(required=True)
    # Set automatically when the entity is first stored; read back to decide
    # whether the entry is more than an hour old.
    created = db.DateTimeProperty(auto_now_add=True)
    # Result page number the entry was built for.
    page = db.IntegerProperty(required=True)
class RandomHandler(Handler):
    """Serve one page of filtered SoundCloud tracks for a genre as JSON.

    The route argument is ``genre``, ``genre/sortOption`` or
    ``genre/sortOption/page``.  Each returned track carries a ``start_time``
    (ms) pointing at its most-commented segment and a ``hotness`` score.
    Results are cached in memcache and in ``ReqJSON`` datastore entities,
    both keyed by genre *and* page, for ``CACHE_TTL_SECONDS``.
    """

    # Seconds after which memcache and datastore copies are rebuilt.
    CACHE_TTL_SECONDS = 3600
    # Tracks requested per explore page; also the paging stride.
    PAGE_SIZE = 50
    # Track fields copied into each response object.
    TRACK_ATTRIBUTES = ('id', 'duration', 'stream_url', 'permalink_url', 'genre',
                        'description', 'artwork_url', 'title', 'comment_count')

    def get(self, genre1):
        """Write the JSON track list for the requested genre/sort/page.

        Args:
            genre1: raw route argument, e.g. ``techno`` or ``techno/hot/2``.

        Responds with HTTP 400 when the argument is malformed (empty genre,
        more than three segments, non-numeric or non-positive page).
        """
        try:
            genre, sort_option, page = self._parse_path(genre1)
        except ValueError:
            self.error(400)
            return

        self.response.headers['Content-Type'] = 'application/json; charset=UTF-8'

        key_suffix = genre + "_" + str(page)
        cached_tracks = memcache.get('tracks_' + key_suffix)
        cached_filtered = memcache.get('tracks_filtered_' + key_suffix)
        last_updated = memcache.get('lastUpdated_' + key_suffix)

        is_fresh = (last_updated is not None and
                    int(time.time()) - float(last_updated) <= self.CACHE_TTL_SECONDS)

        if is_fresh and cached_filtered:
            tracks_filtered = json.loads(cached_filtered)
        else:
            # Either the caches expired or the filtered list was evicted on
            # its own; in both cases rebuild it instead of answering "null".
            tracks = json.loads(cached_tracks) if (is_fresh and cached_tracks) else None
            if tracks is None:
                tracks = self._fetch_tracks(genre, page)
                memcache.set('tracks_' + key_suffix, json.dumps(tracks))
                memcache.set('lastUpdated_' + key_suffix, int(time.time()))

            tracks_filtered = self._load_fresh_from_db(genre, page)
            if tracks_filtered is None:
                tracks_filtered = self._build_filtered(tracks)
                ReqJSON(genre=genre,
                        json_str=json.dumps(tracks_filtered),
                        page=page).put()
            memcache.set('tracks_filtered_' + key_suffix, json.dumps(tracks_filtered))

        if tracks_filtered and sort_option == 'random':
            random.shuffle(tracks_filtered)
        elif tracks_filtered:
            # Any sort option other than 'random' orders by hotness, hottest first.
            tracks_filtered.sort(key=lambda t: t.get('hotness'), reverse=True)
        self.write(json.dumps(tracks_filtered))

    @staticmethod
    def _parse_path(raw_path):
        """Split the route argument into ``(genre, sort_option, page)``.

        Missing sort option defaults to ``'random'``; a missing or empty page
        defaults to 1.  Raises ValueError for anything that cannot be parsed.
        """
        # quote() leaves '/' untouched, so quoting before splitting is safe.
        parts = urllib.quote(raw_path).split('/')
        if len(parts) > 3 or not parts[0]:
            raise ValueError('expected genre[/sortOption[/page]]')
        genre = parts[0]
        sort_option = parts[1] if len(parts) > 1 else 'random'
        page_text = parts[2] if len(parts) > 2 else ''
        page = int(page_text) if page_text else 1  # int() raises ValueError on junk
        if page < 1:
            raise ValueError('page must be >= 1')
        return genre, sort_option, page

    def _fetch_tracks(self, genre, page):
        """Fetch one explore page from SoundCloud and return its track list."""
        # offset = (n - 1) * limit selects the n-th page.
        url = ('https://api-v2.soundcloud.com/explore/' + genre +
               '?offset=' + str((page - 1) * self.PAGE_SIZE) +
               "&tag=out-of-experiment&limit=" + str(self.PAGE_SIZE))
        logging.debug('explore url: %s', url)
        req = json.load(urllib2.urlopen(url))
        logging.debug('next_href: %s', req.get('next_href'))
        # A response without a 'tracks' list is treated as an empty page.
        return req.get('tracks') or []

    def _load_fresh_from_db(self, genre, page):
        """Return the stored track list for (genre, page) if under TTL, else None.

        Expired entities found along the way are deleted.
        """
        logging.debug('DB QUERY')
        query = db.GqlQuery(
            'SELECT * FROM ReqJSON WHERE genre = :1 AND page = :2', genre, page)
        now = time.time()
        fresh = None
        for entry in query:
            age = now - time.mktime(entry.created.timetuple())
            if age > self.CACHE_TTL_SECONDS:
                entry.delete()
            else:
                fresh = json.loads(entry.json_str)
        return fresh

    def _build_filtered(self, tracks):
        """Turn raw explore tracks into the trimmed objects sent to clients."""
        filtered = []
        for track in tracks:
            if not self._is_eligible(track):
                continue
            comments = self._fetch_comments(track)
            if comments is None:
                continue  # comment fetch failed; drop this track only
            start_second = self._densest_segment_start(track.get('duration'), comments)
            summary = {
                'hotness': self._hotness(track, len(comments)),
                'start_time': start_second * 1000,
            }
            for attr in self.TRACK_ATTRIBUTES:
                value = track.get(attr)
                if attr == 'artwork_url' and value is not None:
                    # Ask for the highest-quality artwork; a missing URL stays
                    # null instead of becoming the string "None".
                    value = value.replace('large', 't500x500')
                summary[attr] = value
            filtered.append(summary)
        return filtered

    @staticmethod
    def _is_eligible(track):
        """Return True if the track passes the spam/quality thresholds.

        Requires: streamable, commentable, duration strictly between 2 and
        6 minutes, more than 1000 plays, more than 5 comments, more than 50 likes.
        """
        duration = track.get('duration') or 0
        return (track.get('streamable') is True and
                track.get('commentable') is True and
                120000 < duration < 360000 and
                (track.get('playback_count') or 0) > 1000 and
                (track.get('comment_count') or 0) > 5 and
                (track.get('likes_count') or 0) > 50)

    @staticmethod
    def _fetch_comments(track):
        """Return the track's comment list, or None if it cannot be retrieved."""
        uri = track.get('uri')
        if not uri:
            return None
        link = uri + "/comments?client_id=" + client_id
        try:
            comments = json.load(urllib2.urlopen(link))
        except (urllib2.URLError, ValueError) as err:
            logging.warning('skipping track %s: comment fetch failed (%s)',
                            track.get('id'), err)
            return None
        return comments if isinstance(comments, list) else None

    @staticmethod
    def _densest_segment_start(duration_ms, comments):
        """Return the second at which the most-commented window begins.

        Builds a per-second histogram of comment timestamps and slides a
        ``segmentLen``-second window across it; ties keep the earliest start.
        Returns 0 when no window contains a comment.
        """
        counts = [0] * (int(duration_ms) // 1000 + 10)
        for comment in comments:
            timestamp = comment.get('timestamp')
            if not timestamp:
                continue  # untimed (or zero) timestamps are ignored
            second = int(timestamp) // 1000
            # Bounds check on both sides so negative values cannot wrap around.
            if 0 <= second < len(counts):
                counts[second] += 1

        start_second = 0
        best = 0
        # NOTE(review): the scan starts at second 1 and stops before the final
        # window, as the original did -- confirm whether second 0 should count.
        for index in range(1, len(counts) - segmentLen):
            window_total = sum(counts[index:index + segmentLen])
            if window_total > best:
                best = window_total
                start_second = index
        return start_second

    @staticmethod
    def _hotness(track, comment_total):
        """Return a reddit-style hotness score for the track.

        hotness = log10(20 * comments * likes) + seconds_since_2007-08-01 // 45000
        """
        try:
            released = datetime.datetime(track.get('release_year'),
                                         track.get('release_month'),
                                         track.get('release_day'))
        except (TypeError, ValueError):
            # Missing or invalid release date: use the fixed fallback date.
            released = datetime.datetime(2011, 5, 1)
        elapsed = released - datetime.datetime(2007, 8, 1)
        elapsed_seconds = elapsed.days * 3600 * 24 + elapsed.seconds
        # Clamp to 1 so an empty comment list cannot trigger log(0).
        engagement = max(1, 20 * comment_total * (track.get('likes_count') or 0))
        # Integer division kept on purpose to preserve the original scores.
        return math.log(engagement, 10) + elapsed_seconds // 45000
class APIHandler(Handler):
    """Echo the handler's route argument back to the client as plain text."""

    def get(self, inp):
        """Write `inp` to the response body.

        `inp` is presumably captured from the request path by the route
        table (not visible here), so it is untrusted.  The response is
        labelled text/plain so a browser will not interpret echoed markup
        as HTML.
        """
        self.response.headers['Content-Type'] = 'text/plain; charset=UTF-8'
        self.write(inp)