-
Notifications
You must be signed in to change notification settings - Fork 1
/
isbnsearch.py
executable file
·330 lines (280 loc) · 13.6 KB
/
isbnsearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#!/usr/bin/env python
#
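# Algorithm:
#
# * For any item with an ISBN (or UPC), we want to say whether it is worth
#   listing ("selected"), not worth listing ("rejected"), or undecided
#   ("unknown"), based on its sales rank and on its lowest new/used offer
#   prices. The exact rules live in classifyoffersummaries().
#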
import os, sys, re, lxml, cgi, unicodedata, locale, time, traceback
import cgitb
import StringIO
from ConfigParser import *
from amazonproduct import *
import urllib2 # for exception handling on timeouts from amazonproduct calls
from datetime import date
cgitb.enable()
import shelve
# Classification thresholds.  Prices are in US cents.
lowcutoffprice, highcutoffprice = 500.0, 1500.0
cutoffprice_epsilon = 5.0  # margin added to a cutoff before comparing against it
salesrankcutoff = 10000000

# Persistent per-day counters of how searched items were classified.
# Keys are the proleptic Gregorian ordinal of the day, as a string.
dat = shelve.open("isbnsearch.dat", writeback=True)
datkey = str(date.today().toordinal())
if datkey not in dat:
    dat[datkey] = dict(selected=0, unknown=0, rejected=0)
def dosearch(api, isbn, page):
    """Look up one identifier through the product API.

    Identifiers that are exactly 12 characters long are looked up as UPCs,
    everything else as ISBNs.

    Returns whatever ``api.item_lookup`` returns, or None when the API
    raises InvalidParameterValue for the "ItemId" parameter.  Any other
    InvalidParameterValue is re-raised.
    """
    id_type = "UPC" if len(isbn) == 12 else "ISBN"
    try:
        return api.item_lookup(
            isbn,
            IdType=id_type,
            SearchIndex="All",
            MerchantId="All",
            Condition="All",
            ResponseGroup="Medium,Offers",
            OfferPage=page,
        )
    except InvalidParameterValue as err:
        if err.args[0] != "ItemId":
            raise err
    # The API rejected the identifier itself: report "not found".
    return None
def safe_note(s):
res = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
res = cgi.escape(res, True)
return res.replace('\'', "&rsquot;")
def firstof(lxmlnode, possibleattributes, default="(none)"):
    """Return the first attribute of *lxmlnode* that exists.

    lxmlnode           -- object to read attributes from
    possibleattributes -- attribute names to try, in order; a single name
                          given as a bare string is treated as a one-item list
    default            -- value returned when none of the names exist
    """
    # A bare string would otherwise be iterated character by character
    # ("EAN" -> "E", "A", "N") and silently fall through to the default.
    if isinstance(possibleattributes, (str, type(u""))):
        possibleattributes = [possibleattributes]
    for name in possibleattributes:
        try:
            # getattr() goes through the normal attribute protocol instead of
            # calling the __getattr__ hook directly.
            return getattr(lxmlnode, name)
        except AttributeError:
            continue
    return default
#
# rejected, selected or unknown
#
def classifyvalues(values, usedcutoffprice=None):
    """Classify a list of offer prices as "selected", "rejected" or "unknown".

    values          -- offer prices; presumably sorted ascending, since only
                       the first and fifth entries are inspected (verify
                       against callers)
    usedcutoffprice -- price threshold, in the same unit as *values*.
                       Defaults to the module-level ``lowcutoffprice``.

    Returns "selected" when the first price is above the threshold,
    "rejected" when there are more than five prices and the fifth is below
    the threshold, and "unknown" otherwise (including for an empty list).
    """
    if usedcutoffprice is None:
        # NOTE(review): the module never defined `usedcutoffprice`, so the
        # original body raised NameError.  `lowcutoffprice` is assumed to be
        # the intended threshold -- confirm.
        usedcutoffprice = lowcutoffprice
    if not values:
        return "unknown"
    if values[0] > usedcutoffprice:
        return "selected"
    if len(values) > 5 and values[4] < usedcutoffprice:
        return "rejected"
    return "unknown"
def classifyoffersummaries(item):
    """Decide from an item's offer summaries whether it is worth listing.

    item -- product element exposing ``OfferSummary`` (iterable; each entry
            has TotalNew/TotalUsed/TotalCollectible and optional
            Lowest{Used,New,Collectible}Price children) and, optionally,
            ``SalesRank``.

    Returns a dict with:
      lowused / lownew / lowcollectible   lowest price seen, in cents, or None
      low*fmt                             its formatted form, "--" if none
      totalnew / totalused / totalcollectible   offer counts summed
      class                               "selected", "rejected" or "unknown"
      msg                                 explanation of the class ("" if unknown)

    The rules run in order and a later match overrides an earlier one:
      1. rejected: a new copy costs less than lowcutoffprice + epsilon
      2. rejected: a new copy costs less than highcutoffprice and the sales
         rank is missing or worse than salesrankcutoff
      3. rejected: more than 20 used copies and the cheapest is under $1.00
      4. selected: prices are above lowcutoffprice + epsilon and the sales
         rank is better than salesrankcutoff
      5. selected: prices are above highcutoffprice + epsilon and the sales
         rank is worse than salesrankcutoff
      6. selected: no sales rank and prices are above $25 + epsilon
    "Prices are above X" means there is a used offer above X and, if there is
    a new offer, it is above X as well.
    """
    # None (not 0) marks a missing sales rank.  With a default of 0 the
    # "salesrank is None" rules could never fire and an unranked item was
    # treated as if it had the best possible rank.
    salesrank = firstof(item, ["SalesRank"], None)
    results = {"lowused": None,
               "lowusedfmt": "--",
               "lownew": None,
               "lownewfmt": "--",
               "lowcollectible": None,
               "lowcollectiblefmt": "--",
               "totalnew": 0,
               "totalused": 0,
               "totalcollectible": 0,
               "class": "unknown",
               "msg": ""}

    price_kinds = (("used", "LowestUsedPrice"),
                   ("new", "LowestNewPrice"),
                   ("collectible", "LowestCollectiblePrice"))
    for offs in item.OfferSummary:
        results["totalnew"] += offs.TotalNew
        results["totalused"] += offs.TotalUsed
        results["totalcollectible"] += offs.TotalCollectible
        for kind, tag in price_kinds:
            lowkey = "low" + kind
            try:
                price = getattr(offs, tag)
                amount = int(price.Amount)
                if results[lowkey] is None or amount < results[lowkey]:
                    results[lowkey] = amount
                    results[lowkey + "fmt"] = price.FormattedPrice
            except (AttributeError, TypeError, ValueError):
                # This summary carries no usable price of this kind.
                pass

    lowused = results["lowused"]
    lownew = results["lownew"]
    lowlimit = lowcutoffprice + cutoffprice_epsilon
    highlimit = highcutoffprice + cutoffprice_epsilon
    noranklimit = 2500 + cutoffprice_epsilon
    has_rank = salesrank is not None

    def prices_above(limit):
        # A missing used price never counts as "above" (this is what the
        # Python 2 `None > number` comparison used to yield implicitly).
        return (lowused is not None and lowused > limit
                and (lownew is None or lownew > limit))

    # if we have new items offered for less than 'lowcutoffprice' then we don't want to list
    if lownew is not None and lownew < lowlimit:
        results["class"] = "rejected"
        results["msg"] = "rejected because there are new copies for sale at less than $%.2f" % (lowlimit / 100.0)

    # if we have very poor selling items offered for less than 'highcutoffprice' then we don't want to list
    if (lownew is not None and lownew < highcutoffprice
            and (not has_rank or salesrank > salesrankcutoff)):
        results["class"] = "rejected"
        results["msg"] = "rejected because salesrank is poor and there are new copies for sale at less than $%.2f" % ((highcutoffprice) / 100.0)

    # if we have more than 20 items used, and lowprice is < a dollar, don't list
    if results["totalused"] > 20 and lowused is not None and lowused < 100:
        results["class"] = "rejected"
        results["msg"] = "rejected because there are more than 20 used copies available and the lowprice is < $1.00"

    # if there are no items listed below 'lowcutoffprice' and the salesrank exists and is < 'salesrankcutoff', then DO list
    if prices_above(lowlimit) and has_rank and salesrank < salesrankcutoff:
        results["class"] = "selected"
        results["msg"] = "ACCEPTED because low price is > $%.2f and sales rank is < %d" % ((lowlimit / 100.0), salesrankcutoff)

    # if there are no items listed below 'highcutoffprice' and the salesrank exists and is > 'salesrankcutoff', then DO list
    if prices_above(highlimit) and has_rank and salesrank > salesrankcutoff:
        results["class"] = "selected"
        results["msg"] = "ACCEPTED because low price is > $%.2f and sales rank is > %d" % ((highlimit / 100.0), salesrankcutoff)

    # if there is NO sales rank and the price is > $25, list
    if not has_rank and prices_above(noranklimit):
        results["class"] = "selected"
        results["msg"] = "ACCEPTED because low price is > $%.2f and there is no sales rank" % 25.00

    return results
def formatitem(item, offsresult):
    """Render one item as an HTML table row and count its classification.

    item       -- product element (ItemAttributes, ASIN, DetailPageURL and
                  optionally SalesRank are read)
    offsresult -- the dict produced by classifyoffersummaries() for *item*

    Side effect: increments today's counter ``dat[datkey][<class>]``.

    Returns the HTML for one ``<tr>`` with three cells (details, offer
    summary, and a third table that is currently always empty).  If anything
    goes wrong while formatting, the partial output is discarded and a single
    row containing the traceback is returned instead.
    """
    res = StringIO.StringIO()
    try:
        atr = item.ItemAttributes
        author = firstof(atr, ["Author", "Artist", "Creator"])
        pub = firstof(atr, ["Publisher", "Label"])
        sr = firstof(item, ["SalesRank"], 0)
        # firstof() hands back its plain-string default when nothing matched.
        if author is None or isinstance(author, str):
            author = "(no author)"
        else:
            author = author.text.encode('utf8')
        if pub is None or isinstance(pub, str):
            pub = "(no publisher)"
        else:
            pub = pub.text.encode('utf8')

        rowclass = offsresult["class"]
        counts = dat[datkey]
        counts[rowclass] = counts.get(rowclass, 0) + 1

        # The message contains '<' and '>' and sits in a single-quoted
        # attribute, so it has to be escaped.
        tooltip = cgi.escape(offsresult["msg"], True).replace("'", "&#39;")
        print >>res, "<tr class='%s' title='%s'>" % (rowclass, tooltip)
        print >>res, "<td>"
        # The attribute names go in lists: a bare "EAN" would be iterated
        # character by character and never match.
        print >>res, "<b><a href='%s' target='_blank'>" % item.DetailPageURL, cgi.escape(atr.Title.text.encode('utf8'), True), "</a></b><br>ASIN: ", item.ASIN, "<br>EAN: ", firstof(atr, ["EAN"]), "<br>ISBN: ", firstof(atr, ["ISBN"]), "<br>by", cgi.escape(author, True), ",", cgi.escape(pub, True)
        print >> res, "</td>"

        print >>res, "<td><table>"
        print >>res, "<tr><td colspan=2>SalesRank <b>", locale.format("%d", int(sr), True), "</b></td></tr>"
        # One row per condition: New, Used, Collectible.  The collectible row
        # used to read the non-existent key "lowestcollectiblefmt" and thus
        # always showed "0 C".
        for label, totalkey, fmtkey in (("N", "totalnew", "lownewfmt"),
                                        ("U", "totalused", "lowusedfmt"),
                                        ("C", "totalcollectible", "lowcollectiblefmt")):
            try:
                print >>res, "<tr><td align=right>%d %s</td><td> >= %s</td></tr>" % (offsresult[totalkey], label, offsresult[fmtkey])
            except (KeyError, TypeError, ValueError):
                print >>res, "<tr><td align=right>0 %s</td><td></td></tr>" % label
        print >>res, "</table></td>"

        # Third column: nothing populates per-offer data yet, so the table
        # is emitted empty.
        print >>res, "<td><table>"
        print >>res, "</table></td>"
        print >>res, "</tr>"
        # debugging, dump the item into the table too
        #print >>res, "<tr><td colspan=3>", str(item).replace("\n", "<br>"), "</td></tr>"
    except Exception:
        # Throw away the half-written row and show the traceback instead.
        res.truncate(0)
        print >>res, "<tr><td colspan=3>Unknown exception: ",
        traceback.print_exc(None, res)
        print >>res, "</td></tr>"
    return res.getvalue()
def process_isbns(isbns):
print "<table border='1'>"
print "<tr><th>Item Details</th><th>Offer Summary</th><th>20 Lowest Priced Offers</th></tr>"
for isbn in isbns:
try:
node = dosearch(api, isbn, 1)
except urllib2.URLError, e:
print "<tr><td colspan=3 bgcolor=purple><b>TIMEOUT SEARCHING FOR ISBN: ", isbn, "<br>%s</b></td></tr>" % str(e)
continue
except Exception as e:
print "<tr><td colspan=3 bgcolor=purple><b>EXCEPTION HANDLING ISBN: ", isbn, "<br>%s</b></td></tr>" % str(e)
continue
if node is None:
print "<tr><td colspan=3 bgcolor=yellow><b>INVALID ISBN: ", isbn, "</b></td></tr>"
continue
try:
item = None
# For books with kindle editions, we get one item for the kindle version which *does not* have an Offers attribute
# and another (for the one we actually asked for) which does have it.
# The kindle ISBN is not the same as the book ISBN, so we can distinguish by that, or by the ItemAttributes.Binding, or .Edition
itemsandoffers = []
for i in node.Items.Item:
if i.__dict__.keys().__contains__("Offers"):
item = i
offers = classifyoffersummaries(item)
itemsandoffers.append((item, offers))
# FIXME: ok, in the case where we have multiple itemsandoffers,
# should create a formatted listing using different code, better
# explain what's going on.
(item, offers) = realitemandoffers(itemsandoffers)
if (item is not None):
print formatitem(item, offers)
elif (allselected(itemsandoffers)):
print formatitem(itemsandoffers[0][0], itemsandoffers[0][1])
else:
print "<tr><td colspan=3 bgcolor=red><b>%d ASIN LISTINGS FOR ISBN: " % len(itemsandoffers), isbn, "</b>, not all accept</td></tr>"
except Exception as e:
print "<tr><td colspan=3 bgcolor=yellow><b>EXCEPTION PROCESSING ISBN: ", isbn, ", email [email protected]<br>%s</b></td></tr>" % str(e)
sys.stdout.flush()
print "</table>"
def realitemandoffers(itemsandoffers):
    """Return the first (item, offers) pair whose ASIN does not start with 'B'.

    itemsandoffers -- sequence of (item, offers) pairs; each item exposes ASIN

    Returns (None, None) when every ASIN starts with 'B' or the sequence is
    empty.
    """
    for (item, offers) in itemsandoffers:
        # Convert to text before looking at the first character: on an
        # lxml.objectify element, [0] selects the first sibling element rather
        # than the first character, which made the comparison always succeed.
        if not str(item.ASIN).startswith('B'):
            return (item, offers)
    return (None, None)
def allselected(itemsandoffers):
    """Return True unless some pair's offers dict has class 'rejected'.

    itemsandoffers -- sequence of (item, offers) pairs, where offers is a
                      dict with a 'class' key.  An empty sequence gives True.
    """
    return all(offers['class'] != 'rejected'
               for (_item, offers) in itemsandoffers)
# api = API(os.getenv("AMAZON_ACCESS_KEY"), os.getenv("AMAZON_SECRET_KEY"), "us")
def make_apiobj():
    """Create the product API client from the ``[keys]`` section of amazon.keys.

    /etc/apache2/amazon.keys is read first; if it does not provide
    AMAZON_ACCESS_KEY, ./amazon.keys is read as well.  The access key, secret
    key and associate tag found there are passed to API() together with the
    "us" locale.
    """
    section = "keys"
    settings = ConfigParser()
    settings.read("/etc/apache2/amazon.keys")
    if not settings.has_option(section, "AMAZON_ACCESS_KEY"):
        # Nothing usable system-wide: fall back to the working directory.
        settings.read("./amazon.keys")
    access_key = settings.get(section, "AMAZON_ACCESS_KEY")
    secret_key = settings.get(section, "AMAZON_SECRET_KEY")
    associate_tag = settings.get(section, "AMAZON_ASSOCIATE_TAG")
    return API(access_key, secret_key, "us", associate_tag)
def display_searches(shelf, key):
    """Print an HTML table of classification counts for the last seven days.

    shelf -- mapping from day ordinal (as a string) to a dict of counters
             keyed 'selected', 'unknown' and 'rejected'
    key   -- ordinal of the most recent day to show, as a string

    Days with no entry, and counters missing from an entry, are shown as 0.
    """
    emit = sys.stdout.write
    emit("<h3>ISBN searches: last 7 days</h3>\n")
    emit("<table border=1>\n")
    last_day = int(key)
    emit("<tr><th>date</th><th>(sell, ?, shelve)</th><th>total # isbn searches</th></tr>\n")
    row = ("<tr><td><b>%s</b>: </td><td>(<span class='selected'>%d</span>, %d, "
           "<span class='rejected'>%d</span>)</td><td><b> %d</b></td></tr>\n")
    # Oldest day first, ending on `key` itself.
    for days_ago in range(6, -1, -1):
        ordinal = last_day - days_ago
        label = str(date.fromordinal(ordinal))
        counts = shelf.get(str(ordinal), {})
        selected = counts.get('selected', 0)
        unknown = counts.get('unknown', 0)
        rejected = counts.get('rejected', 0)
        emit(row % (label, selected, unknown, rejected,
                    selected + unknown + rejected))
    emit("</table>\n")
isbnstring = ""
form = cgi.FieldStorage()
api = make_apiobj()
lxml.objectify.enable_recursive_str(True)
locale.setlocale(locale.LC_ALL, 'en_US.utf8')
if __name__ != "main":
print "Content-Type: text/html\n\n"
print "<html><head>"
print "<style type='text/css'>"
print " .rejected { background-color: #FF0000; }"
print " .selected { background-color: #00FF00; }"
print "</style>"
print "</head>\n<body>"
isbns = list()
invalidisbns = dict()
if form.has_key("isbns"):
isbns = form["isbns"].value.split()
process_isbns(isbns)
else:
pass
display_searches(dat, datkey)
#print "ISBN lookups today: ", dat[datkey], "\n"
dat.close()
print "<h3>Enter ISBNs (or UPC from CD/DVD/etc.) 1 per line</h3>"
#print "Listing calculations using:<br>"
#print "low price cutoff: $%.2f<br>" % (lowcutoffprice / 100.0)
#print "high price cutoff: $%.2f<br>" % (highcutoffprice / 100.0)
#print "salesrank cutoff: %d<br>" % salesrankcutoff
#print "<br>"
print "<form method='GET'>"
print "<textarea width='80%' name=isbns rows=20></textarea>"
print "<input type='submit' value='Search'/>"
print "</form>"
print "</body></html>"