-
Notifications
You must be signed in to change notification settings - Fork 0
/
author.py
388 lines (351 loc) · 14.8 KB
/
author.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
"""
Review-writing module for MovieGuide
"""
import re, urllib, random
from datetime import date
import jsonapi, wikidata, wikipedia
def grouped_num(num, char=',', size=3):
    """Impose digit grouping on integer num.

    num is converted with int(); its decimal digits are split into groups
    of `size` digits counted from the right and joined with `char`.  A
    leading minus sign is kept outside the grouping, so it can never end
    up as a group of its own (-123 gives '-123', not '-,123').

    Raises ValueError if size is smaller than 1.
    """
    if size < 1:
        # A non-positive group size would never consume any digits.
        raise ValueError("size must be a positive integer")
    value = int(num)
    sign = '-' if value < 0 else ''
    digits = str(abs(value))
    # Length of the leftmost group: the remainder when the digit count does
    # not divide evenly, otherwise one full group.
    head = len(digits) % size or size
    groups = [digits[:head]]
    groups.extend(digits[pos:pos + size]
                  for pos in range(head, len(digits), size))
    return sign + char.join(groups)
# Character list taken from snudown with &, (, ) and - removed; a period is
# only escaped when it directly follows a digit.
MARKDOWN_SPECIAL_RE = re.compile(r'[\\`*_{}\[\]#+!:|<>/^~]|(?<=\d)\.',
                                 flags=re.UNICODE)


def escape_markdown(data):
    """Return data with each Markdown-special character backslash-escaped."""
    # In the replacement template, "\\" is a literal backslash and "\g<0>"
    # re-inserts whatever MARKDOWN_SPECIAL_RE matched.
    return MARKDOWN_SPECIAL_RE.sub(r'\\\g<0>', data)
# Matches IMDb "qv" markup: either a single-quoted name (optionally followed
# by an upper-case tag in parentheses, which is not captured) or text wrapped
# in underscores, followed by an optional space and the literal "(qv)".
QV_RE = re.compile(r"(?:'([^']+?)(?: \([A-Z]+\))?'|_([^_]+?)_) ?\(qv\)",
                   flags=re.UNICODE)


def strip_qv(data):
    """Replace each IMDb qv-link in data with just the text it wraps."""
    def _linked_text(match):
        """Return the capture group of whichever alternative matched."""
        quoted, underscored = match.groups()
        return quoted or underscored
    return QV_RE.sub(_linked_text, data)
def imdb_url(movie):
    """Build a URL to the IMDb page of a movie object.

    movie['title'] is percent-encoded (spaces become '+') and appended to
    the IMDb title-lookup URL.  The title is encoded as ISO-8859-1 first;
    a title containing characters outside that charset falls back to UTF-8
    rather than raising UnicodeEncodeError.
    """
    # quote_plus lives in urllib on Python 2 and urllib.parse on Python 3.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus
    title = movie['title']
    try:
        raw_title = title.encode('iso-8859-1')
    except UnicodeEncodeError:
        # NOTE(review): UTF-8 is a best-effort fallback so that a review can
        # still be written; confirm how IMDb resolves such query strings.
        raw_title = title.encode('utf-8')
    return 'http://www.imdb.com/Title?%s' % (quote_plus(raw_title),)
# Transformations to apply to certificate strings
def certificate_usa(text):
    """Format a USA certificate as a Markdown link to a Wikipedia article.

    Certificates starting with 'TV' link to the TV Parental Guidelines
    article; everything else links to the MPAA film rating system article.
    """
    is_tv = text.startswith('TV')
    article = (
        'TV_Parental_Guidelines#Ratings' if is_tv else
        'Motion_Picture_Association_of_America_film_rating_system#Ratings'
    )
    label = escape_markdown(text)
    return '[USA:%s](https://en.wikipedia.org/wiki/%s)' % (label, article)


# Formatter lookup; write_imdb_vitals indexes it with the second element
# of movie['certificates'] (e.g. 'USA').
CERTIFICATE_FUNCS = {
    'USA': certificate_usa,
}
# Captures a run of digits that follows "(" and is followed by "/" or ")",
# as in "Title (1999)" or "Title (1999/I)"; the greedy ".*" prefix makes it
# pick the last such group in the string.
YEAR_RE = re.compile(r'.*\(([0-9]+)[/\)]', flags=re.UNICODE)


# Invent plots
def invent_plot(movie):
    """Make up a stand-in plot summary for a movie that lacks one.

    Reads movie['title'], and unless the title's year is the current year
    or later, also movie['cast'], movie['directors'], movie['genres'],
    movie['rating'] and movie['certificates'].  Returns one sentence, picked
    at random from a pool that depends on those fields.
    """
    # Movie is from this year (or later)
    # FIXME: January/February treat as previous year
    year_match = YEAR_RE.match(movie['title'])
    if year_match and int(year_match.group(1)) >= date.today().year:
        return ("Sorry, I don't have a plot summary for this movie. "
                "Maybe it's too new.")

    generic_plot = [
        "I have no idea what happens in this movie.",
        "I haven't seen this movie; I don't know anything else about it.",
        "This is one of those movies where there's nothing helpful "
        "printed on the back of the box.",
    ]
    # The random generic line embedded in each of the next two pools is
    # drawn right here, from the three entries above only.
    bad_plot = [
        "It looks bad to me, but what do I know: I'm just a bot.",
        "Plot? I'm not sure this movie has a plot.",
        random.choice(generic_plot) + " But it looks bad.",
    ]
    good_plot = [
        "People seem to like this movie. But writing plot summaries, "
        "apparently, not so much.",
        "I don't know if there's a plot, but I hear it's not a bad movie.",
        random.choice(generic_plot) + " But it looks good.",
    ]

    # With a cast of more than eight, mention someone listed after the
    # first six entries.
    if len(movie['cast']) > 8:
        credit = random.choice(movie['cast'][6:])
        if credit[0] and credit[1] and random.random() < 0.25:
            cast_line = ("Well, I know %s plays %s in it. "
                         "I don't know anything else about it.") % \
                        (credit[0], credit[1])
        elif credit[0]:
            cast_line = "Hmm. Well, it has %s in it." % credit[0]
        else:
            cast_line = None
        if cast_line:
            generic_plot.append(cast_line)
            good_plot.append(cast_line + " Maybe it's good.")

    # A sole, named director gets a line too.
    if len(movie['directors']) == 1 and movie['directors'][0][0]:
        director_line = 'Directed by %s.' % movie['directors'][0][0]
        if 'M. Night Shyamalan' not in director_line:
            director_line += ' Who is not M. Night Shyamalan.'
        generic_plot.append(director_line)
        bad_plot.append(director_line + " Maybe that's a bad sign?")

    # (genres, text) pairs: text joins the generic pool when the movie has
    # at least one of the listed genres.
    genre_plots = (
        (('Action', 'Adventure'),
         'Action! Adventure! Really wild things! '
         'I just wish I knew what those things were....'),
        (('Mystery',), "The plot shall remain a mystery."),
        (('Romance',),
         'Boy meets girl; boy loses girl; boy finds girl again.'
         " It's a romance; they're all like that."),
        (('Documentary',),
         "It's a documentary. Maybe it's about movies "
         'without plot summaries, or something like that.'),
        (('Biography',),
         "It's a biography. Maybe it's about someone who "
         "writes plot summaries. Or, more appropriately, someone who doesn't."),
        (('Short',), "A short film. Maybe it's so short it has no plot."),
        (('Short',), "What is this? A film for ants?"),
        #(('Thriller',), "'Cause this is thriller, thriller night."),
        (('Experimental',), "'Experimental'? What does that even mean?"),
        (('Lifestyle',),
         "'Lifestyle'? Is that a real genre? "
         "What does that even mean?"),
    )
    for wanted, text in genre_plots:
        if any(genre in movie['genres'] for genre in wanted):
            generic_plot.append(text)

    generic_plot.extend([
        "In a world where there is no plot summary...",
        "This space intentionally left blank.",
    ])

    rating = float(movie["rating"][2])
    certificates = movie['certificates']
    if certificates and 'X' in certificates[0]:
        return "Plot? It's X-rated, it doesn't need a plot."
    if 0.1 < rating < 3:
        return random.choice(bad_plot)
    if rating > 8:
        return random.choice(good_plot)
    return random.choice(generic_plot)
def munge_name(name):
    """Markdown-escape a name; one specific name is also turned into a link."""
    escaped = escape_markdown(name)
    if escaped != 'Nicolas Cage':
        return escaped
    return '[%s](/r/OneTrueGod)' % escaped
def write_imdb_vitals(movie):
    """Build the header part of a review (title, genres, cast, directors).

    Reads movie['title'], ['certificates'], ['running_time'], ['genres'],
    ['cast'], ['directors'], ['writers'] and the optional movie['aka'].

    Returns a dict with two entries: 'vitals', the Markdown text, and
    'IMDb_url', the link used for the title.
    """
    bracketed = []

    # Certificate (film classification), if a formatter is registered for it
    certificates = movie['certificates']
    if certificates and certificates[1] in CERTIFICATE_FUNCS:
        formatter = CERTIFICATE_FUNCS[certificates[1]]
        bracketed.append(formatter(certificates[0]))

    # Color info (disabled)
    # if movie['color_info']:
    #     bracketed.append(movie['color_info'])

    # Running time; the arithmetic below treats the value as minutes
    running_time = movie['running_time']
    if running_time:
        hours = int(running_time/60)
        minutes = running_time % 60
        if hours > 0:
            bracketed.append("%d h %d min" % (hours, minutes))
        else:
            bracketed.append("%d min" % (minutes,))

    extra_info = ', '.join(bracketed)
    if extra_info:
        extra_info = '[%s]' % (extra_info,)

    # FIRST LINE: title, IMDb link
    url = imdb_url(movie)
    pieces = ['### **[%s](%s)**\n\n' % (escape_markdown(movie['title']), url)]

    # OPTIONAL: actual title, if not original title, that was the best match
    if 'aka' in movie and movie['aka']:
        pieces.append(' a.k.a. **%s**\n\n' % (escape_markdown(movie['aka']),))

    # SECOND LINE: genres and extra info (certificate, running time)
    genres = movie['genres']
    if genres:
        pieces.append(', '.join(escape_markdown(genre) for genre in genres))
    else:
        pieces.append('Unclassified')
    pieces.append(' %s \n' % (extra_info,))

    # THIRD LINE: Cast, directors, writers -- at most four names per field.
    # Writers are formatted along with the rest but are currently not shown.
    cast_names, director_names, _writer_names = [
        ', '.join(munge_name(person[0]) for person in movie[field][:4])
        for field in ('cast', 'directors', 'writers')
    ]
    if movie['cast']:
        pieces.append(cast_names + " \n")
    if movie['directors']:
        label = 'Director' if len(movie['directors']) == 1 else 'Directors'
        pieces.append("%s: %s" % (label, director_names))
    # if movie['writers']:
    #     if movie['directors']:
    #         review += ' \n'
    #     plural = 'Writer' if len(movie['writers']) == 1 else 'Writers'
    #     review += "%s: %s" % (plural, names_strs[2])

    return {'vitals': ''.join(pieces), 'IMDb_url': url}
def write_imdb_plot(movie):
    """Build the IMDb rating line and the plot paragraph for a movie.

    Reads movie['rating'] (element 1 is shown as the vote count, element 2
    as the score) and movie['plot'] (element 0 the text, element 1 a credit
    shown before "IMDb").

    Returns a dict holding 'rating' plus either 'plot' or, when there is no
    plot text, 'invented_plot'.
    """
    # Star rating: the score rounded to a whole number of stars out of ten
    score = movie["rating"][2]
    stars = int(round(float(score)))
    if stars > 0:
        votes = grouped_num(movie["rating"][1])
        rating_str = "[](#movieguide_stars)**" + \
            "★" * stars + "☆" * (10 - stars) + \
            "** **%s**/10 (%s votes)" % (score, votes)
    else:
        rating_str = "Unknown; awaiting five votes"
    review = {'rating': "**IMDb rating:** %s" % (rating_str,)}

    # Plot summary
    plot = movie['plot']
    if plot and plot[0]:
        quoted = '> ' + escape_markdown(strip_qv(plot[0]))
        credit = plot[1] + "/" if plot[1] else ''
        review['plot'] = quoted + "\n(*%sIMDb*)" % (credit,)
    else:
        # Can't find a plot; let's just make something up.
        review['invented_plot'] = "> *%s*" % invent_plot(movie)
    return review
# Award canonicalization: alternate award name -> name used for display
CANONICAL_AWARD = {
    'Razzie Award': 'Golden Raspberry Award',
}

# Awards that get their own bullet (with year and category) instead of being
# folded into the wins/nominations tally.
MAJOR_AWARDS = (
    'Academy Award', 'Golden Globe Award',
    'BAFTA Award', 'Golden Raspberry Award'
    # FIXME: There's probably a few other awards that deserve special
    # attention.
)


def write_freebase_awards(fbdata):
    """Assemble award summary returned from Freebase.

    fbdata must provide award_nominations() and awards_won(); each item
    they yield is indexed by 'award' (a name, optionally of the form
    "<award> for <category>") and 'year'.

    Awards listed in MAJOR_AWARDS are shown one bullet per year and award,
    with their categories; every other award is only counted.  Returns
    {'awards': text}, where text is '' when there is nothing to report.
    """
    def _count(number, noun):
        """Format e.g. (1, 'win') as '1 win' and (2, 'win') as '2 wins'."""
        return "%d %s%s" % (number, noun, '' if number == 1 else 's')

    def summarize_counts(counts):
        """Display [2, 3] as '3 wins and 2 nominations'.

        counts is [nominations, wins]; a part whose count is zero is left
        out, except that zero nominations are reported when there are no
        wins either.
        """
        parts = []
        if counts[1]:
            parts.append(_count(counts[1], 'win'))
        if counts[0] or not parts:
            parts.append(_count(counts[0], 'nomination'))
        return " and ".join(parts)

    # Summarize awards/award nominations
    awards = {}       # "<year> <award>" -> {category: 1 if won else 0}
    counts = [0, 0]   # tally of non-major awards: [nominations, wins]
    # Nominations are handled before wins so that a win overwrites the
    # nomination recorded for the same award and category.
    batches = (fbdata.award_nominations(), fbdata.awards_won())
    for won, batch in enumerate(batches):
        for award in batch:
            if not award:
                # Skip empty entries instead of treating them as the
                # boundary between nominations and wins.
                continue
            awardname = award['award'] or 'Untitled Award'
            awardcat = ''
            if ' for ' in awardname:
                awardname, awardcat = awardname.split(' for ', 1)
            awardname = CANONICAL_AWARD.get(awardname, awardname)
            if awardname in MAJOR_AWARDS:
                heading = '%s %s' % (award['year'], awardname)
                awards.setdefault(heading, {})[awardcat] = won
            else:
                counts[won] += 1

    # Display awards info
    pieces = []
    if awards:
        pieces.append("**Awards:**\n\n")
        for heading in sorted(awards):
            categories = awards[heading]
            pieces.append("* **%s** " % (escape_markdown(heading),))
            # The category list is omitted only when the sole entry is the
            # empty (unnamed) category.
            if '' not in categories or len(categories) > 1:
                pieces.append('for ')
                pieces.append('; '.join(
                    ('%s' if categories[cat] else '*%s (nominated)*') %
                    (escape_markdown(cat.strip()),)
                    for cat in sorted(categories)))
            pieces.append("\n")
        if any(counts):
            pieces.append("* Another %s\n" % summarize_counts(counts))
    elif any(counts):
        # Wins are reported even when no nominations were recorded.
        pieces.append("**Awards:** %s\n" % summarize_counts(counts))
    return {'awards': ''.join(pieces).strip()}
def write_freebase_xrefs(fbdata):
    """Collect cross-reference URLs from a Freebase/Wikidata record.

    Calls the five *_url() accessors on fbdata and returns a dict keyed by
    "<site name>_url".  Every site gets a key, whatever its accessor
    returned.
    """
    sources = (
        ("Rotten Tomatoes", fbdata.rotten_tomatoes_url()),
        ("Metacritic", fbdata.metacritic_url()),
        ("Netflix", fbdata.netflix_url()),
        ("Wikidata", fbdata.wikidata_url()),
        ("Freebase", fbdata.freebase_url()),
    )
    urls = {}
    for site, link in sources:
        key = site + "_url"
        if urls.get(key, None):
            # A non-empty value already stored under this key is kept.
            continue
        urls[key] = link
    return urls
def write_wikipedia(wikipedia, wikiurl):
    """Build review entries from the Wikipedia article at wikiurl.

    wikipedia is the object whose by_url() fetches the article (the name
    shadows the imported module inside this function).  Returns {} when
    wikiurl is empty.  Otherwise the result may hold 'critical' and
    'summary' quotes, 'Wikipedia_url' (from the article's 'url'), and any
    other article entry whose key ends in '_url'.
    """
    if not wikiurl:
        return {}
    article = wikipedia.by_url(wikiurl)
    review = {}

    critical = article.get('critical', None)
    if critical:
        review['critical'] = "**Critical reception:**\n" + \
            "> %s\n(*Wikipedia*)" % (escape_markdown(critical),)

    summary = article.get('summary', None)
    if summary:
        review['summary'] = "> %s\n(*Wikipedia*)" % (escape_markdown(summary),)

    if 'url' in article:
        review['Wikipedia_url'] = article['url']

    # Copy the article's remaining *_url entries without overwriting a
    # non-empty value that is already present.
    for key in article:
        if not key.endswith('_url') or review.get(key, None):
            continue
        review[key] = article[key]
    return review
# Layout of a finished review, one entry per section.  Within an entry, '+'
# joins parts that are all included, and '|' separates alternatives of which
# only the first available one is used.
REVIEW_SECTIONS = ('vitals+rating', 'plot|summary|invented_plot',
                   'critical+awards', 'links')
# Sites listed on the "More info at" line, in display order; each is looked
# up in the review under "<name>_url".
CROSSREF_URLS = ('IMDb', 'Wikipedia', 'Rotten Tomatoes', 'Metacritic',
                 'Netflix', 'Wikidata', 'Freebase')


class Author(object):
    """Holds the data-source clients needed to write reviews."""

    def __init__(self, imdburl='http://localhost:8051/imdb', freebasekey=None):
        """Create the IMDb, Wikidata and Wikipedia clients.

        imdburl is passed to jsonapi.IMDbAPI; freebasekey is accepted but
        not used by this constructor.
        """
        self.imdb = jsonapi.IMDbAPI(imdburl)
        self.wikidata = wikidata.WikidataQuery()
        self.wikipedia = wikipedia.Wikipedia()

    def process_item(self, title, year):
        """Look up a movie by title and year and write its review.

        Returns (movie, text), where text is the assembled Markdown review,
        or (None, None) when the IMDb lookup raises jsonapi.IMDbError.
        """
        review = {}
        try:
            # Look up the record for this movie using the IMDb API.
            movie = self.imdb.search(title, year=year)
            review.update(write_imdb_vitals(movie))
            review.update(write_imdb_plot(movie))
        except jsonapi.IMDbError:
            # Wow; this movie doesn't exist at all.
            return (None, None)

        # With an IMDb ID we can cross-reference to Wikidata and, through
        # it, to Wikipedia.
        if 'imdbid' in movie:
            fbdata = self.wikidata.by_imdbid(movie['imdbid'])
            if fbdata:
                review.update(write_freebase_awards(fbdata))
                review.update(write_freebase_xrefs(fbdata))
                review.update(write_wikipedia(self.wikipedia,
                                              fbdata.wikipedia_url()))

        # A list of links to sources, etc.
        links = []
        for site in CROSSREF_URLS:
            key = site + '_url'
            if key in review and review[key]:
                links.append("[%s](%s)" % (site, review[key]))
        review['links'] = 'More info at ' + ', '.join(links) + '.'

        if not review:
            return (movie, None)

        sections = []
        for layout in REVIEW_SECTIONS:
            parts = []
            for alternatives in layout.split('+'):
                # Use the first alternative that has content.
                for key in alternatives.split('|'):
                    if key in review and review[key]:
                        parts.append(review[key].strip())
                        break
            text = '\n\n'.join(part for part in parts if part).strip()
            if text:
                sections.append(text)
        return (movie, "\n\n".join(sections) + ' \n')
def _main(title):
    """Utility function for command-line testing.

    Looks up title (with no year) and prints an invented plot in square
    brackets, a blank line, and then the generated review.  When no movie
    is found, prints a short notice instead.
    """
    movie, comment = Author().process_item(title, None)
    if movie is None:
        # process_item returns (None, None) for an unknown title, and
        # invent_plot cannot work without a movie record.
        print('No movie found for %r' % (title,))
        return
    # print(...) with one argument gives the same output whether print is
    # a statement (Python 2) or a function (Python 3).
    print('[ %s ]' % (invent_plot(movie),))
    print('')
    print(comment)


if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        sys.exit('usage: %s TITLE' % (sys.argv[0],))
    _main(sys.argv[1])