-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgencloud.py
executable file
·578 lines (560 loc) · 31.3 KB
/
gencloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
#!/usr/bin/env python2
# an example of using TopicCloud
from os import path
from wordcloud import WordCloud
import sys
#sys.path.append("C:/Dropbox/topicvec/visualization/")
from TopicCloud import TopicCloud
# Hand-entered topic list with 5 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests topics extracted
# from a news story about drug pricing -- confirm against TopicCloud / the data source.
topics_drugstory = [
[ 26.7, [ ('drugs', 2.028), ('drug', 1.722), ('use', 1.555),
('prescriptions', 1.314), ('generic', 1.307), ('patients', 1.257),
('hospitals', 1.163), ('turing', 1.130), ('treatment', 1.061),
('used', 1.037), ('pharmaceutical', 0.919), ('pharmaceuticals', 0.908),
('health', 0.898), ('medicine', 0.891) ] ],
[ 21.9, [ ('increase', 2.380), ('million', 2.331), ('increases', 1.378),
('raised', 1.303), ('dollars', 1.262), ('year', 1.221),
('percent', 1.163), ('sales', 1.151), ('huge', 1.066),
('money', 1.013), ('pay', 0.988), ('cost', 0.980),
('millions', 0.882), ('hundreds', 0.851) ] ],
[ 16.4, [ ('company', 2.758), ('acquired', 2.038), ('companies', 1.687),
('mr', 1.105), ('manager', 1.087), ('hedge', 1.063),
('business', 0.990), ('filed', 0.949), ('now', 0.903),
('icahn', 0.883), ('sold', 0.845), ('august', 0.828),
('investors', 0.825), ('founder', 0.820) ] ],
[ 10.5, [ ('patients', 1.261), ('treatment', 0.813), ('hospitals', 0.731),
('treat', 0.515), ('patient', 0.506), ('health', 0.470),
('care', 0.448), ('doctors', 0.446), ('treatments', 0.426),
('use', 0.368), ('therapies', 0.366), ('dr', 0.353),
('cancer', 0.348), ('inpatient', 0.338) ] ],
[ 9.9, [ ('price', 4.027), ('prices', 1.786), ('gouge', 0.840),
('priced', 0.715), ('high', 0.479), ('sharply', 0.389),
('year', 0.322), ('stock', 0.315), ('rose', 0.313),
('sales', 0.271), ('mr', 0.267), ('mount', 0.262),
('according', 0.253), ('low', 0.246) ] ]
]
# Alternative topic list for the same "drugstory" naming family; 5 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; what the "_8" suffix denotes is not
# visible here (the list holds 5 entries) -- confirm with the data source.
topics_drugstory_8 = [
[ 27.1, [ ('turing', 3.00), ('year', 2.35), ('use', 2.31),
('old', 2.15), ('mr', 2.14), ('generic', 1.99),
('dr', 1.96), ('used', 1.89), ('now', 1.79),
('first', 1.69), ('two', 1.59), ('called', 1.58),
('standard', 1.38), ('manager', 1.38) ] ],
[ 22.8, [ ('drug', 3.52), ('drugs', 2.94), ('patients', 2.85),
('diseases', 2.39), ('hospitals', 2.13), ('toxoplasmosis', 2.00),
('infectious', 1.96), ('treatment', 1.93), ('certain', 1.77),
('treat', 1.61), ('controlled', 1.43), ('effects', 1.39),
('infection', 1.37), ('dangerous', 1.37) ] ],
[ 22.8, [ ('raised', 1.99), ('mount', 1.72), ('sinai', 1.71),
('center', 1.67), ('hedge', 1.66), ('fund', 1.64),
('high', 1.54), ('lawmakers', 1.40), ('marathon', 1.35),
('atlanta', 1.35), ('investors', 1.32), ('sharply', 1.21),
('according', 1.16), ('jumped', 0.97) ] ],
[ 22.1, [ ('price', 4.08), ('increase', 2.73), ('increases', 1.96),
('prices', 1.78), ('distribution', 1.64), ('better', 1.62),
('sales', 1.52), ('prescriptions', 1.47), ('make', 1.45),
('million', 1.40), ('huge', 1.34), ('made', 1.27),
('pay', 1.22), ('cost', 1.19) ] ],
[ 5.2, [ ('acquired', 2.22), ('company', 2.00), ('pharmaceuticals', 1.65),
('companies', 1.52), ('pharmaceutical', 1.17), ('therapeutics', 0.97),
('glaxo', 0.95), ('drugstores', 0.94), ('acquisition', 0.88),
('glaxosmithkline', 0.82), ('sold', 0.72), ('products', 0.59),
('laboratories', 0.39), ('develop', 0.18) ] ]
]
# Topic list with 9 entries in the same [number, [(word, number), ...]] shape as
# the other lists in this file, 14 (word, number) pairs per entry.
# NOTE(review): the name suggests these are k-means clusters of the "drugstory"
# words, with the leading number being a cluster size/weight rather than a
# percentage -- this is inferred from the name and values only; confirm.
topics_drugstory_kmeans = [
[ 10.0, [ ('company', 2.83), ('year', 2.45), ('dr', 2.24),
('acquired', 2.24), ('raised', 2.00), ('first', 1.73),
('august', 1.73), ('sinai', 1.73), ('center', 1.73),
('mount', 1.73), ('time', 1.41), ('university', 1.41),
('life', 1.41), ('same', 1.41) ] ],
[ 9.0, [ ('now', 2.45), ('mr', 2.24), ('old', 2.24),
('called', 1.73), ('even', 1.73), ('make', 1.73),
('better', 1.73), ('think', 1.41), ('like', 1.41),
('side', 1.41), ('jumped', 1.41), ('ago', 1.41),
('threatening', 1.41), ('trying', 1.41) ] ],
[ 8.0, [ ('federal', 1.41), ('lawmakers', 1.41), ('filed', 1.41),
('chief', 1.41), ('claim', 1.00), ('state', 1.00),
('united', 1.00), ('states', 1.00), ('general', 1.00),
('senator', 1.00), ('district', 1.00), ('public', 1.00),
('former', 1.00), ('president', 1.00) ] ],
[ 5.0, [ ('price', 4.12), ('million', 2.45), ('prices', 2.00),
('companies', 2.00), ('hedge', 1.73), ('sales', 1.73),
('fund', 1.73), ('dollars', 1.41), ('business', 1.41),
('sold', 1.41), ('money', 1.41), ('cost', 1.41),
('pay', 1.41), ('stock', 1.41) ] ],
[ 5.0, [ ('turing', 3.00), ('use', 2.65), ('used', 2.00),
('generic', 2.00), ('controlled', 1.73), ('tablet', 1.41),
('standard', 1.41), ('strategy', 1.41), ('system', 1.00),
('patents', 1.00), ('example', 1.00), ('data', 1.00),
('switch', 1.00), ('systems', 1.00) ] ],
[ 5.0, [ ('made', 1.73), ('long', 1.73), ('huge', 1.41),
('attention', 1.41), ('mainstays', 1.00), ('criticism', 1.00),
('led', 1.00), ('further', 1.00), ('making', 1.00),
('despite', 1.00), ('controversy', 1.00), ('drew', 1.00),
('previous', 1.00), ('overnight', 1.00) ] ],
[ 4.0, [ ('drug', 3.87), ('drugs', 3.16), ('patients', 3.00),
('treatment', 2.24), ('hospitals', 2.24), ('health', 1.73),
('pharmaceuticals', 1.73), ('prescriptions', 1.73), ('therapies', 1.41),
('pharmaceutical', 1.41), ('doctors', 1.41), ('care', 1.41),
('treatments', 1.41), ('medicine', 1.41) ] ],
[ 4.0, [ ('increase', 2.83), ('increases', 2.00), ('certain', 2.00),
('serious', 1.41), ('less', 1.41), ('required', 1.41),
('need', 1.41), ('effects', 1.41), ('dangerous', 1.41),
('discourage', 1.00), ('shortages', 1.00), ('rarely', 1.00),
('potentially', 1.00), ('possibly', 1.00) ] ],
[ 4.0, [ ('according', 2.00), ('two', 1.73), ('high', 1.73),
('distribution', 1.73), ('sharply', 1.41), ('many', 1.41),
('hundreds', 1.41), ('percent', 1.41), ('number', 1.00),
('list', 1.00), ('include', 1.00), ('differently', 1.00),
('copies', 1.00), ('added', 1.00) ] ]
]
# Topic list with 8 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests the source text is
# SIGIR paper titles -- inferred from the name and vocabulary only; confirm.
topics_sigir = [
[ 15.6, [ ('user', 0.904), ('web', 0.621), ('document', 0.589),
('query', 0.435), ('cursor', 0.429), ('queries', 0.421),
('retrieval', 0.407), ('text', 0.404), ('information', 0.399),
('online', 0.397), ('knowledge', 0.375), ('interface', 0.363),
('click', 0.342), ('collaborative', 0.319) ] ],
[ 13.8, [ ('user', 0.482), ('document', 0.439), ('web', 0.403),
('knowledge', 0.386), ('entity', 0.379), ('retrieval', 0.376),
('information', 0.342), ('collaborative', 0.312), ('queries', 0.308),
('leveraging', 0.306), ('query', 0.302), ('text', 0.291),
('online', 0.285), ('relevance', 0.285) ] ],
[ 13.4, [ ('knowledge', 0.379), ('retrieval', 0.369), ('user', 0.345),
('document', 0.326), ('web', 0.313), ('information', 0.302),
('leveraging', 0.294), ('collaborative', 0.290), ('relevance', 0.290),
('queries', 0.261), ('query', 0.253), ('summarization', 0.247),
('models', 0.242), ('evaluation', 0.239) ] ],
[ 12.9, [ ('fast', 0.459), ('neural', 0.318), ('efficient', 0.316),
('models', 0.310), ('retrieval', 0.304), ('knowledge', 0.248),
('model', 0.237), ('user', 0.233), ('modeling', 0.227),
('relevance', 0.225), ('leveraging', 0.224), ('random', 0.219),
('networks', 0.218), ('selection', 0.217) ] ],
[ 12.8, [ ('embeddings', 1.047), ('graphs', 0.438), ('subspace', 0.366),
('hamming', 0.363), ('random', 0.351), ('quantization', 0.312),
('factorization', 0.297), ('graph', 0.296), ('discrete', 0.291),
('parameterized', 0.290), ('generalized', 0.285), ('math', 0.282),
('neural', 0.275), ('models', 0.269) ] ],
[ 12.3, [ ('retrieval', 0.346), ('neural', 0.279), ('knowledge', 0.274),
('models', 0.272), ('user', 0.250), ('document', 0.245),
('relevance', 0.240), ('information', 0.238), ('queries', 0.236),
('web', 0.235), ('modeling', 0.232), ('query', 0.232),
('summarization', 0.224), ('efficient', 0.222) ] ],
[ 9.4, [ ('retrieval', 0.267), ('user', 0.248), ('web', 0.215),
('knowledge', 0.213), ('efficient', 0.203), ('queries', 0.196),
('query', 0.193), ('neural', 0.193), ('information', 0.192),
('leveraging', 0.180), ('document', 0.180), ('models', 0.175),
('relevance', 0.170), ('collaborative', 0.167) ] ],
[ 7.2, [ ('search', 3.558), ('searching', 0.675), ('finding', 0.278),
('investigation', 0.203), ('retrieving', 0.190), ('exploration', 0.171),
('click', 0.146), ('web', 0.146), ('exploring', 0.124),
('online', 0.113), ('answers', 0.107), ('query', 0.096),
('cache', 0.095), ('knowledge', 0.092) ] ]
]
# Topic list with 8 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests the source text is
# ICML paper titles -- inferred from the name and vocabulary only; confirm.
topics_icml = [
[ 14.4, [ ('models', 1.037), ('neural', 1.033), ('data', 0.966),
('optimization', 0.912), ('efficient', 0.875), ('model', 0.683),
('inference', 0.675), ('analysis', 0.673), ('sampling', 0.601),
('bayesian', 0.594), ('stochastic', 0.572), ('clustering', 0.549),
('estimation', 0.546), ('structured', 0.521) ] ],
[ 14.4, [ ('neural', 1.169), ('models', 1.063), ('data', 0.990),
('efficient', 0.942), ('optimization', 0.882), ('model', 0.692),
('inference', 0.656), ('analysis', 0.648), ('memory', 0.625),
('sampling', 0.563), ('bayesian', 0.559), ('structured', 0.549),
('clustering', 0.540), ('stochastic', 0.527) ] ],
[ 13.9, [ ('optimization', 0.914), ('models', 0.891), ('neural', 0.853),
('data', 0.814), ('efficient', 0.707), ('stochastic', 0.668),
('inference', 0.662), ('analysis', 0.656), ('sampling', 0.623),
('bayesian', 0.615), ('estimation', 0.610), ('model', 0.602),
('clustering', 0.525), ('sparse', 0.517) ] ],
[ 12.9, [ ('convex', 1.166), ('embeddings', 1.128), ('matrix', 1.086),
('tensor', 0.965), ('factorization', 0.825), ('riemannian', 0.770),
('gaussian', 0.762), ('linear', 0.675), ('dimensional', 0.672),
('matrices', 0.665), ('subspace', 0.621), ('nonconvex', 0.582),
('kernel', 0.581), ('gradient', 0.580) ] ],
[ 12.3, [ ('stochastic', 0.772), ('optimization', 0.740), ('estimation', 0.577),
('models', 0.558), ('regression', 0.534), ('sampling', 0.521),
('neural', 0.518), ('sparse', 0.507), ('analysis', 0.493),
('linear', 0.493), ('data', 0.488), ('gaussian', 0.488),
('matrix', 0.485), ('inference', 0.482) ] ],
[ 12.2, [ ('matrix', 0.817), ('convex', 0.702), ('gaussian', 0.692),
('stochastic', 0.652), ('linear', 0.611), ('tensor', 0.580),
('factorization', 0.547), ('gradient', 0.539), ('sparse', 0.535),
('embeddings', 0.534), ('optimization', 0.526), ('variational', 0.523),
('kernel', 0.488), ('dimensional', 0.460) ] ],
[ 11.9, [ ('stochastic', 0.740), ('optimization', 0.685), ('estimation', 0.534),
('matrix', 0.515), ('gaussian', 0.510), ('regression', 0.506),
('models', 0.503), ('linear', 0.498), ('sparse', 0.498),
('sampling', 0.479), ('neural', 0.467), ('variational', 0.453),
('analysis', 0.452), ('gradient', 0.446) ] ],
[ 4.4, [ ('deep', 4.408), ('convolutional', 2.100), ('fast', 0.334),
('exploration', 0.297), ('inner', 0.227), ('memory', 0.192),
('reconstructive', 0.188), ('squeezing', 0.187), ('rectifier', 0.184),
('streams', 0.175), ('faster', 0.167), ('neural', 0.163),
('layers', 0.157), ('hidden', 0.154) ] ]
]
# Topic list with 8 topics, a variant in the "icml" naming family.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; what the "_3" suffix denotes (a run
# index or a hyperparameter setting) is not visible here -- confirm.
topics_icml_3 = [
[ 14.2, [ ('convex', 1.84), ('embeddings', 1.81), ('rank', 1.66),
('tensor', 1.58), ('matrix', 1.55), ('riemannian', 1.26),
('factorization', 1.25), ('matrices', 1.01), ('dimensional', 1.01),
('subspace', 0.96), ('gaussian', 0.89), ('nonconvex', 0.87),
('doubly', 0.82), ('metric', 0.78) ] ],
[ 13.1, [ ('models', 0.85), ('optimization', 0.85), ('data', 0.78),
('efficient', 0.73), ('neural', 0.71), ('stochastic', 0.63),
('analysis', 0.60), ('inference', 0.59), ('model', 0.58),
('sampling', 0.56), ('estimation', 0.54), ('bayesian', 0.53),
('sparse', 0.51), ('clustering', 0.51) ] ],
[ 13.0, [ ('optimization', 0.84), ('models', 0.82), ('data', 0.74),
('neural', 0.69), ('efficient', 0.68), ('stochastic', 0.65),
('analysis', 0.58), ('inference', 0.58), ('model', 0.56),
('sampling', 0.55), ('estimation', 0.55), ('bayesian', 0.53),
('sparse', 0.51), ('clustering', 0.50) ] ],
[ 12.3, [ ('optimization', 0.78), ('stochastic', 0.70), ('models', 0.69),
('data', 0.61), ('neural', 0.57), ('efficient', 0.56),
('estimation', 0.55), ('analysis', 0.53), ('sampling', 0.53),
('inference', 0.52), ('bayesian', 0.50), ('regression', 0.50),
('sparse', 0.49), ('model', 0.48) ] ],
[ 12.3, [ ('optimization', 0.78), ('models', 0.71), ('stochastic', 0.68),
('data', 0.63), ('neural', 0.59), ('efficient', 0.58),
('estimation', 0.55), ('analysis', 0.54), ('inference', 0.53),
('sampling', 0.53), ('bayesian', 0.50), ('model', 0.49),
('sparse', 0.49), ('regression', 0.49) ] ],
[ 12.0, [ ('optimization', 0.75), ('stochastic', 0.69), ('models', 0.65),
('data', 0.58), ('neural', 0.55), ('estimation', 0.54),
('efficient', 0.53), ('analysis', 0.51), ('sampling', 0.51),
('inference', 0.50), ('regression', 0.49), ('sparse', 0.49),
('bayesian', 0.48), ('model', 0.46) ] ],
[ 11.6, [ ('optimization', 0.72), ('stochastic', 0.69), ('models', 0.62),
('data', 0.54), ('estimation', 0.53), ('neural', 0.51),
('efficient', 0.49), ('sampling', 0.49), ('analysis', 0.49),
('regression', 0.48), ('inference', 0.48), ('sparse', 0.47),
('bayesian', 0.46), ('linear', 0.44) ] ],
[ 7.8, [ ('deep', 4.22), ('convolutional', 2.50), ('neural', 0.97),
('memory', 0.62), ('fast', 0.61), ('faster', 0.38),
('brain', 0.33), ('reconstructive', 0.32), ('rectifier', 0.31),
('efficient', 0.27), ('data', 0.26), ('generative', 0.26),
('simple', 0.25), ('squeezing', 0.25) ] ]
]
# Topic list with 7 topics; this is the list passed to
# TopicCloud(...).generate_from_topics() at the bottom of this script.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; what the "_5" suffix denotes is not
# visible here -- confirm.
topics_icml_5 = [
[ 21.8, [ ('stochastic', 3.88), ('optimization', 3.82), ('rank', 3.21),
('estimation', 2.95), ('gradient', 2.71), ('monte', 2.38),
('gaussian', 2.37), ('variational', 2.35), ('carlo', 2.35),
('regression', 2.27), ('optimal', 2.27), ('approximate', 2.18),
('descent', 1.96), ('approximation', 1.91) ] ],
[ 17.7, [ ('networks', 5.51), ('deep', 4.51), ('efficient', 3.88),
('fast', 3.20), ('bandits', 2.66), ('faster', 2.36),
('search', 2.24), ('online', 2.19), ('network', 2.14),
('bandit', 1.42), ('nystrom', 1.41), ('dueling', 1.37),
('simple', 1.36), ('anytime', 1.36) ] ],
[ 17.3, [ ('inference', 3.05), ('reinforcement', 2.73), ('hierarchical', 2.20),
('generative', 2.13), ('data', 1.98), ('bayesian', 1.56),
('contextual', 1.56), ('clustering', 1.54), ('recurrent', 1.41),
('structured', 1.40), ('conditional', 1.40), ('graphical', 1.35),
('empirical', 1.31), ('analysis', 1.30) ] ],
[ 14.8, [ ('matrix', 3.48), ('convex', 3.04), ('embeddings', 2.42),
('factorization', 2.38), ('kernel', 2.32), ('tensor', 2.21),
('doubly', 1.97), ('dimensional', 1.76), ('matrices', 1.75),
('riemannian', 1.66), ('nonconvex', 1.61), ('subspace', 1.61),
('decomposition', 1.59), ('dual', 1.43) ] ],
[ 11.5, [ ('classification', 2.65), ('policy', 2.47), ('supervised', 2.14),
('evaluation', 2.09), ('training', 1.89), ('cca', 1.73),
('correcting', 1.65), ('testing', 1.47), ('unsupervised', 1.32),
('dropout', 1.21), ('pca', 1.21), ('analysis', 1.19),
('test', 1.17), ('objectives', 1.13) ] ],
[ 8.9, [ ('sparse', 3.69), ('sampling', 3.03), ('low', 2.65),
('noisy', 1.40), ('high', 1.27), ('large', 1.14),
('heavy', 1.10), ('noise', 1.08), ('sample', 1.02),
('mixed', 1.00), ('mixture', 0.96), ('mixing', 0.94),
('variable', 0.92), ('samples', 0.89) ] ],
[ 8.0, [ ('neural', 4.23), ('models', 4.22), ('convolutional', 2.72),
('memory', 2.21), ('model', 1.96), ('block', 1.05),
('data', 0.89), ('architectures', 0.74), ('rectifier', 0.68),
('brain', 0.63), ('motor', 0.58), ('activation', 0.57),
('unlabeled', 0.54), ('processes', 0.52) ] ]
]
# Topic list with 7 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests the source text is
# IJCAI paper titles -- inferred from the name and vocabulary only; confirm.
topics_ijcai = [
[ 14.7, [ ('logic', 2.911), ('semantics', 2.332), ('logics', 2.042),
('modal', 1.667), ('semantic', 1.534), ('symbolic', 1.471),
('convolutional', 1.394), ('language', 1.384), ('representation', 1.290),
('object', 1.248), ('representations', 1.247), ('reasoning', 1.241),
('abstraction', 1.188), ('calculus', 1.179) ] ],
[ 14.7, [ ('neural', 3.449), ('deep', 2.619), ('networks', 1.994),
('robot', 1.704), ('human', 1.648), ('network', 1.485),
('models', 1.415), ('systems', 1.190), ('model', 1.009),
('machine', 0.969), ('robust', 0.884), ('simulation', 0.830),
('interactive', 0.741), ('facial', 0.697) ] ],
[ 12.0, [ ('planning', 4.458), ('efficient', 2.270), ('task', 2.109),
('plan', 1.939), ('improving', 1.491), ('joint', 1.490),
('strategy', 1.480), ('supervised', 1.259), ('citywide', 1.184),
('recommendations', 1.072), ('policy', 1.071), ('transfer', 1.035),
('scheduling', 1.033), ('repositioning', 1.022) ] ],
[ 11.5, [ ('search', 1.508), ('detection', 1.116), ('recognition', 1.029),
('information', 0.920), ('data', 0.812), ('knowledge', 0.808),
('tracking', 0.783), ('online', 0.736), ('prediction', 0.634),
('identification', 0.515), ('selection', 0.497), ('automatic', 0.490),
('networks', 0.477), ('robust', 0.476) ] ],
[ 11.1, [ ('embeddings', 2.786), ('embedding', 2.720), ('factorization', 2.460),
('matrix', 1.998), ('kernel', 1.758), ('subspace', 1.731),
('graph', 1.645), ('generalized', 1.642), ('norm', 1.620),
('metric', 1.591), ('graphs', 1.535), ('hashing', 1.347),
('convex', 1.258), ('modulo', 1.257) ] ],
[ 9.9, [ ('knowledge', 1.085), ('recognition', 0.814), ('information', 0.705),
('search', 0.592), ('data', 0.591), ('preference', 0.533),
('reasoning', 0.519), ('probabilistic', 0.499), ('query', 0.485),
('selection', 0.476), ('text', 0.455), ('representation', 0.439),
('classification', 0.435), ('elicitation', 0.428) ] ],
[ 9.6, [ ('search', 0.778), ('selection', 0.657), ('clustering', 0.653),
('data', 0.550), ('prediction', 0.549), ('optimization', 0.524),
('information', 0.509), ('preference', 0.508), ('robust', 0.506),
('detection', 0.499), ('efficient', 0.490), ('tracking', 0.465),
('recognition', 0.455), ('optimal', 0.447) ] ]
]
# Topic list with 8 topics.
# Each entry has the shape [number, [(word, number), ...]]; every topic lists
# 14 (word, number) pairs except the sixth one (see the note inside the list).
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests the source text is
# AAMAS paper titles -- inferred from the name and vocabulary only; confirm.
topics_aamas = [
[ 23.4, [ ('social', 2.99), ('cooperation', 2.22), ('security', 1.74),
('voting', 1.61), ('policies', 1.61), ('cooperative', 1.45),
('mechanism', 1.43), ('mechanisms', 1.35), ('networks', 1.30),
('systems', 1.23), ('policy', 1.14), ('behavior', 1.07),
('preferences', 1.04), ('strategy', 0.98) ] ],
[ 22.0, [ ('reinforcement', 2.18), ('networks', 1.38), ('task', 1.29),
('distributed', 1.23), ('modeling', 1.21), ('systems', 1.12),
('scheduling', 1.09), ('online', 1.02), ('planning', 0.88),
('efficient', 0.83), ('dynamic', 0.82), ('decision', 0.79),
('automated', 0.77), ('simulation', 0.76) ] ],
[ 16.9, [ ('equilibria', 2.52), ('nash', 1.58), ('optimal', 1.52),
('equilibrium', 1.27), ('maximization', 1.16), ('stochastic', 1.02),
('models', 0.98), ('matching', 0.95), ('markov', 0.93),
('inverse', 0.91), ('model', 0.89), ('efficient', 0.81),
('constrained', 0.78), ('continuous', 0.77) ] ],
[ 14.2, [ ('argumentation', 2.53), ('logic', 2.03), ('theoretic', 1.89),
('truthful', 1.88), ('proof', 1.80), ('BDI', 1.71),
('hedonic', 1.65), ('reasoning', 1.61), ('epistemic', 1.14),
('boolean', 1.14), ('judgment', 1.07), ('abstract', 0.99),
('empirical', 0.94), ('propositional', 0.92) ] ],
[ 7.8, [ ('stackelberg', 1.68), ('reinforcement', 0.99), ('optimal', 0.32),
('maximization', 0.30), ('continuous', 0.29), ('model', 0.28),
('matching', 0.28), ('decision', 0.27), ('behavior', 0.26),
('dynamic', 0.26), ('modeling', 0.26), ('task', 0.25),
('models', 0.25), ('strategy', 0.24) ] ],
# NOTE(review): this topic has only 13 words (the others have 14); one entry
# appears to have been dropped between 'automated' and 'poachers' -- confirm.
[ 6.2, [ ('robot', 3.13), ('human', 2.96), ('robots', 2.19),
('robotic', 1.97), ('autonomous', 0.99), ('humanoid', 0.97),
('humans', 0.76), ('vehicle', 0.71), ('swarms', 0.71),
('automated', 0.45), ('wheeled', 0.43),
('poachers', 0.42), ('body', 0.38) ] ],
[ 4.3, [ ('games', 5.13), ('game', 1.93), ('teams', 1.62),
('team', 1.05), ('playing', 0.58), ('player', 0.54),
('multiplayer', 0.39), ('players', 0.39), ('atari', 0.34),
('scoring', 0.31), ('competitions', 0.31), ('pac', 0.28),
('points', 0.26), ('winning', 0.25) ] ],
[ 3.5, [ ('agent', 5.52), ('agents', 3.53), ('sobe', 0.40),
('investigating', 0.09), ('intelligence', 0.06), ('customs', 0.05),
('poachers', 0.05), ('assignment', 0.05), ('dealers', 0.05),
('contact', 0.04), ('collusion', 0.04), ('security', 0.04),
('mdp', 0.04), ('anti', 0.04) ] ]
]
# Topic list with 8 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests the source text is
# Donald Trump speech transcripts -- inferred from the name and vocabulary; confirm.
topics_trump = [
[ 19.1, [ ('know', 5.49), ('say', 4.62), ('think', 2.94),
('believe', 2.90), ('tell', 2.60), ('happen', 2.39),
('hear', 1.90), ('stupid', 1.75), ('want', 1.75),
('cheerleader', 1.71), ('gonna', 1.65), ('heard', 1.60),
('answer', 1.60), ('never', 1.52) ] ],
[ 17.6, [ ('going', 3.72), ('back', 3.71), ('right', 3.16),
('send', 2.26), ('over', 2.03), ('take', 1.98),
('sending', 1.70), ('money', 1.52), ('running', 1.40),
('bring', 1.37), ('run', 1.29), ('stop', 1.23),
('go', 1.22), ('deal', 1.05) ] ],
[ 16.3, [ ('jobs', 3.27), ('need', 3.11), ('job', 2.08),
('lobbyists', 1.76), ('obamacare', 1.71), ('care', 1.43),
('money', 1.37), ('problems', 1.34), ('vets', 1.30),
('politicians', 1.16), ('going', 1.08), ('problem', 1.08),
('needs', 1.04), ('bring', 1.02) ] ],
[ 11.4, [ ('very', 5.03), ('good', 3.65), ('nice', 3.27),
('big', 2.64), ('like', 2.26), ('rich', 2.02),
('make', 1.36), ('highly', 1.31), ('wonderful', 1.30),
('talented', 1.12), ('kind', 1.09), ('makes', 1.09),
('bad', 1.08), ('proud', 1.08) ] ],
[ 10.0, [ ('building', 3.02), ('build', 2.92), ('ford', 2.80),
('equipment', 2.21), ('built', 2.04), ('car', 1.81),
('manufacturer', 1.49), ('hotel', 1.48), ('tower', 1.32),
('manufacturing', 1.21), ('rebuild', 1.20), ('trucks', 1.20),
('truck', 1.18), ('cars', 1.16) ] ],
[ 9.5, [ ('country', 4.82), ('us', 3.47), ('iraq', 2.96),
('president', 2.21), ('iran', 2.17), ('military', 2.03),
('border', 1.92), ('saudi', 1.82), ('arabia', 1.78),
('mexico', 1.66), ('yemen', 1.55), ('united', 1.49),
('states', 1.44), ('airports', 1.41) ] ],
[ 8.8, [ ('china', 4.31), ('billion', 3.38), ('trillion', 2.60),
('trade', 1.87), ('mexico', 1.51), ('oil', 1.44),
('world', 1.21), ('debt', 1.21), ('worth', 1.09),
('currency', 1.08), ('million', 1.07), ('net', 1.06),
('over', 1.05), ('japan', 1.05) ] ],
[ 4.7, [ ('people', 6.59), ('thousands', 1.89), ('person', 0.60),
('hundreds', 0.57), ('killing', 0.50), ('many', 0.47),
('number', 0.42), ('leaders', 0.37), ('wounded', 0.36),
('millions', 0.31), ('tens', 0.27), ('ago', 0.25),
('crowd', 0.24), ('soldiers', 0.24) ] ]
]
# Topic list with 7 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests the source text is
# Hillary Clinton speech transcripts -- inferred from the name and vocabulary; confirm.
topics_hillary = [
[ 22.9, [ ('people', 2.48), ('million', 2.46), ('now', 2.13),
('years', 2.10), ('country', 1.78), ('back', 1.69),
('working', 1.48), ('time', 1.45), ('today', 1.38),
('thank', 1.36), ('millions', 1.36), ('stop', 1.30),
('decades', 1.26), ('make', 1.24) ] ],
[ 17.6, [ ('right', 2.44), ('rightly', 1.16), ('respect', 1.09),
('fight', 1.05), ('mean', 1.02), ('extremist', 0.91),
('righted', 0.91), ('think', 0.91), ('bemoans', 0.89),
('tarnish', 0.89), ('starker', 0.88), ('hard', 0.88),
('progressive', 0.85), ('statesmanship', 0.84) ] ],
[ 14.3, [ ('corporations', 1.67), ('powerful', 1.46), ('people', 1.09),
('women', 0.89), ('make', 0.77), ('citizens', 0.75),
('care', 0.73), ('interests', 0.72), ('unions', 0.71),
('affordable', 0.70), ('rich', 0.65), ('corporate', 0.65),
('america', 0.64), ('want', 0.64) ] ],
[ 13.5, [ ('court', 6.53), ('supreme', 3.18), ('legal', 2.61),
('case', 2.58), ('justice', 2.28), ('law', 2.23),
('justices', 1.94), ('cases', 1.88), ('ruled', 1.77),
('judge', 1.66), ('courts', 1.63), ('hearing', 1.23),
('decisions', 1.10), ('lawyer', 1.06) ] ],
[ 9.5, [ ('president', 4.90), ('senator', 3.06), ('obama', 2.06),
('senate', 2.04), ('republican', 1.65), ('republicans', 1.41),
('grassley', 1.33), ('election', 1.32), ('john', 1.22),
('barack', 1.17), ('presidency', 1.11), ('former', 1.07),
('united', 1.05), ('governor', 1.01) ] ],
[ 8.7, [ ('women', 0.61), ('people', 0.50), ('care', 0.48),
('citizens', 0.48), ('country', 0.48), ('corporations', 0.47),
('unions', 0.45), ('america', 0.44), ('americans', 0.39),
('act', 0.37), ('health', 0.37), ('nation', 0.37),
('politics', 0.35), ('interests', 0.35) ] ],
[ 6.6, [ ('trump', 1.80), ('vote', 0.86), ('elections', 0.45),
('party', 0.44), ('choose', 0.39), ('voting', 0.39),
('politics', 0.39), ('republicans', 0.37), ('election', 0.34),
('voted', 0.33), ('decided', 0.31), ('constitution', 0.30),
('progressive', 0.30), ('votes', 0.30) ] ]
]
# Topic list with 8 topics, a variant in the "hillary" naming family.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; whether the "2" suffix marks a
# different document or a different run is not visible here -- confirm.
topics_hillary2 = [
[ 20.4, [ ('let', 2.64), ('go', 1.63), ('hard', 1.52),
('want', 1.19), ('going', 1.19), ('tonight', 1.09),
('live', 1.02), ('back', 0.95), ('know', 0.93),
('say', 0.90), ('tomorrow', 0.90), ('unselfish', 0.80),
('thank', 0.78), ('good', 0.73) ] ],
[ 18.8, [ ('great', 1.63), ('like', 0.91), ('grandparent', 0.78),
('people', 0.74), ('lives', 0.62), ('trust', 0.59),
('going', 0.58), ('lot', 0.55), ('remarkable', 0.54),
('way', 0.53), ('children', 0.53), ('good', 0.52),
('know', 0.52), ('child', 0.50) ] ],
[ 11.3, [ ('new', 1.80), ('state', 1.65), ('york', 1.62),
('president', 1.40), ('roosevelt', 1.07), ('senate', 0.93),
('governor', 0.77), ('presidents', 0.73), ('stuyvesant', 0.72),
('mayor', 0.68), ('island', 0.67), ('election', 0.66),
('senator', 0.66), ('members', 0.66) ] ],
[ 11.3, [ ('barriers', 0.85), ('people', 0.49), ('back', 0.47),
('going', 0.44), ('like', 0.43), ('way', 0.42),
('jobs', 0.40), ('hard', 0.40), ('americans', 0.33),
('crumbling', 0.29), ('make', 0.29), ('need', 0.27),
('problems', 0.27), ('stop', 0.27) ] ],
[ 8.9, [ ('rights', 2.29), ('people', 0.64), ('diversity', 0.60),
('lgbt', 0.59), ('discrimination', 0.56), ('dignity', 0.54),
('equal', 0.46), ('women', 0.45), ('americans', 0.44),
('advocate', 0.43), ('empowerment', 0.39), ('values', 0.39),
('racism', 0.35), ('families', 0.34) ] ],
[ 8.6, [ ('responders', 1.10), ('yorkers', 0.99), ('rikers', 0.99),
('inaudible', 0.98), ('trayvon', 0.97), ('fdny', 0.91),
('firefighter', 0.79), ('ladders', 0.46), ('firefighters', 0.41),
('people', 0.40), ('heard', 0.38), ('emergency', 0.38),
('survivors', 0.36), ('officers', 0.35) ] ],
[ 8.2, [ ('campaign', 2.35), ('progressive', 1.03), ('reform', 0.89),
('election', 0.63), ('democratic', 0.59), ('votes', 0.52),
('divisive', 0.50), ('voters', 0.49), ('voting', 0.42),
('supported', 0.42), ('congressional', 0.37), ('specter', 0.36),
('elections', 0.36), ('supporters', 0.36) ] ],
[ 7.6, [ ('country', 3.32), ('america', 2.63), ('us', 1.27),
('american', 0.55), ('world', 0.50), ('continent', 0.49),
('region', 0.40), ('nation', 0.38), ('cities', 0.36),
('south', 0.35), ('million', 0.24), ('places', 0.20),
('today', 0.19), ('americans', 0.18) ] ]
]
# Topic list with 8 topics, a variant in the "hillary" naming family.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; whether the "3" suffix marks a
# different document or a different run is not visible here -- confirm.
topics_hillary3 = [
[ 19.5, [ ('back', 3.24), ('let', 2.66), ('stop', 2.61),
('going', 2.54), ('hard', 2.53), ('go', 2.44),
('right', 2.23), ('fight', 2.00), ('keep', 1.98),
('take', 1.90), ('hold', 1.85), ('break', 1.79),
('make', 1.79), ('single', 1.69) ] ],
[ 17.5, [ ('people', 4.54), ('million', 2.84), ('years', 2.30),
('children', 2.26), ('working', 2.10), ('families', 1.94),
('americans', 1.91), ('many', 1.80), ('millions', 1.77),
('women', 1.74), ('worked', 1.56), ('today', 1.52),
('workers', 1.50), ('lives', 1.38) ] ],
[ 14.3, [ ('powerful', 2.61), ('great', 1.71), ('like', 1.30),
('very', 1.27), ('dangerous', 1.15), ('kind', 1.04),
('make', 1.04), ('remarkable', 0.99), ('good', 0.95),
('respect', 0.93), ('mean', 0.93), ('humbling', 0.93),
('strong', 0.92), ('know', 0.92) ] ],
[ 9.8, [ ('care', 1.09), ('barriers', 0.88), ('affordable', 0.75),
('equality', 0.74), ('health', 0.71), ('protecting', 0.69),
('protect', 0.66), ('discrimination', 0.64), ('women', 0.61),
('know', 0.60), ('rights', 0.59), ('want', 0.55),
('fair', 0.55), ('equal', 0.53) ] ],
[ 9.0, [ ('campaign', 2.82), ('corporations', 2.30), ('progressive', 2.17),
('politics', 1.53), ('pacs', 1.23), ('unions', 1.06),
('reform', 1.04), ('corporate', 0.96), ('party', 0.89),
('interests', 0.82), ('voters', 0.75), ('elections', 0.75),
('voting', 0.74), ('ads', 0.71) ] ],
[ 8.5, [ ('legal', 3.32), ('trump', 2.80), ('law', 1.98),
('rights', 1.83), ('issues', 1.55), ('issue', 1.24),
('decisions', 1.01), ('marriage', 0.96), ('matter', 0.89),
('lawyer', 0.81), ('matters', 0.78), ('constitutional', 0.74),
('laws', 0.64), ('constitution', 0.63) ] ],
[ 8.5, [ ('president', 5.21), ('senator', 3.24), ('senate', 2.56),
('obama', 2.34), ('republican', 1.91), ('election', 1.81),
('republicans', 1.78), ('john', 1.60), ('nomination', 1.52),
('barack', 1.50), ('grassley', 1.37), ('vote', 1.36),
('governor', 1.34), ('presidency', 1.19) ] ],
[ 7.9, [ ('court', 6.73), ('supreme', 3.30), ('case', 2.46),
('justice', 2.28), ('justices', 2.14), ('ruled', 1.98),
('cases', 1.76), ('courts', 1.69), ('judge', 1.68),
('hearing', 1.21), ('judges', 1.06), ('decisions', 1.02),
('judiciary', 0.92), ('scalia', 0.85) ] ]
]
# Topic list with 7 topics.
# Each entry has the shape [number, [(word, number), ...]] with 14 (word, number)
# pairs per topic, listed in descending order of the per-word number.
# NOTE(review): the leading number looks like a topic proportion (percent) and the
# per-word numbers like word-topic weights; the name suggests the source text is
# Bernie Sanders speech transcripts -- inferred from the name and vocabulary; confirm.
topics_sanders = [
[ 18.0, [ ('people', 4.73), ('world', 4.08), ('country', 2.89),
('american', 2.12), ('us', 2.02), ('time', 1.88),
('nation', 1.75), ('america', 1.71), ('states', 1.70),
('united', 1.66), ('americans', 1.65), ('new', 1.48),
('young', 1.46), ('women', 1.33) ] ],
[ 17.8, [ ('wall', 2.36), ('going', 1.93), ('street', 1.71),
('bottom', 1.21), ('right', 1.20), ('let', 1.05),
('good', 1.02), ('protect', 0.99), ('back', 0.92),
('way', 0.89), ('fight', 0.87), ('bring', 0.86),
('top', 0.86), ('continue', 0.85) ] ],
[ 16.4, [ ('financial', 3.03), ('wealth', 2.80), ('economy', 1.53),
('profits', 1.51), ('banks', 1.32), ('money', 1.23),
('tax', 1.21), ('economic', 1.15), ('energy', 1.10),
('corporations', 1.09), ('investments', 1.03), ('income', 1.02),
('huge', 1.01), ('enormous', 0.96) ] ],
[ 15.2, [ ('political', 2.78), ('moral', 2.67), ('excesses', 1.59),
('deeply', 1.49), ('powerfully', 1.32), ('morality', 1.16),
('politics', 1.14), ('social', 1.09), ('sense', 0.97),
('indifference', 0.95), ('cynicism', 0.95), ('misguided', 0.93),
('recklessness', 0.90), ('disgrace', 0.90) ] ],
[ 14.4, [ ('system', 1.43), ('care', 1.14), ('poor', 0.97),
('pay', 0.88), ('protect', 0.84), ('workers', 0.79),
('adequate', 0.76), ('healthcare', 0.76), ('rights', 0.76),
('good', 0.70), ('wage', 0.69), ('need', 0.69),
('allow', 0.69), ('fair', 0.68) ] ],
[ 6.2, [ ('economy', 2.30), ('market', 0.46), ('economic', 0.44),
('climate', 0.38), ('industry', 0.29), ('globalization', 0.29),
('system', 0.29), ('workers', 0.28), ('inequality', 0.26),
('jobs', 0.25), ('working', 0.25), ('today', 0.24),
('change', 0.24), ('trade', 0.23) ] ],
[ 5.7, [ ('billionaires', 1.85), ('us', 0.68), ('billionaire', 0.47),
('top', 0.46), ('wealthiest', 0.35), ('million', 0.33),
('dollars', 0.32), ('millions', 0.31), ('class', 0.28),
('percent', 0.26), ('hampshire', 0.25), ('billion', 0.24),
('today', 0.24), ('country', 0.24) ] ]
]
# Driver: build a topic cloud from one of the topic lists defined above,
# show it in a matplotlib window, then save it as a PNG.
import os

import matplotlib.pyplot as plt

# Destination of the rendered image; named once so the directory check and the
# save call below cannot drift apart.
output_file = "clouds/topics_icml_5.png"

topicCloud = TopicCloud(
    max_topic_words=8,
    max_topic_num=7,
    min_word_topic_prop=0.25,
    max_words=50,
    height=1000,
    width=1000,
    relative_scaling=0.7,
    max_font_size=80,
    min_font_size=30,
).generate_from_topics(topics_icml_5)

# Display the generated image the matplotlib way.
# plt.show() blocks until the window is closed, so the file is written afterwards.
plt.imshow(topicCloud)
plt.axis("off")
plt.show()

# Make sure the output directory exists before saving.
# NOTE(review): to_file() is presumably inherited from wordcloud.WordCloud, which
# writes through PIL and does not create missing directories -- confirm.
# The isdir/makedirs pair (rather than exist_ok=True) keeps this runnable under
# the python2 interpreter named in the shebang.
output_dir = os.path.dirname(output_file)
if output_dir and not os.path.isdir(output_dir):
    os.makedirs(output_dir)
topicCloud.to_file(output_file)