-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
952 lines (582 loc) · 69.4 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="renderer" content="webkit">
<meta http-equiv="X-UA-Compatible" content="IE=edge" >
<link rel="dns-prefetch" href="https://tracycuican.github.io">
<title>--async</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="很高兴认识你。">
<meta property="og:type" content="website">
<meta property="og:title" content="--async">
<meta property="og:url" content="https://tracycuican.github.io/index.html">
<meta property="og:site_name" content="--async">
<meta property="og:description" content="很高兴认识你。">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="--async">
<meta name="twitter:description" content="很高兴认识你。">
<link rel="icon" href="/assets/image/favicon.png">
<link rel="stylesheet" href="/main.css">
</head>
<body>
<div id="container">
<div class="left-col">
<div class="overlay"></div>
<div class="intrude-less">
<header id="header" class="inner">
<a href="/" class="profilepic">
<img src="/assets/image/tracy.png" class="js-avatar">
</a>
<hgroup>
<h1 class="header-author"><a href="/">少女翠西</a></h1>
</hgroup>
<p class="header-subtitle">Life is an adventure.</p>
<nav class="header-menu">
<ul>
<li><a href="/">主页</a></li>
<li><a href="/photos">相册</a></li>
</ul>
</nav>
<nav class="header-smart-menu">
<a class="js-smart-menu" data-idx="0" href="javascript:void(0)">所有文章</a>
<a class="js-smart-menu" data-idx="1" href="javascript:void(0)">标签</a>
<a class="js-smart-menu" data-idx="2" href="javascript:void(0)">关于我</a>
</nav>
<nav class="header-nav">
<div class="social">
<a class="github" target="_blank" href="https://github.com/TracyCuiCan/" title="github">github</a>
<a class="douban" target="_blank" href="https://www.douban.com/people/baimi/" title="douban">douban</a>
<a class="linkedin" target="_blank" href="https://www.linkedin.com/in/tracy-can-cui-b8902686?trk=hp-identity-name" title="linkedin">linkedin</a>
</div>
</nav>
</header>
</div>
</div>
<div class="mid-col">
<nav id="mobile-nav">
<div class="overlay">
<div class="slider-trigger"><i class="icon-list"></i></div>
<h1 class="header-author js-mobile-header hide">少女翠西</h1>
</div>
<div class="intrude-less">
<header id="header" class="inner">
<div class="profilepic">
<img src="/assets/image/tracy.png" class="js-avatar">
</div>
<hgroup>
<h1 class="header-author">少女翠西</h1>
</hgroup>
<p class="header-subtitle">Life is an adventure.</p>
<nav class="header-menu">
<ul>
<li><a href="/">主页</a></li>
<li><a href="/photos">相册</a></li>
</ul>
</nav>
<nav class="header-nav">
<div class="social">
<a class="github" target="_blank" href="https://github.com/TracyCuiCan/" title="github">github</a>
<a class="douban" target="_blank" href="https://www.douban.com/people/baimi/" title="douban">douban</a>
<a class="linkedin" target="_blank" href="https://www.linkedin.com/in/tracy-can-cui-b8902686?trk=hp-identity-name" title="linkedin">linkedin</a>
</div>
</nav>
</header>
</div>
</nav>
<div class="body-wrap">
<article id="post-二零二一" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2021/01/01/二零二一/">二零二一</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<img src="/2021/01/01/二零二一/flower_blossom_in_fence.jpeg" alt="Byebye 2020" title="Byebye 2020">
<p>现在其实是2021年的第一天了,迟到的总结也还是要写,今年这个特殊的年份不纪念一下说不过去,这一年的很多事情都是在创造历史,甚至日后人们谈论起一些事来都要用pre-2020和post-2020来描述。而我们则是在亲历历史,这么一说起来还有点自豪呢。</p>
<p>昨天大家朋友圈说自己年终总结的时候都提到今年的不易,平心而论,我今年过得还行。家人朋友均健康,工作不受影响每月付得起房租,头顶有一屋遮蔽三餐有食物饱腹,除了不能远行以及日常出门的不便利,实在没有太多可怨念的。我应该为此感到惭愧,疫情最初武汉生死一线,到现在全美沦陷,我除了捐款没有为帮助他人做出其它努力,这是我的无能。希望未来能有更大的能力去帮助他人,希望除了义愤填膺和悲天悯人之外我还能出点微薄之力。</p>
<p>这一年过得特别快,尽管今年很多时候都在内心盼望着“啊2020快点过去吧!”,但当它真的匆匆过去到了年末仅剩几天的时候又懊恼自己虚度了。“我怎么就只读了这几本书呢?今年不应该有大把的时间读书吗?”,于是去翻相册,发现主旋律就是自己在家鼓捣的各种食物,转而又安慰自己“今年最大的成就是健康活着”对于这个目标我拍胸脯保证是圆满完成!从写总结的角度,坐在这里回想,我觉得2020真的乏善可陈。就像前几天的一个晚上,我死活想不起来12/25自己是怎么度过的,最后还是借助iPhone相册才找回记忆。2020年的大多数日子,可能都像12/25那天一样,平常、普通、没有任何亮点,这三点是同一个意思。</p>
<p>居家隔离生活跟我疫情前的业余生活并无太大区别,只是少了一些外出和聚会而已。所以我的2020之所以乏善可陈归其原因大概是我这个人乏善可陈,跟很多人一样,刚开始隔离生活的时候,我试图发展自己的很多兴趣爱好,也变换了很多daily routine。比如早起跑步、健身环加哑铃、每天阅读1小时+…在对自己的内心世界穷尽探索发现一片贫瘠之后我决定还是回归舒适区,于是睡得更加晚起得更加晚,每天从显示器转到电视机再转到手机的四方屏幕,不健康不积极不建议模仿。再有下次机会,我应该会省略掉中间过渡直接梯度下降到本我。</p>
<p>2020年最大的成就是升职以及课题上完成的小项目。升职是准备了很久也其实早就该达成的,因为年中perf的取消还多了一点波折,但在11月份这件事的尘埃落定还是把我心里的石头给卸下来并且能轻装开启下一程,撒花!课题上我也“斥巨资”买了一台Ailenware计算机表明自己不是玩票而是要可持续地投入去做这件事情。感谢吴老板跟我说“你投入资金后才会更坚定地去做”,促成我做了这个决定。希望不会辜负这笔投资。</p>
<p>2020年最大的幸运是遇到了一个可爱的人。上帝没有亏待我,在我近乎绝望的时候宽慰自己“坚持,再坚持一下就有糖吃”,嗯,有点甜。这个人在吃、旅行和跑步上跟我有相似的喜好和态度,跟我一样对事物好奇,对生活有种吊儿郎当的不拘一格。对于我各种微不足道的事情都有回应,这一点目前足以弥补他所有已知的缺点。我们一起骑车跑步、计划旅行、看着菜谱做吃的、玩游戏、无所事事,在这么一个孤独的年份里有这样的陪伴,我觉得何其幸运,剩下的就交给缘分啦。</p>
<p>2020年最大的遗憾是没能陪家人一起共度难关。疫情刚开始国内封城的时候每天跟爸妈视频听他们说又做了什么吃的,看他们陪家里狗子玩。后来疫情中心转移到美国并一直持续到现在,我也这么一个人在纽约呆到了现在。好友CC自从春节回国被困在国内就一直在家等到11月份才回来,每次聊天我都很羡慕他。从高中离家之后我跟父母相处最久的时长是研究生毕业后到来纽约工作前的三个月,总觉得人生还长着呢,但真不知道下次突破这个纪录是什么时机。</p>
<p>2020年最感恩的是朋友们。三月初GS来纽约接XM回蒙特利尔,开着车带着我俩在纽约溜达,邀请我去XM临时租的屋子里吃饭,XM提前煲好了鸡汤,桌上有GS在蒙特利尔华人超市带过来的零食,我们一起洗菜切菜准备晚餐。吃完饭慢慢悠悠走回来我家吃东西聊天,一恍惚我以为自己回到了2014年,结束了一天的学习,在mcgill ghetto里面走去谁家吃个饭,啊我好怀念那段时光。圣诞跟梦瑶聊天,她说你没发现我之前每周六都会找你聊天吗?我怕你一个人呆着出什么事,所以每周都问一下你。我想起来四五月份纽约疫情严重的时候颖姐每天都来问候一下我,“看我有没有还活着”。夏天跟SQ每天晚上的散步互相交换自己做的食物,还有跟ruirui和Claire三个人每周六的视频晚餐,一起做饭吃饭闲聊,每次都两小时起。居家隔离,physical distancing自然会带来social distancing, 让我没有自我隔离成孤岛尚且保持sanity,全是朋友(和Switch)的功劳!我爱你们!</p>
<p>今年有三个朋友都跨入了重要的人生阶段,升级做了母亲。我以前觉得生育是很遥远的事,心里也一直很抗拒,但朋友们的变化让我发现它其实就在下一个转角。静思经常给我发她家跳跳的照片和视频,也会分享一些喂奶换尿布哄娃睡觉的日常,让我觉得养娃没有那么可怕,甚至很有乐趣,对此我要感谢她!某天无聊的周末下午,我翻出来Rose搬离纽约之前留给我的指甲油,花时间涂在指甲上,拍照发给她看。这好像是18年冬天跟她一起在ktown的某个彩妆店选的,那时候Rose住在北边的888 Octagon,我们周末时常约着出去逛街吃饭。我俩从在Mcgill做室友开始到在纽约重逢再分开,她现在在温哥华休产假中,偶尔给我打电话吐槽外国公婆,分享近期购物。耐着性子涂指甲油的那个下午,我是在怀念我们的少女时代,如今她是一个新手妈妈了,不知道我们以后是否还能不聊家庭不聊娃,以一个无牵无挂的少女身份,美美地化个妆出门逛吃,会是什么时候呢?珍惜你身边的未婚朋友们吧!时间的车轮滚滚毫不留情!</p>
<p>在家办公已经接近一年了,日趋麻木的同时我还是觉得自己要努力去珍惜。我从来没有这么认真地观察我我家楼下小花园里的树,看着它们抽芽,枝叶茂盛,一点点变黄、凋落,在秋天一场大雨后重新变成光秃秃的样子。我也得以坐在窗前的书桌上感受家里日光的变化,下午一两点的时候阳光会直着照进来,百叶窗的影子映在我的脸上,让视频会议里的我看起来很尴尬。纽约初雪的那天下午,我跟同事们在跟着组里的fun event学习如何制作纸花,雪花在窗外簌簌飘落,手里的纸花慢慢成型,那一瞬间我心里激起一阵荡漾,脑子里出现一个俗气的“岁月静好”。这一切都很难得。</p>
<p>享受了2020的平静生活,新的一年希望能突破一点自我,工作上有更多长进,生活上去接纳尝试更多可能性。希望多有机会出门,最好能回国抱一抱狗子。认真度过每一天,不蹉跎!愿望许的朴素一点,免得明年回来打脸 =D</p>
</div>
<div class="article-info article-info-index">
<a href="/2021/01/01/二零二一/" class="archive-article-date">
<time datetime="2021-01-02T03:16:34.000Z" itemprop="datePublished"><i class="icon-clock"></i>2021-01-01</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/随想/">随想</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-nlp-c2-wk4" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/08/04/nlp-c2-wk4/">nlp-c2-wk4</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h3 id="Word-representation"><a href="#Word-representation" class="headerlink" title="Word representation"></a>Word representation</h3><ul>
<li>Integer, assign a unique integer ID to each word.</li>
<li>One-hot vector, each word is a vector of vocabulary length and only that word is 1.</li>
<li>Word embedding vector, represents a word in a relatively low dimension and carries the meaning of the word.</li>
</ul>
<p>Word embedding needs a corpus and an embedding method (normally a machine learning model). </p>
<h4 id="Embedding-methods"><a href="#Embedding-methods" class="headerlink" title="Embedding methods"></a>Embedding methods</h4><ul>
<li><p>word2vec</p>
<ul>
<li>Continuous bag-of-words (CBOW), predict a missing word given the surrounding words.</li>
<li>Continuous skip-gram with negative sampling (SGNS), predict the word surrounding a given input word.</li>
</ul>
</li>
<li><p>Global Vectors (GloVe)<br>Factorize the logarithm of the corpus's word co-occurrence matrix.</p>
</li>
<li><p>fastText<br>Based on the skip-gram model and takes into account the structure of words by representing words as an n-gram of characters, this enables the model to support OOV words.</p>
</li>
</ul>
<p>More sophisticated modeling approaches use advanced deep neural network architectures. In the above models a given word always has the same embedding; in these more advanced models, the words have different embeddings depending on their context.</p>
<ul>
<li>Bidirectional Encoder Representations from Transformers (BERT)</li>
<li>Embeddings from Language Models (ELMO)</li>
<li>Generative Pretraining 2 (GPT-2)</li>
</ul>
<h3 id="Continuous-bag-of-words"><a href="#Continuous-bag-of-words" class="headerlink" title="Continuous bag of words"></a>Continuous bag of words</h3><p>Predicts a missing word based on the surrounding words. The rationale is that if two unique words are both frequently surrounded by similar sets of words when used in various contexts, they tend to be related in their meaning. </p>
<p>Choose a hyper-parameter <code>C</code> (context half-size), if <code>C = 2</code>, the training input data is two words before a word plus two words after a word, the training output data is the word itself. While training, you first preprocess the data by converting to lowercase; process punctuation, numbers, and special characters; deal with emoji and hashtags etc. Then each word can be represented by a one-hot vector, and the context words are represented by the average of the individual one-hot vectors. </p>
<h3 id="Word-Embedding-Evaluation"><a href="#Word-Embedding-Evaluation" class="headerlink" title="Word Embedding Evaluation"></a>Word Embedding Evaluation</h3><ul>
<li><p>Intrinsic evaluation, test relationships between words.</p>
<ul>
<li>Analogies</li>
<li>Clustering</li>
<li>Visualization</li>
</ul>
</li>
<li><p>Extrinsic evaluation, test word embeddings on an external task, use the performance of the task as a proxy for the quality of the word embeddings, e.g. named entity recognition, POS tagging. This method is more time-consuming and difficult to troubleshoot. </p>
</li>
</ul>
<h3 id="Python-utils"><a href="#Python-utils" class="headerlink" title="Python utils"></a>Python utils</h3><p>For emoji preprocessing, import the emoji package.<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div></pre></td><td class="code"><pre><div class="line">import emoji</div><div class="line"></div><div class="line">data = re.sub(r'[,!?;-]+', '.', corpus)</div><div class="line">data = [ch.lower() for ch in data if ch.isalpha() or ch == '.' or emoji.get_emoji_regexp().search(ch)]</div></pre></td></tr></table></figure></p>
<p><code>yield</code> constructs a generator that returns multiple times, when it runs, it builds an iterator, and <code>next</code> gets the next available value from an iterator.</p>
<p>Activation functions:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div></pre></td><td class="code"><pre><div class="line"># Implement ReLU</div><div class="line">def relu(z):</div><div class="line"> result = z.copy()</div><div class="line"> result[result < 0] = 0</div><div class="line"> return result</div></pre></td></tr></table></figure></p>
<p>Or just <code>np.maximum(0, z)</code>.</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div></pre></td><td class="code"><pre><div class="line"># Implement Softmax</div><div class="line">def softmax(z):</div><div class="line"> e_z = np.exp(z)</div><div class="line"> sum_e_z = np.sum(e_z)</div><div class="line"> return e_z / sum_e_z</div></pre></td></tr></table></figure>
<p>Use <code>nltk.FreqDist(word for word in data)</code> to compute the frequency distribution of the words in the dataset.</p>
<p>For vector initialization, often use <code>np.random.rand(d0, d1, ..., dn)</code></p>
</div>
<div class="article-info article-info-index">
<a href="/2020/08/04/nlp-c2-wk4/" class="archive-article-date">
<time datetime="2020-08-04T14:09:40.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-08-04</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/NLP/">NLP</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-nlp-c2-wk3" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/08/02/nlp-c2-wk3/">nlp-c2-wk3</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<script src="https://cdn.jsdelivr.net/npm/mathjax@2/MathJax.js?config=default"></script>
<h3 id="N-gram"><a href="#N-gram" class="headerlink" title="N-gram"></a>N-gram</h3><p>A sequence of N words. N-gram probability \(P(\omega_N|\omega_1^{N - 1}) = \frac{C(\omega_1^{N - 1}\omega_N)}{C(\omega_1^{N - 1})}\), and notice that \(C(\omega_1^{N - 1}\omega_N) = C(\omega_1^N)\).</p>
<p>When preprocessing sentences into N-gram models, we need to add characters that mark the start and end of the sentence with <code>&lt;s&gt;</code> and <code>&lt;/s&gt;</code> respectively. For example, for n-gram preprocessing, <code>tokenized_sentence = ["&lt;s&gt;"] * (n - 1) + tokenized_sentence + ["&lt;/s&gt;"]</code>.</p>
<h3 id="Probability-of-a-sentence"><a href="#Probability-of-a-sentence" class="headerlink" title="Probability of a sentence"></a>Probability of a sentence</h3><p>Apply the chain rule, that \(P(A, B, C, D) = P(A)P(B|A)P(C|A, B)P(D|A, B, C)\), this exact formula has the limitation that the long subsequences of our testing sentence are almost never represented in the training corpus. To approximate sequence probability, we apply the Markov assumption: only the last N - 1 words matter, that is, \(P(\omega_n|\omega_1^{n - 1})\approx P(\omega_n|\omega_k^{n - 1})\) where <code>k = n - N + 1</code></p>
<h3 id="Generative-Language-Model"><a href="#Generative-Language-Model" class="headerlink" title="Generative Language Model"></a>Generative Language Model</h3><p>Once you have the n-gram probability model, you can use it to generate text by choosing a sentence start -&gt; choosing the next n-gram starting with the last n - 1 words -&gt; continuing until <code>&lt;/s&gt;</code> is picked.</p>
<h3 id="Perplexity"><a href="#Perplexity" class="headerlink" title="Perplexity"></a>Perplexity</h3><p>Perplexity is a measure of the complexity of a sample of text, i.e. how unpredictable that text is. Smaller perplexity means a better model; good language models have PP from 20 to 60.<br>$$PP(W) = P(s_1, s_2, …, s_m)^{-\frac{1}{m}}$$</p>
<p>Log Perplexity<br>\(\log PP(W) = -\frac{1}{m}\sum_1^m\log_2(P(\omega_i|\omega_j))\) where <code>j = i - 1</code>.</p>
<h3 id="Out-of-vocabulary-word-OOV"><a href="#Out-of-vocabulary-word-OOV" class="headerlink" title="Out of vocabulary word (OOV)"></a>Out of vocabulary word (OOV)</h3><p>OOV means unknown word, a special tag <code>&lt;UNK&gt;</code> is used to represent OOV in the corpus and in the input. To create a vocabulary V, you can use different criteria:</p>
<ul>
<li>Min word frequency <code>f</code></li>
<li>Max size of V, include words by frequency</li>
</ul>
<blockquote>
<p><strong><em>NOTE:</em></strong> When comparing perplexity, only compare LMs with the same V. </p>
</blockquote>
<h3 id="Handle-missing-n-grams"><a href="#Handle-missing-n-grams" class="headerlink" title="Handle missing n-grams"></a>Handle missing n-grams</h3><ul>
<li>Smoothing<br> – Add one smoothing<br> – Add k smoothing</li>
<li>Backoff, if N-gram missing => use (N-1)-gram.</li>
<li>Interpolation<br>$$\hat{P}(\omega_n|\omega_{n-2}\omega_{n-1}) = \lambda_1 \times P(\omega_n|\omega_{n-2}\omega_{n-1}) + \lambda_2 \times P(\omega_n|\omega_{n-1}) + \lambda_3 \times P(\omega_n)$$<br>and \(\sum_i \lambda_i = 1\)</li>
</ul>
<h3 id="Python-Utils"><a href="#Python-Utils" class="headerlink" title="Python Utils"></a>Python Utils</h3><p>NLP toolkit <code>nltk</code>, <code>import nltk</code>, to use it <code>tokenized_sentence = nltk.word_tokenize(sentence)</code>.</p>
<p>To remove special characters in the sentence <code>corpus = re.sub(r"[^a-zA-Z0-9.?!]", "", corpus)</code> keeps only digits, characters and ‘.’ ‘?’ ‘!’.</p>
<p>Creating a tuple for a single word <code>(word,)</code>, then merge it with another tuple <code>tuple1 + (word,)</code>.</p>
<p>A fast way to divide each element in a <b>pandas DataFrame</b> matrix by its row sum:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line">row_sums = matrix.sum(axis=1)</div><div class="line">result = matrix.div(row_sums, axis=0)</div></pre></td></tr></table></figure></p>
<p><code>str.startswith()</code> to test if a word starts with a prefix.</p>
</div>
<div class="article-info article-info-index">
<a href="/2020/08/02/nlp-c2-wk3/" class="archive-article-date">
<time datetime="2020-08-02T19:10:59.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-08-02</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/NLP/">NLP</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-nlp-c2-wk2" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/07/28/nlp-c2-wk2/">nlp-c2-wk2</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h3 id="Part-of-Speech-POS-Tagging"><a href="#Part-of-Speech-POS-Tagging" class="headerlink" title="Part of Speech (POS) Tagging"></a>Part of Speech (POS) Tagging</h3><p>Assign short representations (tags) to represent the category of words or lexical terms in the language. E.g. verb, adverb, noun, etc.</p>
<h3 id="HMM"><a href="#HMM" class="headerlink" title="HMM"></a>HMM</h3><p>A hidden Markov model has States, Transition matrix (including initial probability) and Emission matrix (from a hidden state to an observable state). </p>
<h3 id="Viterbi-Algorithm"><a href="#Viterbi-Algorithm" class="headerlink" title="Viterbi Algorithm"></a>Viterbi Algorithm</h3><p>The Viterbi algorithm can be used to find the most likely sequence of hidden states. The implementation can be decomposed into three main steps:</p>
<ul>
<li>Initialization - Initialize best_probabilities and best_paths matrices that will be populated in the feed-forward path.</li>
<li>Feed Forward - At each step, calculate the probability of each path happening and the best paths up to that point.</li>
<li>Feed Backward - Start from the end of the sequence, find the best path with the highest probabilities.</li>
</ul>
<h3 id="Python-Utils"><a href="#Python-Utils" class="headerlink" title="Python Utils"></a>Python Utils</h3><p><code>defaultdict</code> is a special kind of dictionary that returns the “zero” value of a type if you try to access a key that does not exist. With the following example, you don’t need to worry about the case when the word is not present within the dictionary.<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div></pre></td><td class="code"><pre><div class="line">freq = defaultdict(int)</div><div class="line">for word in words:</div><div class="line"> freq[word] += 1</div></pre></td></tr></table></figure></p>
<p><code>any()</code> function returns <code>True</code> if at least one of the cases it evaluate is <code>True</code>, for example:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line">if any(char.isdigit() for char in word):</div><div class="line"> return "--unk_digit--"</div></pre></td></tr></table></figure></p>
<p><code>line.split(char)</code>, where <code>char</code> can be <code>'\t'</code> or <code>' '</code> or <code>line.split()</code> default to any whitespace, splits <code>line</code> into words separated by the delimiter. </p>
<p><code>line.strip(char)</code> removes any leading and trailing characters. This is often used to trim off any spaces around a word and returns just that word.</p>
<p><code>pd.DataFrame(matrix, index=m_index, columns=m_columns)</code> creates a DataFrame out of a numpy array <code>matrix</code> with given index name and column name, this is a prettier version of the array.</p>
<p><code>row_sum = transition_matrix.sum(axis=1, keepdims=True)</code> note the <code>keepdims</code> parameters here, without it being <code>True</code>, if <code>transition_matrix</code> has 3 rows, the resulting array has shape <code>(3,)</code>, with it being <code>True</code>, the resulting array has shape <code>(3, 1)</code>. When working with Numpy, always check the shape of the array you are working with. If you get a <code>(3,)</code> matrix and want to change it to <code>(3, 1)</code>, use <code>np.reshape(matrix, (3, 1))</code>.</p>
</div>
<div class="article-info article-info-index">
<a href="/2020/07/28/nlp-c2-wk2/" class="archive-article-date">
<time datetime="2020-07-28T13:25:52.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-07-28</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/NLP/">NLP</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-nlp-c2-wk1" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/07/26/nlp-c2-wk1/">nlp-c2-wk1</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Autocorrect-and-Minimum-Edit-Distance"><a href="#Autocorrect-and-Minimum-Edit-Distance" class="headerlink" title="Autocorrect and Minimum Edit Distance"></a>Autocorrect and Minimum Edit Distance</h2><h3 id="How-does-it-work"><a href="#How-does-it-work" class="headerlink" title="How does it work"></a>How does it work</h3><p>For a given word, find all the “words” that are one/two edit distance away from it; filter out those not in the vocabulary; order each candidate by its prior probability of appearing in the context; choose the one with the highest probability. </p>
<h3 id="Python-Utils"><a href="#Python-Utils" class="headerlink" title="Python Utils"></a>Python Utils</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div></pre></td><td class="code"><pre><div class="line">from collections import Counter</div><div class="line">dict = Counter(word) # gives a dictionary of {a : count_a} ordered by count_a.</div><div class="line"></div><div class="line">text.lower() # converts all letters in text to lower case.</div><div class="line"></div><div class="line">words = re.findall(r'\w+', text) # tokenize the string to words and return them in a list.</div><div class="line"></div><div class="line">set(words) # converts list into a set</div><div class="line"></div><div class="line">set().intersection() # finds intersection of two sets.</div><div class="line">set1 & set2 # also finds intersection of two sets.</div><div class="line"></div><div class="line"># File operations.</div><div class="line">with open(file_name, 'r') as f:</div><div class="line"> data = f.read()</div><div class="line"># f is closed automatically when the with block exits.</div><div class="line"></div><div class="line">dict.items() # Returns tuples in the dictionary, can be accessed by item[0], item[1].</div><div class="line"></div><div class="line"># Find the keys that have the highest values in a dictionary</div><div class="line"># Counter(dict) creates a Counter object from a regular dictionary</div><div class="line"># Counter.most_common(n) gets the n most common keys.</div><div class="line">Counter(dict).most_common(n)</div></pre></td></tr></table></figure>
<h3 id="List-comprehension"><a href="#List-comprehension" class="headerlink" title="List comprehension"></a>List comprehension</h3><p>Syntax: <code>[expression for item in list if condition]</code> input can be for string or tuple or list, output is a list. Examples:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div></pre></td><td class="code"><pre><div class="line">splits = [(word[:i], word[i:]) for i in range(len(word))]</div><div class="line">result = [L + R for L, R in splits if len(R) > 1]</div><div class="line"></div><div class="line">[f(a, b, c) for a, b in splits if condition for c in string]</div><div class="line"></div><div class="line">{word : probs.get(word, 0) for word in words}</div></pre></td></tr></table></figure></p>
<h3 id="Short-circuit"><a href="#Short-circuit" class="headerlink" title="Short circuit"></a>Short circuit</h3><p>Logical operations such as <code>and</code> and <code>or</code> have two useful perperties, they can operate on lists and have short-circuit behavior. Examples:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div></pre></td><td class="code"><pre><div class="line">print([] and ['a', 'b']) # []</div><div class="line">print([] or ['a', 'b']) # ['a', 'b']</div><div class="line"></div><div class="line">val = ['most likely'] or ['less so'] or ['least of all'] # selects first, does not evaluate remainder.</div><div class="line"></div><div class="line">val = [] or [] or ['least of all'] # continues evaluation until there is a non-empty list.</div></pre></td></tr></table></figure></p>
<h3 id="Pitfalls"><a href="#Pitfalls" class="headerlink" title="Pitfalls"></a>Pitfalls</h3><p><code>set(str)</code> does not return a set with <code>str</code> as an element, for example <code>set('str') = {'s', 't', 'r'}</code>.</p>
</div>
<div class="article-info article-info-index">
<a href="/2020/07/26/nlp-c2-wk1/" class="archive-article-date">
<time datetime="2020-07-26T15:38:38.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-07-26</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/NLP/">NLP</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-nlp-c1-wk4" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/07/25/nlp-c1-wk4/">Notes for NLP Course1 Week4</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Machine-Translation-and-Document-Search"><a href="#Machine-Translation-and-Document-Search" class="headerlink" title="Machine Translation and Document Search"></a>Machine Translation and Document Search</h2><p>Frobenius Norm of matrix <code>A</code> = <code>np.sqrt(np.sum(np.square(A)))</code></p>
<p>To translate English to French by using embedding, we need to approximate the transformation matrix <code>R</code> that transforms the English embedding matrix <code>X</code> to the French embedding matrix <code>Y</code>. We can use gradient descent to find matrix <code>R</code> where argmin_R ||XR - Y||_F. After finding R, during inference process, we need to find K nearest neighbors of Y_hat in French vocabulary. When there are so many candidate neighbors (the whole vocabulary), KNN is very expensive. And Locality sensitive hashing comes into play.</p>
<p>Locality sensitive hashing defines multiple (N) planes that divide the space into 2^N hash buckets. Planes are represented by a vector that marks the direction in which you find the positive side of the plane. Given an embedding, if the sign of its product with the plane vector is positive, it’s on one side of the plane, this way we can hash the embeddings into buckets. The hash value of an input vector <code>hash_value = sum_i(2^i * h_i)</code> where <code>h_i = 1</code> for positive sign <code>h_i = 0</code> for negative sign for the ith plane. </p>
<p><code>np.where(condition, x, y)</code> returns elements chosen from x or y depending on condition. e.g. <code>A = np.where(B >= 0, 1, 0)</code> where <code>A</code> <code>B</code> are both matrices, and A[i, j] equals 0 or 1 depending on the value of B[i, j].</p>
</div>
<div class="article-info article-info-index">
<a href="/2020/07/25/nlp-c1-wk4/" class="archive-article-date">
<time datetime="2020-07-26T01:32:08.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-07-25</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/NLP/">NLP</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-nlp-c1-wk3" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/07/12/nlp-c1-wk3/">Notes for NLP Course1 Week2 & 3</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Naive-Bayes-amp-Word-Embedding"><a href="#Naive-Bayes-amp-Word-Embedding" class="headerlink" title="Naive Bayes & Word Embedding"></a>Naive Bayes & Word Embedding</h2><h3 id="Naive-Bayes"><a href="#Naive-Bayes" class="headerlink" title="Naive Bayes"></a>Naive Bayes</h3><ul>
<li>Bayes rule</li>
<li>Log likelihood</li>
<li>Laplacian smoothing</li>
<li>For sentiment analysis, calculate prior as the number of positive and negative documents,<br>calculate conditional probability as the frequency of the word in positive and negative documents.</li>
</ul>
<h4 id="Application-of-Naive-Bayes"><a href="#Application-of-Naive-Bayes" class="headerlink" title="Application of Naive Bayes"></a>Application of Naive Bayes</h4><ul>
<li>Sentiment analysis</li>
<li>Author identification</li>
<li>Information retrieval</li>
<li>Word disambiguation</li>
</ul>
<h3 id="Naive-Bayes-Assumptions"><a href="#Naive-Bayes-Assumptions" class="headerlink" title="Naive Bayes Assumptions"></a>Naive Bayes Assumptions</h3><ul>
<li>Independence: Not true in NLP</li>
<li>Relative frequency of classes affect the model, data is often imbalanced.</li>
</ul>
<h3 id="Word-Embedding"><a href="#Word-Embedding" class="headerlink" title="Word Embedding"></a>Word Embedding</h3><h4 id="Linear-algebra-operations-in-Numpy"><a href="#Linear-algebra-operations-in-Numpy" class="headerlink" title="Linear algebra operations in Numpy"></a>Linear algebra operations in Numpy</h4><p>Difference between lists and numpy arrays:</p>
<p>The <code>+</code> operator on NumPy arrays perform an element-wise addition, while the same operation on Python lists results in a list concatenation, same with the <code>*</code> operator<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div></pre></td><td class="code"><pre><div class="line">alist = [1, 2, 3, 4, 5]</div><div class="line">narray = np.array([1, 2, 3, 4])</div><div class="line"></div><div class="line">print(narray + narray)</div><div class="line">print(alist + alist)</div><div class="line">[2 4 6 8]</div><div class="line">[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]</div><div class="line"></div><div class="line">print(narray * 3)</div><div class="line">print(alist * 3)</div><div class="line">[ 3 6 9 12]</div><div class="line">[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5]</div></pre></td></tr></table></figure></p>
<p>The product operator <code>*</code> when used on arrays or matrices indicates element-wise multiplications. Do not confuse it with the dot product.</p>
<p>Find norm of a nparray or matrix: <code>np.linalg.norm(X)</code></p>
<p><strong>axis=0</strong> in params performs column wise operations while <strong>axis=1</strong> in params performs row wise operation.</p>
<h4 id="Vector-space"><a href="#Vector-space" class="headerlink" title="Vector space"></a>Vector space</h4><p>Similarity between two vectors can be calculated by Euclidean distance or cosine similarity; cosine similarity is preferred.</p>
<ul>
<li>Euclidean distance: <code>np.linalg.norm(A - B)</code></li>
<li>Cosine similarity: <code>np.dot(A, B) / np.linalg.norm(A) / np.linalg.norm(B)</code></li>
</ul>
<p>PCA for dimension reduction.</p>
<p><code>np.cov(m, rowvar=True)</code> to calculate the covariance matrix, <code>rowvar</code> is True by default, then each column is a sample and each row is a feature. </p>
<p><code>np.linalg.eigh(a)</code> to calculate eigen value and eigen vector. If <code>a</code> is symmetric, using <code>np.linalg.eigh()</code> is much more performant than <code>np.linalg.eig()</code>.</p>
<p><code>np.argsort()</code> sorts the values in an array from smallest to largest, then returns the indices from this sort. To apply the sorted index row wise, do <code>X[indices_sorted]</code>, to apply the sorted index column wise, do <code>X[:, indices_sorted]</code>.</p>
<p><code>X[::-1]</code> to reverse the order of a list.</p>
</div>
<div class="article-info article-info-index">
<a href="/2020/07/12/nlp-c1-wk3/" class="archive-article-date">
<time datetime="2020-07-13T02:53:53.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-07-12</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/NLP/">NLP</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-nlp-c1-wk1" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/07/06/nlp-c1-wk1/">Notes for NLP Course1 Week1</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Logistic-Regression"><a href="#Logistic-Regression" class="headerlink" title="Logistic Regression"></a>Logistic Regression</h2><h3 id="Twitter-text-data-preprocessing-steps"><a href="#Twitter-text-data-preprocessing-steps" class="headerlink" title="Twitter text data preprocessing steps:"></a>Twitter text data preprocessing steps:</h3><p>1.Stem<br> <code>from nltk.stem import PorterStemmer</code><br>2.Remove special symbols<br> <figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div></pre></td><td class="code"><pre><div class="line"># remove stock market tickers like $GE</div><div class="line"> tweet = re.sub(r'\$\w*', '', tweet)</div><div class="line"> # remove old style retweet text "RT"</div><div class="line"> tweet = re.sub(r'^RT[\s]+', '', tweet)</div><div class="line"> # remove hyperlinks</div><div class="line"> tweet = re.sub(r'https?:\/\/.*[\r\n]*', '', tweet)</div><div class="line"> # remove hashtags</div><div class="line"> # only removing the hash # sign from the word</div><div class="line"> tweet = re.sub(r'#', '', tweet)</div></pre></td></tr></table></figure></p>
<p>3.Tokenize<br> <code>from nltk.tokenize import TweetTokenizer</code><br>4.Remove stop words<br> <code>from nltk.corpus import stopwords</code></p>
<h3 id="Functions-I-learned"><a href="#Functions-I-learned" class="headerlink" title="Functions I learned"></a>Functions I learned</h3><p><em><code>np.dot()</code> for matrix multiplication
</em><code>np.exp()</code> for matrix exponential<br><em><code>np.log()</code> for matrix logarithm
</em><code>dict.get(key, substitution_value)</code> instead of [] to avoid key error.<br><em><code>np.asarray()</code> to convert a list to a numpy array, difference with <code>np.array()</code> is that <code>np.array()</code> does copy.
</em><code>np.squeeze()</code> to make an (m, 1) dimensional array [[],[],[]] into an (m,) array [,,,], when a list<br>is converted to an array using <code>np.asarray()</code>, it’s shaped as (m,).</p>
</div>
<div class="article-info article-info-index">
<a href="/2020/07/06/nlp-c1-wk1/" class="archive-article-date">
<time datetime="2020-07-07T02:43:07.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-07-06</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/NLP/">NLP</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-末日杂念" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2020/03/14/末日杂念/">末日杂念</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>纽约疫情加剧,剧情发展到跟一个月前的国内角色互换,我大部分时间被困在家里,生活像被按了下shift键。上下班的界限变的模糊,有的时候可能吃完晚饭还要工作一下。很奇怪在家办公竟然觉得更累,大概是因为一陷进工作里就忘了起身忘了喝水。代码不知道怎么写的时候就在屋子里走来走去,走到卧室翻倒在床上,三秒后劝自己”let’s get on with it!”。有点担心精神状态垮掉,还好有跟纽约的小伙伴交流防疫的最新信息,同事之间一起云吃饭,还有国内小伙伴每天的关心。坏处是开始嫌弃自己的厨艺,研究菜谱的热情只维持在开始的两天,现在还在好好做饭,很可能再过两周吃饭会等于填饱肚子而已。好处是天还没黑的时候就可以出门跑步,室外已经是春天的温度了,风是暖的很舒服;以及可以在家放肆地听歌,虽然经常因为打扰到工作而暂停…</p>
<p>因为已经从国内看了一个多月的预告片,我对于这些生活变化的接受很顺利,早就在mindset里面了,完全没感到困扰。跟着大众买口罩买酒精囤日用品囤粮食,竟然有了一些本不该有的麻木。对待病毒,从轻视到恐慌再到理性对待,担心过病毒大规模爆发,自己被感染,纽约的医疗设施无法接收这么多病人,我作为外国人一定会优先靠边站,没办法回国,爸妈又来不了美国,连最后一面都见不了,这大概是最惨的状况了吧。也因为各大送菜app都暂停配送服务而panic shopping,去平时很少去的楼下超市买了一堆硬核粮食,以至于被收银员嘲笑,回到家之后自己也觉得好笑没出息,但至少安心。</p>
<p>疫情的消息铺天盖地,新闻媒体想要给我最快最客观最全面的报道,这样很好,感谢所有致力于把信息带给每个人的工作者们,但有时候吧我也想喘口气。哦听到了意大利封国,哦知道了美国对欧洲travel ban,哦病毒可能会造成肺部永久性损伤,是这样吗?好的。大家都在讨论各国采取什么政策来应对什么样的国情,我由于我自己的愚蠢对此没有观察没有想法也并不太关心,更关心的却是生活里的小事,去办公室拯救我的两盆草,同事们分享的WFH-lunch集锦,WFH-pets集锦,意大利人民在阳台上拿出自己五花八门的乐器绝学合唱国歌,同样是末日囤货,有人买可乐汽水、有人如我买大米和面粉、有人直奔罐头食物,为什么都在抢厕纸?switch玩什么游戏好…如果人类被这个病毒搞到灭绝,我想的记得的全是这些琐事,也不知道生命安全都受到威胁了,还想着午饭吃什么工作怎么做好这些有什么用。我已经俗到底了。</p>
<p>生活什么时候能恢复正常呢?什么样的生活是正常的呢?我不是一直这样工作吃饭,周末在家宅到天荒地老吗?好像没什么不一样。但我也知道,看不到的地方有很多人在无声的战场上,医务工作者们、研究人员、政府职员、楼下超市的补货员、送菜app的外卖小哥,我家里有水有电有wifi有食物,这么简单的平常是很多人的关照,感谢他们!我们无法作为个体来对抗这个未知又凶险的病毒,我们依附于文明,依附于国家政体,依附于制度,再单纯也要依附于集体。</p>
<p>昨天听完 The Daily 的 <a href="https://www.nytimes.com/2020/03/13/podcasts/the-daily/coronavirus-relief.html" target="_blank" rel="external">Special Episode - A Bit of Relief</a> 有点感动,大概因为跟我一样,发现在危机和恐慌之下,回归生活细节最能给人以安慰和镇定,最后一个片段里读了 “On Living in an Atomic Age” by C.S. Lewis,贴出来分享一下:<br>“Believe me, dear sir or madam, you and all whom you love were already sentenced to death before the atomic bomb was invented: and quite a high percentage of us were going to die in unpleasant ways.”<br>“If we are all going to be destroyed by an atomic bomb, let that bomb when it comes find us doing sensible and human things—praying, working, teaching, reading, listening to music, bathing the children, playing tennis, chatting to our friends over a pint and a game of darts—not huddled together like frightened sheep and thinking about bombs. They may break our bodies (a microbe can do that) but they need not dominate our minds.”</p>
</div>
<div class="article-info article-info-index">
<a href="/2020/03/14/末日杂念/" class="archive-article-date">
<time datetime="2020-03-15T01:35:03.000Z" itemprop="datePublished"><i class="icon-clock"></i>2020-03-14</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/杂/">杂</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<article id="post-又一年夏天,我们仨" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-inner">
<input type="hidden" class="isFancy" />
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2019/08/25/又一年夏天,我们仨/">又一年夏天,我们仨</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<img src="/2019/08/25/又一年夏天,我们仨/us19.jpg" alt="Together 2019" title="Together 2019">
<h4 id="厉害的冯老师"><a href="#厉害的冯老师" class="headerlink" title="厉害的冯老师"></a>厉害的冯老师</h4><p>去接他们那天晚上,飞机晚点了,我在出口等了很久,牌子上显示飞机land了很久还是没见两个人出来,无奈打电话过去,冯老师竟然给我挂了!我正表示纳闷并愤怒的时候,一个微信过来“马上就到。”,厉害了冯老师,没想到还开通了国际流量,我觉得下次可以不用去机场接了…</p>
<h4 id="又是一年学英语"><a href="#又是一年学英语" class="headerlink" title="又是一年学英语"></a>又是一年学英语</h4><p>一年一度,冯老师开始了她为期两周打满鸡血的英语学习惯例,跟去年一样坐在书桌前认真记笔记、信誓旦旦“我回去一定要好好学英语!”、试着pronounce每一个看到的单词并问我对不对、不到两周放弃… 让我们明年继续期待吧!</p>
<h4 id="去囤菜"><a href="#去囤菜" class="headerlink" title="去囤菜"></a>去囤菜</h4><p>基于去年的经验和总结,二老来了的第二周就让我陪他们去河对岸costco买菜,主打土豆和洋葱,因为这两样便宜又耐放,于是可怜的我,这个假期吃了无数顿凉拌土豆丝和洋葱炒蛋…</p>
<h4 id="土豆想发芽"><a href="#土豆想发芽" class="headerlink" title="土豆想发芽"></a>土豆想发芽</h4><p>有一天正在上班的我突然收到冯老师微信“土豆想发芽,拿两个苹果回来”,??土豆是怎么告诉你它想发芽的?跟苹果又有什么关系呢?</p>
<h4 id="灯又闪了又买啥了"><a href="#灯又闪了又买啥了" class="headerlink" title="灯又闪了又买啥了"></a>灯又闪了又买啥了</h4><p>去年两个人在家被Alexa吓到之后,我给他们解释了家里为什么有人突然讲话,灯为什么会一直闪,于是今年我就会时不时收到冯老师的责问“灯又闪了又买啥了”,或者友情提醒“记得拿快递。”</p>
<h4 id="她们都没你高"><a href="#她们都没你高" class="headerlink" title="她们都没你高"></a>她们都没你高</h4><p>因为我在某次“找对象”话题里说过自己又丑又矮,身高不到160,后来很多次在岛上散步或者在地铁站等车或者任意走在大街上,老冯总是突然跟我说“你看崔粲,刚才走过去那两个女生,她们都没你高!”,ok, fine, good to know, 多谢鼓励。</p>
<h4 id="我需要QQ截屏"><a href="#我需要QQ截屏" class="headerlink" title="我需要QQ截屏"></a>我需要QQ截屏</h4><p>老崔要时不时的去网上抢卷子做题(做一道题可以赚5毛钱好像),他们小学数学题有时候需要写公式或者画图证明,由于网页上不方便进行这些操作,跟老崔分任务那个老师就远程指导他可以用QQ截图到word里面做完再截图贴回来。我告诉他不需要QQ也能截图并教了他怎么用mac快捷键截图,但他还是戴着个老花镜跟我不停强调“你这个不行,老师说了要用QQ截屏!”很着急的样子,看得我真想打人!</p>
<h4 id="一起旅行"><a href="#一起旅行" class="headerlink" title="一起旅行"></a>一起旅行</h4><p>计划了很久,期待了很久,二老不情愿了很久之后,终于等来了一起去Vegas和黄石的旅行!一起从家里出发,飞机上坐在一排(老崔没带耳机,坐在旁边瞅我屏幕看我看的电影),一起因为飞机delay半夜从机场回来赶着睡觉,八天的旅行留下了太多美好回忆!</p>
<h4 id="拍照黑洞"><a href="#拍照黑洞" class="headerlink" title="拍照黑洞"></a>拍照黑洞</h4><p>一家人出门旅行当然要拍美美的照片,审美和技术一如既往在线的我,为老崔和老冯取景找角度,拍了不少好看的照片。然而轮到他们给我拍照的时候,老冯不是相机怼到脸上出了我蔑视的表情有细微的变化看不出照片a和照片b有任何区别,就是蹲在地上仰拍留下擎天柱一样的我竖立在照片中央,就是努力辨认半天才能找到为什么会拍这么一张照片而我又在干嘛,可是人家又热衷于拍照,并且很委屈地说“我一直对我的拍照水平很有自信,你这两天说得我都没信心了”,这样的拍照黑洞哪里来的自信!</p>
<h4 id="没资格睡枕头的人"><a href="#没资格睡枕头的人" class="headerlink" title="没资格睡枕头的人"></a>没资格睡枕头的人</h4><p>出去旅行的几天,我们三个住一间屋。老崔是打呼专业户,而我睡觉不能有声音,所以每天晚上老崔都会被迫睡到离我最远的那个角,没有枕头睡,睡到一半会被老冯踹或者被我一声“爸!!!”吓醒。没办法,睡觉打呼的人就是这么卑微。</p>
<h4 id="看星星"><a href="#看星星" class="headerlink" title="看星星"></a>看星星</h4><p>黄石的行程,有一天晚上我们是住在黄石公园里面的,在我怂恿诱惑不成的逼迫下,老崔冒着被狼吃掉的风险(老冯瘫在床上完全拒绝)跟我一起出门看星星了,我们下楼走过路灯,来到一片空地上,一抬头满眼的星星,还有微弱的银河,那一瞬间真的很神奇!后来老崔怕冷很快遛回去了,我跟其他看星星的人一起又交流了一会儿。我会记得黄石公园的星空!</p>
<h4 id="盖世神功"><a href="#盖世神功" class="headerlink" title="盖世神功"></a>盖世神功</h4><p>还是跟拍照有关,路过大提顿的时候,有一处风景背景是连绵的雪山和草地,下车拍照前导游提醒大家可以跳起来拍照比较有趣,于是冯老师当然跃跃欲试,下车就在我的指导下开始了,然而由于手伸太直,腿抬太高,表情太用力而狰狞,拍出来的照片她像是在展示某种盖世神功,我跟老崔看到的一瞬间都笑喷了,而且每次看到都觉得很好笑,这张照片我会永久保存的!</p>
<h4 id="冰淇淋好吃吧"><a href="#冰淇淋好吃吧" class="headerlink" title="冰淇淋好吃吧"></a>冰淇淋好吃吧</h4><p>回程到Jackson town的时候,有一家冰淇淋店据说很好吃,所以吃过午饭逛过小镇之后,预计开车出发前10分钟,我去冰淇淋店买了双球的cone,大概是因为民风淳朴,小哥给我挖了超大的两颗球,走到车前我就后悔了因为冰淇淋这种东西是不让带上车的…老崔知道之后愤然上了车,于是在接下来的五分钟里我和冯老师一人一口,冰到嘴巴麻木啃完了那个大甜筒!回到车上我问冯老师冰淇淋是不是还挺好吃的,她说吃太快了完全没尝到味道…</p>
<h4 id="每日游记"><a href="#每日游记" class="headerlink" title="每日游记"></a>每日游记</h4><p>旅行大巴上我都是跟冯老师坐在一起,每次导游讲话的时候她都会打开手机备忘录,认真的记笔记,“今天六点五十从酒店出发前往黄石公园,黄石公园是美国第一个国家公园….我们游览了瀑布、老忠实…”,同一车坐在我们后面一个家庭,爸爸妈妈带着上小学的女儿出来游历,小姑娘在妈妈的督促下也在写游记,所以像冯老师这么自觉的孩子还是很让我省心的 :)</p>
<h4 id="阿拉斯加"><a href="#阿拉斯加" class="headerlink" title="阿拉斯加"></a>阿拉斯加</h4><p>八天的行程前后路过Las Vegas两次,待了三天两个晚上,然而冯老师每次说起来依然是“阿拉斯加”,Vegas:没能让您记住,是我的错…</p>
<h4 id="KA秀"><a href="#KA秀" class="headerlink" title="KA秀"></a>KA秀</h4><p>回程在Vegas的第二个晚上我们一起去看了KA秀,太阳马戏团真牛逼!三个人都看得非常开心,是很难忘的记忆!</p>
<h4 id="赛跑"><a href="#赛跑" class="headerlink" title="赛跑"></a>赛跑</h4><p>天气好的时候吃过晚饭,我们会一起去环岛跑步,我会challenge老崔来跟我赛跑,像小时候一样,一察觉到老崔加速就玩命往前跑,我觉得现在,不管老崔让我让我,我都能赢哈哈!然后冯老师在后面喊“看着脚下!!”</p>
<h4 id="脚"><a href="#脚" class="headerlink" title="脚"></a>脚</h4><p>今年二老打地铺,我买了两个海绵垫子铺在沙发前面的地毯上,睡得还挺舒服的,除了老崔,每次大家看电视的时候,或者从垫子上光脚到地板上再回来,老崔都痛心疾首的说“脚!脚!脚!那是我睡觉的地方啊,你们每天放脚的地方是我放头的地方啊,唉…” 哈哈哈</p>
<h4 id="跑步去买奶茶"><a href="#跑步去买奶茶" class="headerlink" title="跑步去买奶茶"></a>跑步去买奶茶</h4><p>还是跑步轶事,有一天心血来潮我决定去LIC那家奶茶店去买脏脏茶喝,跟二老一路跑一路等,被问了好几次怎么还没到之后,终于买到了!回程边喝边走,真的超开心,只是以后每次我再做这件事都会有一点点点不开心吧…</p>
<h4 id="两个“土匪”"><a href="#两个“土匪”" class="headerlink" title="两个“土匪”"></a>两个“土匪”</h4><p>收拾行李的时候,老崔一直试图拿走我的冰酒和清酒,老冯一直试图拿走我的香水和鞋,我打趣说“你俩就是两个土匪,吃完喝完还要带走!”</p>
<h4 id="健身达人"><a href="#健身达人" class="headerlink" title="健身达人"></a>健身达人</h4><p>今年楼下健身房翻修,24小时开放不用再找门卫开门了,所以两个人白天在家没事的时候找机会就回去楼下健身,器械都试个遍,比我还6,每天回到家给我炫耀嘲笑我今天又不去运动,真烦人!</p>
<h4 id="阳台会塌"><a href="#阳台会塌" class="headerlink" title="阳台会塌"></a>阳台会塌</h4><p>在没学会去健身房之前,老崔白天在岛上暴走,老冯晚上在阳台跟着视频跳健身操/舞,有一次我看到跟她说“妈你注意点,别把阳台跳塌了”,于是冯老师再也没有去阳台上跳过了…</p>
<h4 id="人家是不是不理你了"><a href="#人家是不是不理你了" class="headerlink" title="人家是不是不理你了"></a>人家是不是不理你了</h4><p>老冯的亲戚给我介绍了一个相亲对象,认识了几天之后二老关心一下进展,我说我们交换了照片,说完老崔问“是不是发完照片,人家就不理你了”,execuse me??有这么嫌弃自己闺女的吗??后来跟老崔下楼买菜的路上我就反复逼问,老崔在“嘿嘿”尬笑了好几下之后躲不过,吞吞吐吐跟我说“人家看你在国外,以为你是国外留学的样子” “国外留学什么样子?” “高端一点” “我不高端吗?” “你看你那照片里穿个t恤…人家应该喜欢穿正装的”,这个逻辑你懂吗?反正我跟老冯都没懂。 </p>
<h4 id="吃"><a href="#吃" class="headerlink" title="吃"></a>吃</h4><p>因为吃跟老冯吵了好多架,在Vegas为了吃InNOut跟着Google map走半天结果走到高速公路边只好无功折返,临走之前想去吃自助,回到纽约带他们去Nyonya吃马来西亚菜,每次我都能跟冯老师吵起来,她每次都是“崔粲,你怎么这么好吃,都已经很胖了!”,我真的好委屈…</p>
<h4 id="没有人洗袜子了"><a href="#没有人洗袜子了" class="headerlink" title="没有人洗袜子了"></a>没有人洗袜子了</h4><p>假期快要结束的时候,依依惜别的情绪已经开始发芽,每次洗完澡把袜子丢在浴室门口的时候,我都要朝正在看电视的老冯痛呼一声“妈!你走了以后就没人给我洗袜子了!”,是啊,真的很让人绝望啊…今天晚上就要自己洗袜子了…</p>
<p>近日气温已经在26/7度,早晚已经有些凉了,窗外的叶子开始泛黄,夏天又悄悄过去了。这个夏天我们仨一起去了一些地方做了很多事情留下了很多美好回忆,我坐在这里间歇抹着眼泪敲完这些字,不得不为今年夏天画上句号了。回到一个人的生活,期待下一个夏天…</p>
</div>
<div class="article-info article-info-index">
<a href="/2019/08/25/又一年夏天,我们仨/" class="archive-article-date">
<time datetime="2019-08-26T00:20:56.000Z" itemprop="datePublished"><i class="icon-clock"></i>2019-08-25</time>
</a>
<div class="article-tag tagcloud">
<i class="icon-price-tags"></i>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/all/">all</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/杂/">杂</a></li></ul>
</div>
<div class="clearfix"></div>
</div>
</div>
</article>
<nav id="page-nav">
<span class="page-number current">1</span><a class="page-number" href="/page/2/">2</a><a class="page-number" href="/page/3/">3</a><a class="page-number" href="/page/4/">4</a><a class="extend next" rel="next" href="/page/2/">Next »</a>
</nav>
</div>
<footer id="footer">
<div class="outer">
<div id="footer-info">
<div class="footer-left">
© 2021 少女翠西
</div>
<div class="footer-right">
<a href="http://hexo.io/" target="_blank">Hexo</a> Theme <a href="https://github.com/litten/hexo-theme-yilia" target="_blank">Yilia</a> by Litten
</div>
</div>
</div>
</footer>
</div>
<script>
var yiliaConfig = {
fancybox: true,
mathjax: false,
animate: true,
isHome: true,
isPost: false,
isArchive: false,
isTag: false,
isCategory: false,
open_in_new: false,
root: "/",
innerArchive: true
}
</script>
<script src="/./main.js"></script>
<div class="tools-col">
<ul class="btn-wrap">
<li class="chose" data-hook="tools-section-all"><span class="text">全部</span><i class="icon-book"></i></li>
<li data-hook="tools-section-tag"><span class="text">标签</span><i class="icon-price-tags"></i></li>
<li data-hook="tools-section-me"><span class="text">我</span><i class="icon-smile"></i></li>
</ul>
<div class="tools-wrap">
<section class="tools-section tools-section-all chose">
</section>
<section class="tools-section tools-section-tag">
<div class="widget tagcloud" id="js-tagcloud">
<a href="/tags/NLP/" style="font-size: 17.5px;">NLP</a> <a href="/tags/all/" style="font-size: 20px;">all</a> <a href="/tags/技术/" style="font-size: 15px;">技术</a> <a href="/tags/机器学习/" style="font-size: 10px;">机器学习</a> <a href="/tags/杂/" style="font-size: 17.5px;">杂</a> <a href="/tags/电影/" style="font-size: 12.5px;">电影</a> <a href="/tags/读书/" style="font-size: 15px;">读书</a> <a href="/tags/随想/" style="font-size: 17.5px;">随想</a>
</div>
</section>
<section class="tools-section tools-section-me">
<div class="aboutme-wrap" id="js-aboutme"><br>一个现实的理想主义者<br></div>
</section>
</div>
</div>
<!-- Root element of PhotoSwipe. Must have class pswp. -->
<div class="pswp" tabindex="-1" role="dialog" aria-hidden="true">
<!-- Background of PhotoSwipe.
It's a separate element as animating opacity is faster than rgba(). -->
<div class="pswp__bg"></div>
<!-- Slides wrapper with overflow:hidden. -->
<div class="pswp__scroll-wrap">
<!-- Container that holds slides.
PhotoSwipe keeps only 3 of them in the DOM to save memory.
Don't modify these 3 pswp__item elements, data is added later on. -->
<div class="pswp__container">
<div class="pswp__item"></div>
<div class="pswp__item"></div>
<div class="pswp__item"></div>
</div>
<!-- Default (PhotoSwipeUI_Default) interface on top of sliding area. Can be changed. -->
<div class="pswp__ui pswp__ui--hidden">
<div class="pswp__top-bar">
<!-- Controls are self-explanatory. Order can be changed. -->
<div class="pswp__counter"></div>
<button class="pswp__button pswp__button--close" title="Close (Esc)"></button>
<button class="pswp__button pswp__button--share" style="display:none" title="Share"></button>
<button class="pswp__button pswp__button--fs" title="Toggle fullscreen"></button>
<button class="pswp__button pswp__button--zoom" title="Zoom in/out"></button>
<!-- Preloader demo http://codepen.io/dimsemenov/pen/yyBWoR -->
<!-- element will get class pswp__preloader--active when preloader is running -->
<div class="pswp__preloader">
<div class="pswp__preloader__icn">
<div class="pswp__preloader__cut">
<div class="pswp__preloader__donut"></div>
</div>
</div>
</div>
</div>
<div class="pswp__share-modal pswp__share-modal--hidden pswp__single-tap">
<div class="pswp__share-tooltip"></div>
</div>
<button class="pswp__button pswp__button--arrow--left" title="Previous (arrow left)">
</button>
<button class="pswp__button pswp__button--arrow--right" title="Next (arrow right)">
</button>
<div class="pswp__caption">
<div class="pswp__caption__center"></div>
</div>
</div>
</div>
</div>
</div>
</body>
</html>