-
Notifications
You must be signed in to change notification settings - Fork 0
/
ref.clean.bib
6715 lines (6146 loc) · 213 KB
/
ref.clean.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@inproceedings{shah2020predictive,
  author    = {Shah, Deven Santosh and Schwartz, H. Andrew and Hovy, Dirk},
  title     = {Predictive Biases in Natural Language Processing Models: A Conceptual Framework and Overview},
  booktitle = {Proc. of ACL},
  pages     = {5248--5264},
  year      = {2020},
}
@inproceedings{wang2020covost,
  author    = {Wang, Changhan and Pino, Juan and Wu, Anne and Gu, Jiatao},
  title     = {{C}o{V}o{ST}: A Diverse Multilingual Speech-To-Text Translation Corpus},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  pages     = {4197--4203},
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  language  = {English},
}
@article{cattoni2021must,
  author   = {Roldano Cattoni and Mattia Antonino {Di Gangi} and Luisa Bentivogli and Matteo Negri and Marco Turchi},
  title    = {{MuST-C}: A multilingual corpus for end-to-end speech translation},
  journal  = {Computer Speech and Language},
  volume   = {66},
  pages    = {101155},
  year     = {2021},
  issn     = {0885-2308},
  abstract = {End-to-end spoken language translation (SLT) has recently gained popularity thanks to the advancement of sequence to sequence learning in its two parent tasks: automatic speech recognition (ASR) and machine translation (MT). However, research in the field has to confront with the scarcity of publicly available corpora to train data-hungry neural networks. Indeed, while traditional cascade solutions can build on sizable ASR and MT training data for a variety of languages, the available SLT corpora suitable for end-to-end training are few, typically small and of limited language coverage. We contribute to fill this gap by presenting MuST-C, a large and freely available Multilingual Speech Translation Corpus built from English TED Talks. Its unique features include: i) language coverage and diversity (from English into 14 languages from different families), ii) size (at least 237 hours of transcribed recordings per language, 430 on average), iii) variety of topics and speakers, and iv) data quality. Besides describing the corpus creation methodology and discussing the outcomes of empirical and manual quality evaluations, we present baseline results computed with strong systems on each language direction covered by MuST-C.},
  keywords = {Spoken language translation, Multilingual corpus},
}
@inproceedings{hovy2020you,
  author    = {Hovy, Dirk and Bianchi, Federico and Fornaciari, Tommaso},
  title     = {{``}You Sound Just Like Your Father{''} Commercial Machine Translation Systems Include Stylistic Biases},
  booktitle = {Proc. of ACL},
  pages     = {1686--1690},
  year      = {2020},
}
@inproceedings{vanmassenhove2018getting,
  author    = {Vanmassenhove, Eva and Hardmeier, Christian and Way, Andy},
  title     = {Getting Gender Right in Neural Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {3003--3008},
  year      = {2018},
}
@article{czarnowska2021quantifying,
  author  = {Czarnowska, Paula and Vyas, Yogarshi and Shah, Kashif},
  title   = {Quantifying Social Biases in {NLP}: A Generalization and Empirical Comparison of Extrinsic Fairness Metrics},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {9},
  pages   = {1249--1267},
  year    = {2021},
}
@inproceedings{zhu2021counter,
  author    = {Zhu, Yaoming and Feng, Jiangtao and Zhao, Chengqi and Wang, Mingxuan and Li, Lei},
  title     = {Counter-Interference Adapter for Multilingual Machine Translation},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  pages     = {2812--2823},
  year      = {2021},
}
@inproceedings{nadeem2021stereoset,
  author    = {Nadeem, Moin and Bethke, Anna and Reddy, Siva},
  title     = {{S}tereo{S}et: Measuring stereotypical bias in pretrained language models},
  booktitle = {Proc. of ACL},
  pages     = {5356--5371},
  year      = {2021},
}
@inproceedings{peng2020reducing,
  author    = {Peng, Xiangyu and Li, Siyan and Frazier, Spencer and Riedl, Mark},
  title     = {Reducing Non-Normative Text Generation from Language Models},
  booktitle = {Proceedings of the 13th International Conference on Natural Language Generation},
  pages     = {374--383},
  year      = {2020},
}
@inproceedings{groenwold2020investigating,
  author    = {Groenwold, Sophie and Ou, Lily and Parekh, Aesha and Honnavalli, Samhita and Levy, Sharon and Mirza, Diba and Wang, William Yang},
  title     = {Investigating {A}frican-{A}merican {V}ernacular {E}nglish in Transformer-Based Text Generation},
  booktitle = {Proc. of EMNLP},
  pages     = {5877--5883},
  year      = {2020},
}
@inproceedings{liang2020monolingual,
  author    = {Liang, Sheng and Dufter, Philipp and Sch{\"u}tze, Hinrich},
  title     = {Monolingual and Multilingual Reduction of Gender Bias in Contextualized Representations},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  pages     = {5082--5093},
  year      = {2020},
}
@inproceedings{may2019measuring,
  author    = {May, Chandler and Wang, Alex and Bordia, Shikha and Bowman, Samuel R. and Rudinger, Rachel},
  title     = {On Measuring Social Biases in Sentence Encoders},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {622--628},
  year      = {2019},
}
@inproceedings{gonen2019lipstick,
  author    = {Gonen, Hila and Goldberg, Yoav},
  title     = {Lipstick on a Pig: {D}ebiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {609--614},
  year      = {2019},
}
% The citation key keeps its legacy "2010" suffix so existing citations still
% resolve; the article itself appeared in Science in 2017.
@article{caliskan2010semantics,
  author  = {Aylin Caliskan and Joanna J. Bryson and Arvind Narayanan},
  title   = {Semantics derived automatically from language corpora contain human-like biases},
  journal = {Science},
  volume  = {356},
  number  = {6334},
  pages   = {183--186},
  year    = {2017},
  doi     = {10.1126/science.aal4230},
}
@inproceedings{bolukbasi2016man,
  author    = {Tolga Bolukbasi and Kai{-}Wei Chang and James Y. Zou and Venkatesh Saligrama and Adam Tauman Kalai},
  title     = {Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings},
  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5-10, 2016, Barcelona, Spain},
  editor    = {Daniel D. Lee and Masashi Sugiyama and Ulrike von Luxburg and Isabelle Guyon and Roman Garnett},
  pages     = {4349--4357},
  year      = {2016},
}
@inproceedings{stanovsky2019evaluating,
  author    = {Stanovsky, Gabriel and Smith, Noah A. and Zettlemoyer, Luke},
  title     = {Evaluating Gender Bias in Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1679--1684},
  year      = {2019},
}
@inproceedings{anastasopoulos2016unsupervised,
  author    = {Anastasopoulos, Antonios and Chiang, David and Duong, Long},
  title     = {An Unsupervised Probability Model for Speech-to-Translation Alignment of Low-Resource Languages},
  booktitle = {Proc. of EMNLP},
  pages     = {1255--1263},
  year      = {2016},
}
@inproceedings{anastasopoulos2018tied,
  author    = {Anastasopoulos, Antonios and Chiang, David},
  title     = {Tied Multitask Learning for Neural Speech Translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {82--91},
  year      = {2018},
}
@inproceedings{baevski2020wav2vec,
  author    = {Alexei Baevski and Yuhao Zhou and Abdelrahman Mohamed and Michael Auli},
  title     = {wav2vec 2.0: {A} Framework for Self-Supervised Learning of Speech Representations},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  year      = {2020},
}
@inproceedings{bahar2019comparative,
  author       = {Bahar, Parnia and Bieschke, Tobias and Ney, Hermann},
  title        = {A comparative study on end-to-end speech to text translation},
  booktitle    = {Proc. of ASRU},
  pages        = {792--799},
  organization = {IEEE},
  year         = {2019},
}
@inproceedings{bahar2019using,
  author    = {Bahar, Parnia and Zeyer, Albert and Schl{\"u}ter, Ralf and Ney, Hermann},
  title     = {On Using {S}pec{A}ugment for End-to-End Speech Translation},
  booktitle = {Proceedings of the 16th International Conference on Spoken Language Translation},
  year      = {2019},
}
@inproceedings{bansal2019pre,
  author    = {Bansal, Sameer and Kamper, Herman and Livescu, Karen and Lopez, Adam and Goldwater, Sharon},
  title     = {Pre-training on high-resource speech recognition improves low-resource speech-to-text translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {58--68},
  year      = {2019},
}
@inproceedings{battenberg2017exploring,
  author       = {Battenberg, Eric and Chen, Jitong and Child, Rewon and Coates, Adam and Gaur, Yashesh and Li, Yi and Liu, Hairong and Satheesh, Sanjeev and Sriram, Anuroop and Zhu, Zhenyao},
  title        = {Exploring neural transducers for end-to-end speech recognition},
  booktitle    = {Proc. of ASRU},
  pages        = {206--213},
  organization = {IEEE},
  year         = {2017},
}
@inproceedings{beck2019neural,
  author    = {Beck, Daniel and Cohn, Trevor and Haffari, Gholamreza},
  title     = {Neural Speech Translation using Lattice Transformations and Graph Networks},
  booktitle = {Proceedings of the Thirteenth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-13)},
  pages     = {26--31},
  year      = {2019},
}
@inproceedings{berard2016listen,
  author    = {B{\'e}rard, Alexandre and Pietquin, Olivier and Servan, Christophe and Besacier, Laurent},
  title     = {Listen and translate: A proof of concept for end-to-end speech-to-text translation},
  booktitle = {NIPS workshop on End-to-end Learning for Speech and Audio Processing},
  year      = {2016},
}
@inproceedings{berard2018end,
  author    = {B{\'e}rard, Alexandre and Besacier, Laurent and Kocabiyikoglu, Ali Can and Pietquin, Olivier},
  title     = {End-to-End Automatic Speech Translation of Audiobooks},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {6224--6228},
  year      = {2018},
}
@inproceedings{bertoldi2005new,
  author       = {Bertoldi, Nicola and Federico, Marcello},
  title        = {A new decoder for spoken language translation based on confusion networks},
  booktitle    = {Proc. of ASRU},
  pages        = {86--91},
  organization = {IEEE},
  year         = {2005},
}
@inproceedings{besacier2006towards,
  author       = {Besacier, Laurent and Zhou, Bowen and Gao, Yuqing},
  title        = {Towards speech translation of non written languages},
  booktitle    = {Proc. of SLT},
  pages        = {222--225},
  organization = {IEEE},
  year         = {2006},
}
@inproceedings{biadsy2019parrotron,
  author    = {Fadi Biadsy and Ron J. Weiss and Pedro J. Moreno and Dimitri Kanevsky and Ye Jia},
  title     = {Parrotron: An End-to-End Speech-to-Speech Conversion Model and its Applications to Hearing-Impaired Speech and Speech Separation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {4115--4119},
  year      = {2019},
}
@article{binder2003neural,
  author  = {Binder, Jeffrey R. and McKiernan, Kristen A. and Parsons, Melanie E. and Westbury, Chris F. and Possing, Edward T. and Kaufman, Jacqueline N. and Buchanan, Lori},
  title   = {Neural correlates of lexical access during visual word recognition},
  journal = {Journal of Cognitive Neuroscience},
  volume  = {15},
  number  = {3},
  pages   = {372--393},
  year    = {2003},
}
@article{blank2002speech,
  author  = {Blank, S. Catrin and Scott, Sophie K. and Murphy, Kevin and Warburton, Elizabeth and Wise, Richard J. S.},
  title   = {Speech production: {Wernicke}, {Broca} and beyond},
  journal = {Brain},
  volume  = {125},
  number  = {8},
  pages   = {1829--1838},
  year    = {2002},
}
@inproceedings{callisonburch2009findings,
  author    = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Schroeder, Josh},
  title     = {Findings of the 2009 {W}orkshop on {S}tatistical {M}achine {T}ranslation},
  booktitle = {Proceedings of the Fourth Workshop on Statistical Machine Translation},
  pages     = {1--28},
  year      = {2009},
}
@inproceedings{bojar2016findings,
  author    = {Bojar, Ond{\v{r}}ej and Chatterjee, Rajen and Federmann, Christian and Graham, Yvette and Haddow, Barry and Huck, Matthias and Jimeno Yepes, Antonio and Koehn, Philipp and Logacheva, Varvara and Monz, Christof and Negri, Matteo and N{\'e}v{\'e}ol, Aur{\'e}lie and Neves, Mariana and Popel, Martin and Post, Matt and Rubino, Raphael and Scarton, Carolina and Specia, Lucia and Turchi, Marco and Verspoor, Karin and Zampieri, Marcos},
  title     = {Findings of the 2016 Conference on Machine Translation},
  booktitle = {Proceedings of the First Conference on Machine Translation: Volume 2, Shared Task Papers},
  pages     = {131--198},
  year      = {2016},
}
@inproceedings{cettolo2014report,
  author    = {Cettolo, Mauro and Niehues, Jan and St{\"u}ker, Sebastian and Bentivogli, Luisa and Federico, Marcello},
  title     = {Report on the 11th {IWSLT} Evaluation Campaign, {IWSLT} 2014},
  booktitle = {Proc. of IWSLT},
  volume    = {57},
  year      = {2014},
}
@article{chen2016phone,
  author  = {Chen, Zhehuai and Zhuang, Yimeng and Qian, Yanmin and Yu, Kai},
  title   = {Phone synchronous speech recognition with {CTC} lattices},
  journal = {TASLP},
  volume  = {25},
  number  = {1},
  pages   = {90--101},
  year    = {2016},
}
@inproceedings{cheng2018towards,
  author    = {Cheng, Yong and Tu, Zhaopeng and Meng, Fandong and Zhai, Junjie and Liu, Yang},
  title     = {Towards Robust Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1756--1766},
  year      = {2018},
}
@inproceedings{cheng2019breaking,
  author    = {Cheng, Qiao and Fan, Meiyuan and Han, Yaqian and Huang, Jin and Duan, Yitao},
  title     = {Breaking the Data Barrier: Towards Robust Speech Translation via Adversarial Stability Training},
  booktitle = {Proceedings of the 16th International Conference on Spoken Language Translation},
  year      = {2019},
}
@article{collobert2011natural,
  author  = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  title   = {Natural language processing (almost) from scratch},
  journal = {JMLR},
  volume  = {12},
  number  = {Aug},
  pages   = {2493--2537},
  year    = {2011},
}
@inproceedings{dai2019transformer,
  author    = {Dai, Zihang and Yang, Zhilin and Yang, Yiming and Carbonell, Jaime and Le, Quoc and Salakhutdinov, Ruslan},
  title     = {Transformer-{XL}: Attentive Language Models beyond a Fixed-Length Context},
  booktitle = {Proc. of ACL},
  pages     = {2978--2988},
  year      = {2019},
}
@inproceedings{devlin2019bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {4171--4186},
  year      = {2019},
}
@inproceedings{gangi2019adapting,
  author    = {Di Gangi, Mattia Antonino and Negri, Matteo and Turchi, Marco},
  title     = {Adapting Transformer to End-to-End Spoken Language Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {1133--1137},
  year      = {2019},
}
@inproceedings{digangi2019enhancing,
  author    = {Di Gangi, Mattia Antonino and Negri, Matteo and Cattoni, Roldano and Dessi, Roberto and Turchi, Marco},
  title     = {Enhancing Transformer for End-to-end Speech-to-Text Translation},
  booktitle = {Proceedings of Machine Translation Summit XVII: Research Track},
  pages     = {21--31},
  year      = {2019},
}
@inproceedings{digangi2019must,
  author    = {Di Gangi, Mattia A. and Cattoni, Roldano and Bentivogli, Luisa and Negri, Matteo and Turchi, Marco},
  title     = {{M}u{ST}-{C}: a {M}ultilingual {S}peech {T}ranslation {C}orpus},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {2012--2017},
  year      = {2019},
}
@inproceedings{gangi2020instance,
  author    = {Di Gangi, Mattia Antonino and Nguyen, Viet{-}Nhat and Negri, Matteo and Turchi, Marco},
  title     = {Instance-based Model Adaptation for Direct Speech Translation},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7914--7918},
  year      = {2020},
}
@inproceedings{dixon2011investigation,
  author    = {Dixon, Paul R. and Finch, Andrew and Hori, Chiori and Kashioka, Hideki},
  title     = {Investigation on the effects of {ASR} tuning on speech translation performance},
  booktitle = {Proc. of IWSLT},
  year      = {2011},
}
@inproceedings{dong2018speech,
  author    = {Linhao Dong and Shuang Xu and Bo Xu},
  title     = {Speech-Transformer: {A} No-Recurrence Sequence-to-Sequence Model for Speech Recognition},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5884--5888},
  year      = {2018},
}
@inproceedings{dong2021consecutive,
  author    = {Dong, Qianqian and Wang, Mingxuan and Zhou, Hao and Xu, Shuang and Xu, Bo and Li, Lei},
  title     = {Consecutive Decoding for Speech-to-text Translation},
  booktitle = {Proc. of AAAI},
  year      = {2021},
}
@inproceedings{dong2021listen,
  author    = {Dong, Qianqian and Ye, Rong and Wang, Mingxuan and Zhou, Hao and Xu, Shuang and Xu, Bo and Li, Lei},
  title     = {Listen, Understand and Translate: Triple Supervision Decouples End-to-end Speech-to-text Translation},
  booktitle = {Proc. of AAAI},
  volume    = {35},
  number    = {14},
  pages     = {12749--12759},
  year      = {2021},
}
@inproceedings{duong2016attentional,
  author    = {Duong, Long and Anastasopoulos, Antonios and Chiang, David and Bird, Steven and Cohn, Trevor},
  title     = {An Attentional Model for Speech Translation Without Transcription},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {949--959},
  year      = {2016},
}
@inproceedings{dyer2013simple,
  author    = {Dyer, Chris and Chahuneau, Victor and Smith, Noah A.},
  title     = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {644--648},
  year      = {2013},
}
@inproceedings{fitzgerald2009reconstructing,
  author    = {Fitzgerald, Erin and Hall, Keith and Jelinek, Frederick},
  title     = {Reconstructing False Start Errors in Spontaneous Speech Text},
  booktitle = {Proceedings of the 12th Conference of the {E}uropean Chapter of the {ACL} ({EACL} 2009)},
  pages     = {255--263},
  year      = {2009},
}
@article{froyen2009long,
  author     = {Froyen, Dries J. W. and Bonte, Milene L. and van Atteveldt, Nienke and Blomert, Leo},
  title      = {The Long Road to Automation: Neurocognitive Development of Letter-Speech Sound Processing},
  journal    = {Journal of Cognitive Neuroscience},
  volume     = {21},
  number     = {3},
  pages      = {567--580},
  year       = {2009},
  issn       = {0898-929X},
  issue_date = {March 2009},
  numpages   = {14},
  abstract   = {In transparent alphabetic languages, the expected standard for complete acquisition of letter-speech sound associations is within one year of reading instruction. The neural mechanisms underlying the acquisition of letter-speech sound associations have, however, hardly been investigated. The present article describes an ERP study with beginner and advanced readers in which the influence of letters on speech sound processing is investigated by comparing the MMN to speech sounds presented in isolation with the MMN to speech sounds accompanied by letters. Furthermore, SOA between letter and speech sound presentation was manipulated in order to investigate the development of the temporal window of integration for letter-speech sound processing. Beginner readers, despite one year of reading instruction, showed no early letter-speech sound integration, that is, no influence of the letter on the evocation of the MMN to the speech sound. Only later in the difference wave, at 650 msec, was an influence of the letter on speech sound processing revealed. Advanced readers, with 4 years of reading instruction, showed early and automatic letter-speech sound processing as revealed by an enhancement of the MMN amplitude, however, at a different temporal window of integration in comparison with experienced adult readers. The present results indicate a transition from mere association in beginner readers to more automatic, but still not "adult-like," integration in advanced readers. In contrast to general assumptions, the present study provides evidence for an extended development of letter-speech sound integration.},
}
@phdthesis{fuegen2008system,
  author = {F{\"u}gen, Christian},
  title  = {A system for simultaneous translation of lectures and speeches},
  school = {Universit{\"a}t Karlsruhe (TH)},
  year   = {2008},
}
@inproceedings{graves2006connectionist,
  author    = {Alex Graves and Santiago Fern{\'{a}}ndez and Faustino J. Gomez and J{\"{u}}rgen Schmidhuber},
  title     = {Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks},
  booktitle = {Proc. of ICML},
  editor    = {William W. Cohen and Andrew W. Moore},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {148},
  pages     = {369--376},
  year      = {2006},
}
@inproceedings{he2011why,
  author       = {He, Xiaodong and Deng, Li and Acero, Alex},
  title        = {Why word error rate is not a good metric for speech recognizer training for the speech translation task?},
  booktitle    = {Proc. of ICASSP},
  pages        = {5632--5635},
  organization = {IEEE},
  year         = {2011},
}
@inproceedings{inaguma2019multilingual,
  author       = {Inaguma, Hirofumi and Duh, Kevin and Kawahara, Tatsuya and Watanabe, Shinji},
  title        = {Multilingual end-to-end speech translation},
  booktitle    = {Proc. of ASRU},
  pages        = {570--577},
  organization = {IEEE},
  year         = {2019},
}
@inproceedings{inaguma2020espnet,
  author    = {Inaguma, Hirofumi and Kiyono, Shun and Duh, Kevin and Karita, Shigeki and Yalta, Nelson and Hayashi, Tomoki and Watanabe, Shinji},
  title     = {{ESP}net-{ST}: All-in-One Speech Translation Toolkit},
  booktitle = {Proc. of ACL},
  pages     = {302--311},
  year      = {2020},
}
@inproceedings{indurthi2020data,
  author       = {Indurthi, Sathish and Han, Houjeung and Lakumarapu, Nikhil Kumar and Lee, Beomseok and Chung, Insoo and Kim, Sangha and Kim, Chanwoo},
  title        = {Data efficient direct speech-to-text translation with modality agnostic meta-learning},
  booktitle    = {Proc. of ICASSP},
  organization = {IEEE},
  year         = {2020},
}
@inproceedings{cettolo2015iwslt,
  author    = {Cettolo, Mauro and Niehues, Jan and St{\"u}ker, Sebastian and Bentivogli, Luisa and Cattoni, Roldano and Federico, Marcello},
  title     = {The {IWSLT} 2015 Evaluation Campaign},
  booktitle = {Proceedings of the 12th International Workshop on Spoken Language Translation: Evaluation Campaign},
  pages     = {2--14},
  year      = {2015},
}
@inproceedings{jawahar2019what,
  author    = {Jawahar, Ganesh and Sagot, Beno{\^\i}t and Seddah, Djam{\'e}},
  title     = {What Does {BERT} Learn about the Structure of Language?},
  booktitle = {Proc. of ACL},
  pages     = {3651--3657},
  year      = {2019},
}
@inproceedings{jia2019direct,
  author    = {Ye Jia and Ron J. Weiss and Fadi Biadsy and Wolfgang Macherey and Melvin Johnson and Zhifeng Chen and Yonghui Wu},
  title     = {Direct Speech-to-Speech Translation with a Sequence-to-Sequence Model},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {1123--1127},
  year      = {2019},
}
@inproceedings{jia2019leveraging,
  author    = {Ye Jia and Melvin Johnson and Wolfgang Macherey and Ron J. Weiss and Yuan Cao and Chung{-}Cheng Chiu and Naveen Ari and Stella Laurenzo and Yonghui Wu},
  title     = {Leveraging Weakly Supervised Data to Improve End-to-end Speech-to-text Translation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {7180--7184},
  year      = {2019},
}
@inproceedings{kannan2018analysis,
  author    = {Anjuli Kannan and Yonghui Wu and Patrick Nguyen and Tara N. Sainath and Zhifeng Chen and Rohit Prabhavalkar},
  title     = {An Analysis of Incorporating an External Language Model into a Sequence-to-Sequence Model},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5824--5828},
  year      = {2018},
}
@inproceedings{kano2017structured,
  author    = {Takatomo Kano and Sakriani Sakti and Satoshi Nakamura},
  title     = {Structured-Based Curriculum Learning for End-to-End {English-Japanese} Speech Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Francisco Lacerda},
  pages     = {2630--2634},
  year      = {2017},
}
@inproceedings{kiros2015skip,
  author    = {Ryan Kiros and Yukun Zhu and Ruslan Salakhutdinov and Richard S. Zemel and Raquel Urtasun and Antonio Torralba and Sanja Fidler},
  title     = {Skip-Thought Vectors},
  booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  editor    = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  pages     = {3294--3302},
  year      = {2015},
}
@inproceedings{kocabiyikoglu2018augmenting,
  author    = {Kocabiyikoglu, Ali Can and Besacier, Laurent and Kraif, Olivier},
  title     = {Augmenting {Librispeech} with {F}rench Translations: A Multimodal Corpus for Direct Speech Translation Evaluation},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  year      = {2018},
}
@inproceedings{conneau2019cross,
  author    = {Alexis Conneau and Guillaume Lample},
  title     = {Cross-lingual Language Model Pretraining},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  pages     = {7057--7067},
  year      = {2019},
}
@inproceedings{lavie1996multi,
  author    = {Lavie, Alon and Gates, Donna and Gavalda, Marsal and Mayfield, Laura and Waibel, Alex and Levin, Lori},
  title     = {Multi-lingual Translation of Spontaneously Spoken Language in a Limited Domain},
  booktitle = {{COLING} 1996 Volume 1: The 16th International Conference on Computational Linguistics},
  year      = {1996},
}
@inproceedings{le2014distributed,
  author    = {Quoc V. Le and Tom{\'{a}}s Mikolov},
  title     = {Distributed Representations of Sentences and Documents},
  booktitle = {Proc. of ICML},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {32},
  pages     = {1188--1196},
  year      = {2014},
}
@inproceedings{le2020dual,
  author    = {Le, Hang and Pino, Juan and Wang, Changhan and Gu, Jiatao and Schwab, Didier and Besacier, Laurent},
  title     = {Dual-decoder Transformer for Joint Automatic Speech Recognition and Multilingual Speech Translation},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  pages     = {3520--3533},
  year      = {2020},
}
@inproceedings{lison2016opensubtitles2016,
  author    = {Lison, Pierre and Tiedemann, J{\"o}rg},
  title     = {{O}pen{S}ubtitles2016: Extracting Large Parallel Corpora from Movie and {TV} Subtitles},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
  pages     = {923--929},
  year      = {2016},
}
@inproceedings{liu2003use,
  author       = {Liu, Fu-Hua and Gu, Liang and Gao, Yuqing and Picheny, Michael},
  title        = {Use of statistical {N}-gram models in natural language generation for machine translation},
  booktitle    = {Proc. of ICASSP},
  volume       = {1},
  pages        = {I--I},
  organization = {IEEE},
  year         = {2003},
}
@article{liu2018ustc,
  author  = {Liu, Dan and Liu, Junhua and Guo, Wu and Xiong, Shifu and Ma, Zhiqiang and Song, Rui and Wu, Chongliang and Liu, Quan},
  title   = {The {USTC-NEL} Speech Translation system at {IWSLT} 2018},
  journal = {ArXiv preprint},
  volume  = {abs/1812.02455},
  year    = {2018},
}
@inproceedings{liu2019end,
  author    = {Yuchen Liu and Hao Xiong and Jiajun Zhang and Zhongjun He and Hua Wu and Haifeng Wang and Chengqing Zong},
  title     = {End-to-End Speech Translation with Knowledge Distillation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {1128--1132},
  year      = {2019},
}
@article{liu2020bridging,
  author  = {Liu, Yuchen and Zhu, Junnan and Zhang, Jiajun and Zong, Chengqing},
  title   = {Bridging the Modality Gap for Speech-to-Text Translation},
  journal = {ArXiv preprint},
  volume  = {abs/2010.14920},
  year    = {2020},
}
@inproceedings{lu2018neural,
  author    = {Lu, Yichao and Keung, Phillip and Ladhak, Faisal and Bhardwaj, Vikas and Zhang, Shaonan and Sun, Jason},
  title     = {A neural interlingua for multilingual machine translation},
  booktitle = {Proceedings of the Third Conference on Machine Translation: Research Papers},
  pages     = {84--92},
  year      = {2018},
}
@inproceedings{lugosch2019speech,
  author    = {Loren Lugosch and Mirco Ravanelli and Patrick Ignoto and Vikrant Singh Tomar and Yoshua Bengio},
  title     = {Speech Model Pre-Training for End-to-End Spoken Language Understanding},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {814--818},
  year      = {2019},
}
@inproceedings{matusov2005phrase,
  author       = {Matusov, Evgeny and Ney, Hermann and Schl{\"u}ter, Ralf},
  title        = {Phrase-based translation of speech recognizer word lattices using loglinear model combination},
  booktitle    = {Proc. of ASRU},
  pages        = {110--115},
  organization = {IEEE},
  year         = {2005},
}
@inproceedings{matusov2006automatic,
  author    = {Matusov, Evgeny and Mauser, Arne and Ney, Hermann},
  title     = {Automatic sentence segmentation and punctuation prediction for spoken language translation},
  booktitle = {Proceedings of the Third International Workshop on Spoken Language Translation: Papers},
  year      = {2006},
}
@inproceedings{matusov2008spoken,
  author    = {Matusov, Evgeny and Hoffmeister, Bj{\"o}rn and Ney, Hermann},
  title     = {{ASR} Word Lattice Translation with Exhaustive Reordering is Possible},
  booktitle = {Proc. of INTERSPEECH},
  year      = {2008},
}
@inproceedings{mikolov2013distributed,
  author    = {Tom{\'{a}}s Mikolov and Ilya Sutskever and Kai Chen and Gregory S. Corrado and Jeffrey Dean},
  title     = {Distributed Representations of Words and Phrases and their Compositionality},
  booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
  editor    = {Christopher J. C. Burges and L{\'{e}}on Bottou and Zoubin Ghahramani and Kilian Q. Weinberger},
  pages     = {3111--3119},
  year      = {2013},
}
@inproceedings{ott2019fairseq,
  author    = {Ott, Myle and Edunov, Sergey and Baevski, Alexei and Fan, Angela and Gross, Sam and Ng, Nathan and Grangier, David and Auli, Michael},
  title     = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics (Demonstrations)},
  pages     = {48--53},
  year      = {2019},
}
@inproceedings{papineni2002bleu,
  author    = {Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
  title     = {{B}leu: a Method for Automatic Evaluation of Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {311--318},
  year      = {2002},
}
@inproceedings{park2019specaugment,
  author    = {Daniel S. Park and William Chan and Yu Zhang and Chung{-}Cheng Chiu and Barret Zoph and Ekin D. Cubuk and Quoc V. Le},
  title     = {{SpecAugment}: {A} Simple Data Augmentation Method for Automatic Speech Recognition},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {2613--2617},
  year      = {2019},
}
@inproceedings{peitz2012spoken,
  author    = {Peitz, Stephan and Wiesler, Simon and Nu{\ss}baum-Thom, Markus and Ney, Hermann},
  title     = {Spoken language translation using automatically transcribed text in training},
  booktitle = {Proceedings of the 9th International Workshop on Spoken Language Translation: Papers},
  pages     = {276--283},
  year      = {2012},
}
@inproceedings{pennington2014glove,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher},
  title     = {{G}lo{V}e: Global Vectors for Word Representation},
  booktitle = {Proc. of EMNLP},
  pages     = {1532--1543},
  year      = {2014},
}
@inproceedings{peters2018deep,
author = {Peters, Matthew E. and
Neumann, Mark and
Iyyer, Mohit and
Gardner, Matt and
Clark, Christopher and
Lee, Kenton and
Zettlemoyer, Luke},
booktitle = {Proc. of NAACL-HLT},
pages = {2227--2237},