ref.clean.bib

@inproceedings{shah2020predictive,
  author    = {Shah, Deven Santosh and Schwartz, H. Andrew and Hovy, Dirk},
  title     = {Predictive Biases in Natural Language Processing Models: A Conceptual Framework and Overview},
  booktitle = {Proc. of ACL},
  pages     = {5248--5264},
  year      = {2020},
}

@inproceedings{wang2020covost,
  author    = {Wang, Changhan and Pino, Juan and Wu, Anne and Gu, Jiatao},
  title     = {{C}o{V}o{ST}: A Diverse Multilingual Speech-To-Text Translation Corpus},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  pages     = {4197--4203},
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  language  = {English},
}

@article{cattoni2021must,
  author   = {Roldano Cattoni and Mattia Antonino {Di Gangi} and Luisa Bentivogli and Matteo Negri and Marco Turchi},
  title    = {{MuST-C}: A multilingual corpus for end-to-end speech translation},
  journal  = {Computer Speech and Language},
  volume   = {66},
  pages    = {101155},
  year     = {2021},
  issn     = {0885-2308},
  doi      = {10.1016/j.csl.2020.101155},
  abstract = {End-to-end spoken language translation (SLT) has recently gained popularity thanks to the advancement of sequence to sequence learning in its two parent tasks: automatic speech recognition (ASR) and machine translation (MT). However, research in the field has to confront with the scarcity of publicly available corpora to train data-hungry neural networks. Indeed, while traditional cascade solutions can build on sizable ASR and MT training data for a variety of languages, the available SLT corpora suitable for end-to-end training are few, typically small and of limited language coverage. We contribute to fill this gap by presenting MuST-C, a large and freely available Multilingual Speech Translation Corpus built from English TED Talks. Its unique features include: i) language coverage and diversity (from English into 14 languages from different families), ii) size (at least 237 hours of transcribed recordings per language, 430 on average), iii) variety of topics and speakers, and iv) data quality. Besides describing the corpus creation methodology and discussing the outcomes of empirical and manual quality evaluations, we present baseline results computed with strong systems on each language direction covered by MuST-C.},
  keywords = {Spoken language translation, Multilingual corpus},
}

@inproceedings{hovy2020you,
 author = {Hovy, Dirk and
           Bianchi, Federico and
           Fornaciari, Tommaso},
 title = {{``}You Sound Just Like Your Father{''} Commercial Machine Translation Systems Include Stylistic Biases},
 booktitle = {Proc. of ACL},
 pages = {1686--1690},
 year = {2020}
}

@inproceedings{vanmassenhove2018getting,
  author    = {Vanmassenhove, Eva and Hardmeier, Christian and Way, Andy},
  title     = {Getting Gender Right in Neural Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {3003--3008},
  year      = {2018},
}

@article{czarnowska2021quantifying,
  author  = {Czarnowska, Paula and Vyas, Yogarshi and Shah, Kashif},
  title   = {Quantifying Social Biases in {NLP}: A Generalization and Empirical Comparison of Extrinsic Fairness Metrics},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {9},
  pages   = {1249--1267},
  year    = {2021},
}

@inproceedings{zhu2021counter,
  author    = {Zhu, Yaoming and Feng, Jiangtao and Zhao, Chengqi and Wang, Mingxuan and Li, Lei},
  title     = {Counter-Interference Adapter for Multilingual Machine Translation},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  pages     = {2812--2823},
  year      = {2021},
}

@inproceedings{nadeem2021stereoset,
  author    = {Nadeem, Moin and Bethke, Anna and Reddy, Siva},
  title     = {{S}tereo{S}et: Measuring stereotypical bias in pretrained language models},
  booktitle = {Proc. of ACL},
  pages     = {5356--5371},
  year      = {2021},
}

@inproceedings{peng2020reducing,
  author    = {Peng, Xiangyu and Li, Siyan and Frazier, Spencer and Riedl, Mark},
  title     = {Reducing Non-Normative Text Generation from Language Models},
  booktitle = {Proceedings of the 13th International Conference on Natural Language Generation},
  pages     = {374--383},
  year      = {2020},
}

@inproceedings{groenwold2020investigating,
  author    = {Groenwold, Sophie and Ou, Lily and Parekh, Aesha and Honnavalli, Samhita and Levy, Sharon and Mirza, Diba and Wang, William Yang},
  title     = {Investigating {A}frican-{A}merican {V}ernacular {E}nglish in Transformer-Based Text Generation},
  booktitle = {Proc. of EMNLP},
  pages     = {5877--5883},
  year      = {2020},
}

@inproceedings{liang2020monolingual,
  author    = {Liang, Sheng and Dufter, Philipp and Sch{\"u}tze, Hinrich},
  title     = {Monolingual and Multilingual Reduction of Gender Bias in Contextualized Representations},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  pages     = {5082--5093},
  year      = {2020},
}

@inproceedings{may2019measuring,
  author    = {May, Chandler and Wang, Alex and Bordia, Shikha and Bowman, Samuel R. and Rudinger, Rachel},
  title     = {On Measuring Social Biases in Sentence Encoders},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {622--628},
  year      = {2019},
}

@inproceedings{gonen2019lipstick,
  author    = {Gonen, Hila and Goldberg, Yoav},
  title     = {Lipstick on a Pig: {D}ebiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {609--614},
  year      = {2019},
}

@article{caliskan2010semantics,
  author  = {Caliskan, Aylin and Bryson, Joanna J. and Narayanan, Arvind},
  title   = {Semantics derived automatically from language corpora contain human-like biases},
  journal = {Science},
  volume  = {356},
  number  = {6334},
  pages   = {183--186},
  year    = {2017},
  doi     = {10.1126/science.aal4230},
}

@inproceedings{bolukbasi2016man,
  author    = {Tolga Bolukbasi and Kai{-}Wei Chang and James Y. Zou and Venkatesh Saligrama and Adam Tauman Kalai},
  title     = {Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings},
  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5-10, 2016, Barcelona, Spain},
  editor    = {Daniel D. Lee and Masashi Sugiyama and Ulrike von Luxburg and Isabelle Guyon and Roman Garnett},
  pages     = {4349--4357},
  year      = {2016},
}

@inproceedings{stanovsky2019evaluating,
  author    = {Stanovsky, Gabriel and Smith, Noah A. and Zettlemoyer, Luke},
  title     = {Evaluating Gender Bias in Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1679--1684},
  year      = {2019},
}

@inproceedings{anastasopoulos2016unsupervised,
  author    = {Anastasopoulos, Antonios and Chiang, David and Duong, Long},
  title     = {An Unsupervised Probability Model for Speech-to-Translation Alignment of Low-Resource Languages},
  booktitle = {Proc. of EMNLP},
  pages     = {1255--1263},
  year      = {2016},
}

@inproceedings{anastasopoulos2018tied,
  author    = {Anastasopoulos, Antonios and Chiang, David},
  title     = {Tied Multitask Learning for Neural Speech Translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {82--91},
  year      = {2018},
}

@inproceedings{baevski2020wav2vec,
  author    = {Alexei Baevski and Yuhao Zhou and Abdelrahman Mohamed and Michael Auli},
  title     = {wav2vec 2.0: {A} Framework for Self-Supervised Learning of Speech Representations},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  year      = {2020},
}

@inproceedings{bahar2019comparative,
  author       = {Bahar, Parnia and
                  Bieschke, Tobias and
                  Ney, Hermann},
  title        = {A comparative study on end-to-end speech to text translation},
  booktitle    = {Proc. of ASRU},
  organization = {IEEE},
  pages        = {792--799},
  year         = {2019},
}

@inproceedings{bahar2019using,
  author    = {Bahar, Parnia and Zeyer, Albert and Schl{\"u}ter, Ralf and Ney, Hermann},
  title     = {On Using {S}pec{A}ugment for End-to-End Speech Translation},
  booktitle = {Proceedings of the 16th International Conference on Spoken Language Translation},
  year      = {2019},
}

@inproceedings{bansal2019pre,
  author    = {Bansal, Sameer and Kamper, Herman and Livescu, Karen and Lopez, Adam and Goldwater, Sharon},
  title     = {Pre-training on high-resource speech recognition improves low-resource speech-to-text translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {58--68},
  year      = {2019},
}

@inproceedings{battenberg2017exploring,
  author       = {Battenberg, Eric and Chen, Jitong and Child, Rewon and Coates, Adam and Gaur, Yashesh and Li, Yi and Liu, Hairong and Satheesh, Sanjeev and Sriram, Anuroop and Zhu, Zhenyao},
  title        = {Exploring neural transducers for end-to-end speech recognition},
  booktitle    = {Proc. of ASRU},
  organization = {IEEE},
  pages        = {206--213},
  year         = {2017},
}

@inproceedings{beck2019neural,
  author    = {Beck, Daniel and Cohn, Trevor and Haffari, Gholamreza},
  title     = {Neural Speech Translation using Lattice Transformations and Graph Networks},
  booktitle = {Proceedings of the Thirteenth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-13)},
  pages     = {26--31},
  year      = {2019},
}

@inproceedings{berard2016listen,
  author    = {B{\'e}rard, Alexandre and
               Pietquin, Olivier and
               Servan, Christophe and
               Besacier, Laurent},
  title     = {Listen and translate: A proof of concept for end-to-end speech-to-text translation},
  booktitle = {NIPS workshop on End-to-end Learning for Speech and Audio Processing},
  year      = {2016},
}

@inproceedings{berard2018end,
  author    = {B{\'e}rard, Alexandre and Besacier, Laurent and Kocabiyikoglu, Ali Can and Pietquin, Olivier},
  title     = {End-to-End Automatic Speech Translation of Audiobooks},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {6224--6228},
  year      = {2018},
}

@inproceedings{bertoldi2005new,
  author       = {Bertoldi, Nicola and Federico, Marcello},
  title        = {A new decoder for spoken language translation based on confusion networks},
  booktitle    = {Proc. of ASRU},
  organization = {IEEE},
  pages        = {86--91},
  year         = {2005},
}

@inproceedings{besacier2006towards,
  author       = {Besacier, Laurent and
                  Zhou, Bowen and
                  Gao, Yuqing},
  title        = {Towards speech translation of non written languages},
  booktitle    = {Proc. of SLT},
  organization = {IEEE},
  pages        = {222--225},
  year         = {2006},
}

@inproceedings{biadsy2019parrotron,
  author    = {Fadi Biadsy and Ron J. Weiss and Pedro J. Moreno and Dimitri Kanevsky and Ye Jia},
  title     = {Parrotron: An End-to-End Speech-to-Speech Conversion Model and its Applications to Hearing-Impaired Speech and Speech Separation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {4115--4119},
  year      = {2019},
}

@article{binder2003neural,
  author  = {Binder, Jeffrey R and
             McKiernan, Kristen A and
             Parsons, Melanie E and
             Westbury, Chris F and
             Possing, Edward T and
             Kaufman, Jacqueline N and
             Buchanan, Lori},
  title   = {Neural correlates of lexical access during visual word recognition},
  journal = {Journal of cognitive neuroscience},
  volume  = {15},
  number  = {3},
  pages   = {372--393},
  year    = {2003},
}

@article{blank2002speech,
  author  = {Blank, S Catrin and
             Scott, Sophie K and
             Murphy, Kevin and
             Warburton, Elizabeth and
             Wise, Richard JS},
  title   = {Speech production: Wernicke, Broca and beyond},
  journal = {Brain},
  volume  = {125},
  number  = {8},
  pages   = {1829--1838},
  year    = {2002},
}

@inproceedings{callisonburch2009findings,
  author    = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Schroeder, Josh},
  title     = {Findings of the 2009 {W}orkshop on {S}tatistical {M}achine {T}ranslation},
  booktitle = {Proceedings of the Fourth Workshop on Statistical Machine Translation},
  pages     = {1--28},
  year      = {2009},
}

@inproceedings{bojar2016findings,
  author    = {Bojar, Ond{\v{r}}ej and Chatterjee, Rajen and Federmann, Christian and Graham, Yvette and Haddow, Barry and Huck, Matthias and Jimeno Yepes, Antonio and Koehn, Philipp and Logacheva, Varvara and Monz, Christof and Negri, Matteo and N{\'e}v{\'e}ol, Aur{\'e}lie and Neves, Mariana and Popel, Martin and Post, Matt and Rubino, Raphael and Scarton, Carolina and Specia, Lucia and Turchi, Marco and Verspoor, Karin and Zampieri, Marcos},
  title     = {Findings of the 2016 Conference on Machine Translation},
  booktitle = {Proceedings of the First Conference on Machine Translation: Volume 2, Shared Task Papers},
  pages     = {131--198},
  year      = {2016},
}

@inproceedings{cettolo2014report,
  author    = {Cettolo, Mauro and Niehues, Jan and St{\"u}ker, Sebastian and Bentivogli, Luisa and Federico, Marcello},
  title     = {Report on the 11th {IWSLT} evaluation campaign, {IWSLT} 2014},
  booktitle = {Proc. of IWSLT},
  volume    = {57},
  year      = {2014},
}

@article{chen2016phone,
  author  = {Chen, Zhehuai and Zhuang, Yimeng and Qian, Yanmin and Yu, Kai},
  title   = {Phone synchronous speech recognition with {CTC} lattices},
  journal = {TASLP},
  volume  = {25},
  number  = {1},
  pages   = {90--101},
  year    = {2016},
}

@inproceedings{cheng2018towards,
  author    = {Cheng, Yong and Tu, Zhaopeng and Meng, Fandong and Zhai, Junjie and Liu, Yang},
  title     = {Towards Robust Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1756--1766},
  year      = {2018},
}

@inproceedings{cheng2019breaking,
  author    = {Cheng, Qiao and Fan, Meiyuan and Han, Yaqian and Huang, Jin and Duan, Yitao},
  title     = {Breaking the Data Barrier: Towards Robust Speech Translation via Adversarial Stability Training},
  booktitle = {Proceedings of the 16th International Conference on Spoken Language Translation},
  year      = {2019},
}

@article{collobert2011natural,
  author  = {Collobert, Ronan and
             Weston, Jason and
             Bottou, L{\'e}on and
             Karlen, Michael and
             Kavukcuoglu, Koray and
             Kuksa, Pavel},
  title   = {Natural language processing (almost) from scratch},
  journal = {JMLR},
  volume  = {12},
  number  = {Aug},
  pages   = {2493--2537},
  year    = {2011},
}

@inproceedings{dai2019transformer,
  author    = {Dai, Zihang and Yang, Zhilin and Yang, Yiming and Carbonell, Jaime and Le, Quoc and Salakhutdinov, Ruslan},
  title     = {Transformer-{XL}: Attentive Language Models beyond a Fixed-Length Context},
  booktitle = {Proc. of ACL},
  pages     = {2978--2988},
  year      = {2019},
}

@inproceedings{devlin2019bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {4171--4186},
  year      = {2019},
}

@inproceedings{gangi2019adapting,
  author    = {Di Gangi, Mattia Antonino and Negri, Matteo and Turchi, Marco},
  title     = {Adapting Transformer to End-to-End Spoken Language Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {1133--1137},
  year      = {2019},
}

@inproceedings{digangi2019enhancing,
  author    = {Di Gangi, Mattia Antonino and Negri, Matteo and Cattoni, Roldano and Dessi, Roberto and Turchi, Marco},
  title     = {Enhancing Transformer for End-to-end Speech-to-Text Translation},
  booktitle = {Proceedings of Machine Translation Summit XVII: Research Track},
  pages     = {21--31},
  year      = {2019},
}

@inproceedings{digangi2019must,
  author    = {Di Gangi, Mattia A. and Cattoni, Roldano and Bentivogli, Luisa and Negri, Matteo and Turchi, Marco},
  title     = {{M}u{ST}-{C}: a {M}ultilingual {S}peech {T}ranslation {C}orpus},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {2012--2017},
  year      = {2019},
}

@inproceedings{gangi2020instance,
  author    = {Di Gangi, Mattia Antonino and Nguyen, Viet{-}Nhat and Negri, Matteo and Turchi, Marco},
  title     = {Instance-based Model Adaptation for Direct Speech Translation},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7914--7918},
  year      = {2020},
}

@inproceedings{dixon2011investigation,
  author    = {Dixon, Paul R and Finch, Andrew and Hori, Chiori and Kashioka, Hideki},
  title     = {Investigation on the effects of {ASR} tuning on speech translation performance},
  booktitle = {Proc. of IWSLT},
  year      = {2011},
}

@inproceedings{dong2018speech,
  author    = {Linhao Dong and Shuang Xu and Bo Xu},
  title     = {Speech-Transformer: {A} No-Recurrence Sequence-to-Sequence Model for Speech Recognition},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5884--5888},
  year      = {2018},
}

@inproceedings{dong2021consecutive,
  author    = {Dong, Qianqian and
               Wang, Mingxuan and
               Zhou, Hao and
               Xu, Shuang and
               Xu, Bo and
               Li, Lei},
  title     = {Consecutive Decoding for Speech-to-text Translation},
  booktitle = {Proc. of AAAI},
  year      = {2021},
}

@inproceedings{dong2021listen,
  author    = {Dong, Qianqian and
               Ye, Rong and
               Wang, Mingxuan and
               Zhou, Hao and
               Xu, Shuang and
               Xu, Bo and
               Li, Lei},
  title     = {Listen, Understand and Translate: Triple Supervision Decouples End-to-end Speech-to-text Translation},
  booktitle = {Proc. of AAAI},
  volume    = {35},
  number    = {14},
  pages     = {12749--12759},
  year      = {2021},
}

@inproceedings{duong2016attentional,
  author    = {Duong, Long and Anastasopoulos, Antonios and Chiang, David and Bird, Steven and Cohn, Trevor},
  title     = {An Attentional Model for Speech Translation Without Transcription},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {949--959},
  year      = {2016},
}

@inproceedings{dyer2013simple,
  author    = {Dyer, Chris and Chahuneau, Victor and Smith, Noah A.},
  title     = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {644--648},
  year      = {2013},
}

@inproceedings{fitzgerald2009reconstructing,
  author    = {Fitzgerald, Erin and Hall, Keith and Jelinek, Frederick},
  title     = {Reconstructing False Start Errors in Spontaneous Speech Text},
  booktitle = {Proceedings of the 12th Conference of the {E}uropean Chapter of the {ACL} ({EACL} 2009)},
  pages     = {255--263},
  year      = {2009},
}

@article{froyen2009long,
  author     = {Froyen, Dries J. W. and Bonte, Milene L. and van Atteveldt, Nienke and Blomert, Leo},
  title      = {The Long Road to Automation: Neurocognitive Development of Letter-Speech Sound Processing},
  journal    = {J. Cognitive Neuroscience},
  volume     = {21},
  number     = {3},
  pages      = {567--580},
  year       = {2009},
  issn       = {0898-929X},
  issue_date = {March 2009},
  numpages   = {14},
  abstract   = {In transparent alphabetic languages, the expected standard for complete acquisition of letter-speech sound associations is within one year of reading instruction. The neural mechanisms underlying the acquisition of letter-speech sound associations have, however, hardly been investigated. The present article describes an ERP study with beginner and advanced readers in which the influence of letters on speech sound processing is investigated by comparing the MMN to speech sounds presented in isolation with the MMN to speech sounds accompanied by letters. Furthermore, SOA between letter and speech sound presentation was manipulated in order to investigate the development of the temporal window of integration for letter-speech sound processing. Beginner readers, despite one year of reading instruction, showed no early letter-speech sound integration, that is, no influence of the letter on the evocation of the MMN to the speech sound. Only later in the difference wave, at 650 msec, was an influence of the letter on speech sound processing revealed. Advanced readers, with 4 years of reading instruction, showed early and automatic letter-speech sound processing as revealed by an enhancement of the MMN amplitude, however, at a different temporal window of integration in comparison with experienced adult readers. The present results indicate a transition from mere association in beginner readers to more automatic, but still not "adult-like," integration in advanced readers. In contrast to general assumptions, the present study provides evidence for an extended development of letter-speech sound integration.},
}

@phdthesis{fuegen2008system,
  author = {F{\"u}gen, Christian},
  title  = {A system for simultaneous translation of lectures and speeches},
  school = {Universit{\"a}t Karlsruhe (TH)},
  year   = {2008},
}

@inproceedings{graves2006connectionist,
  author    = {Alex Graves and Santiago Fern{\'{a}}ndez and Faustino J. Gomez and J{\"{u}}rgen Schmidhuber},
  title     = {Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks},
  booktitle = {Proc. of ICML},
  editor    = {William W. Cohen and Andrew W. Moore},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {148},
  pages     = {369--376},
  year      = {2006},
}

@inproceedings{he2011why,
  author       = {He, Xiaodong and
                  Deng, Li and
                  Acero, Alex},
  title        = {Why word error rate is not a good metric for speech recognizer training for the speech translation task?},
  booktitle    = {Proc. of ICASSP},
  organization = {IEEE},
  pages        = {5632--5635},
  year         = {2011},
}

@inproceedings{inaguma2019multilingual,
  author       = {Inaguma, Hirofumi and
                  Duh, Kevin and
                  Kawahara, Tatsuya and
                  Watanabe, Shinji},
  title        = {Multilingual end-to-end speech translation},
  booktitle    = {Proc. of ASRU},
  organization = {IEEE},
  pages        = {570--577},
  year         = {2019},
}

@inproceedings{inaguma2020espnet,
  author    = {Inaguma, Hirofumi and Kiyono, Shun and Duh, Kevin and Karita, Shigeki and Yalta, Nelson and Hayashi, Tomoki and Watanabe, Shinji},
  title     = {{ESP}net-{ST}: All-in-One Speech Translation Toolkit},
  booktitle = {Proc. of ACL},
  pages     = {302--311},
  year      = {2020},
}

@inproceedings{indurthi2020data,
  author       = {Indurthi, Sathish and
                  Han, Houjeung and
                  Lakumarapu, Nikhil Kumar and
                  Lee, Beomseok and
                  Chung, Insoo and
                  Kim, Sangha and
                  Kim, Chanwoo},
  title        = {Data efficient direct speech-to-text translation with modality agnostic meta-learning},
  booktitle    = {Proc. of ICASSP},
  organization = {IEEE},
  year         = {2020},
}

@inproceedings{cettolo2015iwslt,
 author = {Cettolo, Mauro and
           Niehues, Jan and
           St{\"u}ker, Sebastian and
           Bentivogli, Luisa and
           Cattoni, Roldano and
           Federico, Marcello},
 title = {The {IWSLT} 2015 Evaluation Campaign},
 booktitle = {Proceedings of the 12th International Workshop on Spoken Language Translation: Evaluation Campaign},
 pages = {2--14},
 year = {2015}
}

@inproceedings{jawahar2019what,
  author    = {Jawahar, Ganesh and Sagot, Beno{\^\i}t and Seddah, Djam{\'e}},
  title     = {What Does {BERT} Learn about the Structure of Language?},
  booktitle = {Proc. of ACL},
  pages     = {3651--3657},
  year      = {2019},
}

@inproceedings{jia2019direct,
  author    = {Ye Jia and Ron J. Weiss and Fadi Biadsy and Wolfgang Macherey and Melvin Johnson and Zhifeng Chen and Yonghui Wu},
  title     = {Direct Speech-to-Speech Translation with a Sequence-to-Sequence Model},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {1123--1127},
  year      = {2019},
}

@inproceedings{jia2019leveraging,
  author    = {Ye Jia and Melvin Johnson and Wolfgang Macherey and Ron J. Weiss and Yuan Cao and Chung{-}Cheng Chiu and Naveen Ari and Stella Laurenzo and Yonghui Wu},
  title     = {Leveraging Weakly Supervised Data to Improve End-to-end Speech-to-text Translation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {7180--7184},
  year      = {2019},
}

@inproceedings{kannan2018analysis,
  author    = {Anjuli Kannan and Yonghui Wu and Patrick Nguyen and Tara N. Sainath and Zhifeng Chen and Rohit Prabhavalkar},
  title     = {An Analysis of Incorporating an External Language Model into a Sequence-to-Sequence Model},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5824--5828},
  year      = {2018},
}

@inproceedings{kano2017structured,
  author    = {Takatomo Kano and Sakriani Sakti and Satoshi Nakamura},
  title     = {Structured-Based Curriculum Learning for End-to-End English-Japanese Speech Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Francisco Lacerda},
  pages     = {2630--2634},
  year      = {2017},
}

@inproceedings{kiros2015skip,
  author    = {Ryan Kiros and Yukun Zhu and Ruslan Salakhutdinov and Richard S. Zemel and Raquel Urtasun and Antonio Torralba and Sanja Fidler},
  title     = {Skip-Thought Vectors},
  booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  editor    = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  pages     = {3294--3302},
  year      = {2015},
}

@inproceedings{kocabiyikoglu2018augmenting,
  author    = {Kocabiyikoglu, Ali Can and Besacier, Laurent and Kraif, Olivier},
  title     = {Augmenting Librispeech with {F}rench Translations: A Multimodal Corpus for Direct Speech Translation Evaluation},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  year      = {2018},
}

@inproceedings{conneau2019cross,
  author    = {Alexis Conneau and Guillaume Lample},
  title     = {Cross-lingual Language Model Pretraining},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  pages     = {7057--7067},
  year      = {2019},
}

@inproceedings{lavie1996multi,
  author    = {Lavie, Alon and Gates, Donna and Gavalda, Marsal and Mayfield, Laura and Waibel, Alex and Levin, Lori},
  title     = {Multi-lingual Translation of Spontaneously Spoken Language in a Limited Domain},
  booktitle = {{COLING} 1996 Volume 1: The 16th International Conference on Computational Linguistics},
  year      = {1996},
}

@inproceedings{le2014distributed,
  author    = {Quoc V. Le and Tom{\'{a}}s Mikolov},
  title     = {Distributed Representations of Sentences and Documents},
  booktitle = {Proc. of ICML},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {32},
  pages     = {1188--1196},
  year      = {2014},
}

@inproceedings{le2020dual,
  author    = {Le, Hang and Pino, Juan and Wang, Changhan and Gu, Jiatao and Schwab, Didier and Besacier, Laurent},
  title     = {Dual-decoder Transformer for Joint Automatic Speech Recognition and Multilingual Speech Translation},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  pages     = {3520--3533},
  year      = {2020},
}

@inproceedings{lison2016opensubtitles2016,
  author    = {Lison, Pierre and Tiedemann, J{\"o}rg},
  title     = {{O}pen{S}ubtitles2016: Extracting Large Parallel Corpora from Movie and {TV} Subtitles},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
  pages     = {923--929},
  year      = {2016},
}

@inproceedings{liu2003use,
  author       = {Liu, Fu-Hua and Gu, Liang and Gao, Yuqing and Picheny, Michael},
  title        = {Use of statistical N-gram models in natural language generation for machine translation},
  booktitle    = {Proc. of ICASSP},
  organization = {IEEE},
  volume       = {1},
  pages        = {I--I},
  year         = {2003},
}

@article{liu2018ustc,
  author  = {Liu, Dan and Liu, Junhua and Guo, Wu and Xiong, Shifu and Ma, Zhiqiang and Song, Rui and Wu, Chongliang and Liu, Quan},
  title   = {The {USTC-NEL} Speech Translation system at {IWSLT} 2018},
  journal = {ArXiv preprint},
  volume  = {abs/1812.02455},
  year    = {2018},
}

@inproceedings{liu2019end,
  author    = {Yuchen Liu and Hao Xiong and Jiajun Zhang and Zhongjun He and Hua Wu and Haifeng Wang and Chengqing Zong},
  title     = {End-to-End Speech Translation with Knowledge Distillation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {1128--1132},
  year      = {2019},
}

@article{liu2020bridging,
  author  = {Liu, Yuchen and
             Zhu, Junnan and
             Zhang, Jiajun and
             Zong, Chengqing},
  title   = {Bridging the Modality Gap for Speech-to-Text Translation},
  journal = {ArXiv preprint},
  volume  = {abs/2010.14920},
  year    = {2020},
}

@inproceedings{lu2018neural,
  author    = {Lu, Yichao and Keung, Phillip and Ladhak, Faisal and Bhardwaj, Vikas and Zhang, Shaonan and Sun, Jason},
  title     = {A neural interlingua for multilingual machine translation},
  booktitle = {Proceedings of the Third Conference on Machine Translation: Research Papers},
  pages     = {84--92},
  year      = {2018},
}

@inproceedings{lugosch2019speech,
  author    = {Loren Lugosch and Mirco Ravanelli and Patrick Ignoto and Vikrant Singh Tomar and Yoshua Bengio},
  title     = {Speech Model Pre-Training for End-to-End Spoken Language Understanding},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {814--818},
  year      = {2019},
}

@inproceedings{matusov2005phrase,
  author       = {Matusov, Evgeny and Ney, Hermann and Schl{\"u}ter, Ralf},
  title        = {Phrase-based translation of speech recognizer word lattices using loglinear model combination},
  booktitle    = {Proc. of ASRU},
  organization = {IEEE},
  pages        = {110--115},
  year         = {2005},
}

@inproceedings{matusov2006automatic,
  author    = {Matusov, Evgeny and Mauser, Arne and Ney, Hermann},
  title     = {Automatic sentence segmentation and punctuation prediction for spoken language translation},
  booktitle = {Proceedings of the Third International Workshop on Spoken Language Translation: Papers},
  year      = {2006},
}

@inproceedings{matusov2008spoken,
  author    = {Matusov, Evgeny and Hoffmeister, Bj{\"o}rn and Ney, Hermann},
  title     = {{ASR} Word Lattice Translation with Exhaustive Reordering is Possible},
  booktitle = {Proc. of INTERSPEECH},
  year      = {2008},
}

@inproceedings{mikolov2013distributed,
  author    = {Tom{\'{a}}s Mikolov and Ilya Sutskever and Kai Chen and Gregory S. Corrado and Jeffrey Dean},
  title     = {Distributed Representations of Words and Phrases and their Compositionality},
  booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
  editor    = {Christopher J. C. Burges and L{\'{e}}on Bottou and Zoubin Ghahramani and Kilian Q. Weinberger},
  pages     = {3111--3119},
  year      = {2013},
}

@inproceedings{ott2019fairseq,
  author    = {Ott, Myle and Edunov, Sergey and Baevski, Alexei and Fan, Angela and Gross, Sam and Ng, Nathan and Grangier, David and Auli, Michael},
  title     = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics (Demonstrations)},
  pages     = {48--53},
  year      = {2019},
}

@inproceedings{papineni2002bleu,
  author    = {Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
  title     = {{B}leu: a Method for Automatic Evaluation of Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {311--318},
  year      = {2002},
}

@inproceedings{park2019specaugment,
  author    = {Daniel S. Park and William Chan and Yu Zhang and Chung{-}Cheng Chiu and Barret Zoph and Ekin D. Cubuk and Quoc V. Le},
  title     = {{SpecAugment}: {A} Simple Data Augmentation Method for Automatic Speech Recognition},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {2613--2617},
  year      = {2019},
}

@inproceedings{peitz2012spoken,
  author    = {Peitz, Stephan and Wiesler, Simon and Nu{\ss}baum-Thom, Markus and Ney, Hermann},
  title     = {Spoken language translation using automatically transcribed text in training},
  booktitle = {Proceedings of the 9th International Workshop on Spoken Language Translation: Papers},
  pages     = {276--283},
  year      = {2012},
}

@inproceedings{pennington2014glove,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher},
  title     = {{G}lo{V}e: Global Vectors for Word Representation},
  booktitle = {Proc. of EMNLP},
  pages     = {1532--1543},
  year      = {2014},
}

@inproceedings{peters2018deep,
  author    = {Peters, Matthew E. and Neumann, Mark and Iyyer, Mohit and Gardner, Matt and Clark, Christopher and Lee, Kenton and Zettlemoyer, Luke},
  title     = {Deep Contextualized Word Representations},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {2227--2237},
  year      = {2018},
}

@article{chouldechova2020snapshot,
  author  = {Chouldechova, Alexandra and Roth, Aaron},
  title   = {A snapshot of the frontiers of fairness in machine learning},
  journal = {Communications of the ACM},
  volume  = {63},
  number  = {5},
  pages   = {82--89},
  year    = {2020},
}

@inproceedings{pino2019harnessing,
  author    = {Pino, Juan and Puzon, Liezl and Gu, Jiatao and Ma, Xutai and McCarthy, Arya D. and Gopinath, Deepak},
  title     = {Harnessing Indirect Training Data for End-to-End Automatic Speech Translation: Tricks of the Trade},
  booktitle = {Proceedings of the 16th International Conference on Spoken Language Translation},
  year      = {2019},
}

@inproceedings{pino2020self,
  author    = {Juan Miguel Pino and Qiantong Xu and Xutai Ma and Mohammad Javad Dousti and Yun Tang},
  title     = {Self-Training for End-to-End Speech Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {1476--1480},
  year      = {2020},
}

@inproceedings{qi2018when,
  author    = {Qi, Ye and Sachan, Devendra and Felix, Matthieu and Padmanabhan, Sarguna and Neubig, Graham},
  title     = {When and Why Are Pre-Trained Word Embeddings Useful for Neural Machine Translation?},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {529--535},
  year      = {2018},
}

@article{radford2018improving,
  author  = {Radford, Alec and
             Narasimhan, Karthik and
             Salimans, Tim and
             Sutskever, Ilya},
  title   = {Improving language understanding by generative pre-training},
  journal = {OpenAI Blog},
  year    = {2018},
}

@article{radford2019language,
  author  = {Radford, Alec and
             Wu, Jeffrey and
             Child, Rewon and
             Luan, David and
             Amodei, Dario and
             Sutskever, Ilya},
  title   = {Language models are unsupervised multitask learners},
  journal = {OpenAI Blog},
  volume  = {1},
  number  = {8},
  year    = {2019},
}

@inproceedings{rousseau2014enhancing,
  author    = {Rousseau, Anthony and Del{\'e}glise, Paul and Est{\`e}ve, Yannick},
  title     = {Enhancing the {TED}-{LIUM} Corpus with Selected Data for Language Modeling and More {TED} Talks},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  pages     = {3935--3939},
  year      = {2014},
}

@inproceedings{ruiz2015adapting,
  author    = {Ruiz, Nicholas and
               Gao, Qin and
               Lewis, William and
               Federico, Marcello},
  title     = {Adapting machine translation models toward misrecognized speech with text-to-speech pronunciation rules and acoustic confusability},
  booktitle = {Proc. of INTERSPEECH},
  year      = {2015},
}

@inproceedings{sak2015fast,
  author    = {Sak, Ha{\c{s}}im and
               Senior, Andrew and
               Rao, Kanishka and
               Beaufays, Fran{\c{c}}oise},
  title     = {Fast and accurate recurrent neural network acoustic models for speech recognition},
  booktitle = {Proc. of INTERSPEECH},
  year      = {2015},
}

@inproceedings{salazar2019self,
  author    = {Julian Salazar and Katrin Kirchhoff and Zhiheng Huang},
  title     = {Self-attention Networks for Connectionist Temporal Classification in Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {7115--7119},
  year      = {2019},
}

@inproceedings{salesky2018towards,
  author       = {Salesky, Elizabeth and
                  Burger, Susanne and
                  Niehues, Jan and
                  Waibel, Alex},
  title        = {Towards fluent translations from disfluent speech},
  booktitle    = {Proc. of SLT},
  organization = {IEEE},
  pages        = {921--926},
  year         = {2018},
}

@inproceedings{salesky2019fluent,
  author    = {Salesky, Elizabeth and Sperber, Matthias and Waibel, Alexander},
  title     = {Fluent Translations from Disfluent Speech in End-to-End Speech Translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {2786--2792},
  year      = {2019},
}

@inproceedings{schultz2004using,
  author    = {Schultz, Tanja and
               Jou, Szu-Chen and
               Vogel, Stephan and
               Saleem, Shirin},
  title     = {Using word latice information for a tighter coupling in speech translation systems},
  booktitle = {Eighth International Conference on Spoken Language Processing},
  year      = {2004},
}

@article{shankweiler2008reading,
  author  = {Shankweiler, Donald and Mencl, W. Einar and Braze, David and Tabor, Whitney and Pugh, Kenneth R. and Fulbright, Robert K.},
  title   = {Reading differences and brain: Cortical integration of speech and print in sentence processing varies with reader skill},
  journal = {Developmental Neuropsychology},
  volume  = {33},
  number  = {6},
  pages   = {745--775},
  year    = {2008},
}

@inproceedings{socher2013recursive,
  author    = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning, Christopher D. and Ng, Andrew and Potts, Christopher},
  title     = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
  booktitle = {Proc. of EMNLP},
  pages     = {1631--1642},
  year      = {2013},
}

@inproceedings{sperber2017neural,
  author    = {Sperber, Matthias and Neubig, Graham and Niehues, Jan and Waibel, Alex},
  title     = {Neural Lattice-to-Sequence Models for Uncertain Inputs},
  booktitle = {Proc. of EMNLP},
  pages     = {1380--1389},
  year      = {2017},
}

@inproceedings{sperber2017robust,
  author    = {Sperber, Matthias and Niehues, Jan and Waibel, Alex},
  title     = {Toward Robust Neural Machine Translation for Noisy Input Sequences},
  booktitle = {Proceedings of the 14th International Conference on Spoken Language Translation},
  pages     = {90--96},
  year      = {2017},
}

@article{sperber2019attention,
  author  = {Sperber, Matthias and Neubig, Graham and Niehues, Jan and Waibel, Alex},
  title   = {Attention-Passing Models for Robust and Data-Efficient End-to-End Speech Translation},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {7},
  pages   = {313--325},
  year    = {2019},
}

@inproceedings{sperber2019self,
  author    = {Sperber, Matthias and Neubig, Graham and Pham, Ngoc-Quan and Waibel, Alex},
  title     = {Self-Attentional Models for Lattice Inputs},
  booktitle = {Proc. of ACL},
  pages     = {1185--1197},
  year      = {2019},
}

@article{spitsyna2006converging,
  author  = {Spitsyna, Galina and Warren, Jane E. and Scott, Sophie K. and Turkheimer, Federico E. and Wise, Richard J. S.},
  title   = {Converging language streams in the human temporal lobe},
  journal = {Journal of Neuroscience},
  volume  = {26},
  number  = {28},
  pages   = {7328--7336},
  year    = {2006},
}

@inproceedings{stoian2020analyzing,
  author    = {Mihaela C. Stoian and Sameer Bansal and Sharon Goldwater},
  title     = {Analyzing {ASR} Pretraining for Low-Resource Speech-to-Text Translation},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7909--7913},
  year      = {2020},
}

@inproceedings{sun2020ernie,
  author    = {Yu Sun and Shuohuan Wang and Yu{-}Kun Li and Shikun Feng and Hao Tian and Hua Wu and Haifeng Wang},
  title     = {{ERNIE} 2.0: {A} Continual Pre-Training Framework for Language Understanding},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {8968--8975},
  year      = {2020},
}

@inproceedings{sung2019towards,
  author    = {Tzu{-}Wei Sung and Jun{-}You Liu and Hung{-}yi Lee and Lin{-}Shan Lee},
  title     = {Towards End-to-end Speech-to-text Translation with Two-pass Decoding},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {7175--7179},
  year      = {2019},
}

@inproceedings{tan2018deep,
  author    = {Zhixing Tan and Mingxuan Wang and Jun Xie and Yidong Chen and Xiaodong Shi},
  title     = {Deep Semantic Role Labeling With Self-Attention},
  booktitle = {Proc. of AAAI},
  editor    = {Sheila A. McIlraith and Kilian Q. Weinberger},
  pages     = {4929--4936},
  year      = {2018},
}

@inproceedings{tang2019understanding,
  author    = {Tang, Gongbo and Sennrich, Rico and Nivre, Joakim},
  title     = {Understanding Neural Machine Translation by Simplification: The Case of Encoder-free Models},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)},
  pages     = {1186--1193},
  year      = {2019},
}

@inproceedings{tang2021general,
  author       = {Tang, Yun and Pino, Juan and Wang, Changhan and Ma, Xutai and Genzel, Dmitriy},
  title        = {A general multi-task learning framework to leverage text data for speech to text tasks},
  booktitle    = {Proc. of ICASSP},
  organization = {IEEE},
  pages        = {6209--6213},
  year         = {2021},
}

@inproceedings{tsvetkov2014augmenting,
  author    = {Tsvetkov, Yulia and Metze, Florian and Dyer, Chris},
  title     = {Augmenting Translation Models with Simulated Acoustic Confusions for Improved Spoken Language Translation},
  booktitle = {Proceedings of the 14th Conference of the {E}uropean Chapter of the Association for Computational Linguistics},
  pages     = {616--625},
  year      = {2014},
}

@article{vanatteveldt2004integration,
 abstract = {Most people acquire literacy skills with remarkable ease, even though the human brain is not evolutionarily adapted to this relatively new cultural phenomenon. Associations between letters and speech sounds form the basis of reading in alphabetic scripts. We investigated the functional neuroanatomy of the integration of letters and speech sounds using functional magnetic resonance imaging (fMRI). Letters and speech sounds were presented unimodally and bimodally in congruent or incongruent combinations. Analysis of single-subject data and group data aligned on the basis of individual cortical anatomy revealed that letters and speech sounds are integrated in heteromodal superior temporal cortex. Interestingly, responses to speech sounds in a modality-specific region of the early auditory cortex were modified by simultaneously presented letters. These results suggest that efficient processing of culturally defined associations between letters and speech sounds relies on neural mechanisms similar to those naturally evolved for integrating audiovisual speech.},
 author = {Nienke {van Atteveldt} and Elia Formisano and Rainer Goebel and Leo Blomert},
 issn = {0896-6273},
 journal = {Neuron},
 number = {2},
 pages = {271--282},
 title = {Integration of Letters and Speech Sounds in the Human Brain},
 volume = {43},
 year = {2004}
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is All you Need},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  editor    = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  pages     = {5998--6008},
  year      = {2017},
}

@inproceedings{vazquez2019multilingual,
  author    = {V{\'a}zquez, Ra{\'u}l and Raganato, Alessandro and Tiedemann, J{\"o}rg and Creutz, Mathias},
  title     = {Multilingual {NMT} with a Language-Independent Attention Bridge},
  booktitle = {Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019)},
  pages     = {33--39},
  year      = {2019},
}

@inproceedings{vila2018end,
  author    = {Cros Vila, Laura and Escolano, Carlos and Fonollosa, Jos{\'e} A. R. and Costa-juss{\`a}, Marta R.},
  title     = {End-to-End Speech Translation with the Transformer},
  booktitle = {IberSPEECH},
  pages     = {60--63},
  year      = {2018},
}

@inproceedings{vydana2021jointly,
  author       = {Vydana, Hari Krishna and Karafi{\'a}t, Martin and Zmolikova, Katerina and Burget, Luk{\'a}{\v{s}} and {\v{C}}ernock{\'y}, Honza},
  title        = {Jointly trained transformers models for spoken language translation},
  booktitle    = {Proc. of ICASSP},
  organization = {IEEE},
  pages        = {7513--7517},
  year         = {2021},
}

@inproceedings{wang2015transfer,
  author       = {Wang, Dong and
                  Zheng, Thomas Fang},
  title        = {Transfer learning for speech and language processing},
  booktitle    = {APSIPA},
  organization = {IEEE},
  pages        = {1225--1237},
  year         = {2015},
}

@inproceedings{wang2020bridging,
  author    = {Chengyi Wang and Yu Wu and Shujie Liu and Zhenglu Yang and Ming Zhou},
  title     = {Bridging the Gap between Pre-Training and Fine-Tuning for End-to-End Speech Translation},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {9161--9168},
  year      = {2020},
}

@inproceedings{wang2020curriculum,
  author    = {Wang, Chengyi and Wu, Yu and Liu, Shujie and Zhou, Ming and Yang, Zhenglu},
  title     = {Curriculum Pre-training for End-to-End Speech Translation},
  booktitle = {Proc. of ACL},
  pages     = {3728--3738},
  year      = {2020},
}

@inproceedings{weiss2017sequence,
  author    = {Ron J. Weiss and Jan Chorowski and Navdeep Jaitly and Yonghui Wu and Zhifeng Chen},
  title     = {Sequence-to-Sequence Models Can Directly Translate Foreign Speech},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Francisco Lacerda},
  pages     = {2625--2629},
  year      = {2017},
}

@inproceedings{woszczyna1993recent,
  author    = {Woszczyna, M. and Coccaro, N. and Eisele, A. and Lavie, A. and McNair, A. and Polzin, T. and Rogina, I. and Rose, C. P. and Sloboda, T. and Tomita, M. and Tsutsumi, J. and Aoki-Waibel, N. and Waibel, A. and Ward, Wayne},
  title     = {Recent Advances in {JANUS}: A Speech Translation System},
  booktitle = {Proceedings of the Fifth Conference on Theoretical and Methodological Issues in Machine Translation of Natural Languages},
  year      = {1993},
}

@inproceedings{yang2019improving,
  author    = {Yinfei Yang and Hern{\'{a}}ndez {\'{A}}brego, Gustavo and Steve Yuan and Mandy Guo and Qinlan Shen and Daniel Cer and Yun{-}Hsuan Sung and Brian Strope and Ray Kurzweil},
  title     = {Improving Multilingual Sentence Embedding using Bi-directional Dual Encoder with Additive Margin Softmax},
  booktitle = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16, 2019},
  editor    = {Sarit Kraus},
  pages     = {5370--5378},
  year      = {2019},
}

@inproceedings{yang2019xlnet,
  author    = {Zhilin Yang and Zihang Dai and Yiming Yang and Jaime G. Carbonell and Ruslan Salakhutdinov and Quoc V. Le},
  title     = {{XLNet}: Generalized Autoregressive Pretraining for Language Understanding},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  pages     = {5754--5764},
  year      = {2019},
}

@inproceedings{yi2019ectc,
  author    = {Cheng Yi and Feng Wang and Bo Xu},
  title     = {Ectc-Docd: An End-to-End Structure with {CTC} Encoder and {OCD} Decoder for Speech Recognition},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {4420--4424},
  year      = {2019},
}

@inproceedings{yu2018multilingual,
  author    = {Yu, Katherine and Li, Haoran and Oguz, Barlas},
  title     = {Multilingual Seq2seq Training with Similarity Loss for Cross-Lingual Document Classification},
  booktitle = {Proceedings of The Third Workshop on Representation Learning for {NLP}},
  pages     = {175--179},
  year      = {2018},
}

@inproceedings{zhang2005decoding,
  author    = {Zhang, Ruiqiang and Kikui, Genichiro and Yamamoto, Hirofumi and Lo, Wai-Kit},
  title     = {A decoding algorithm for word lattice translation in speech translation},
  booktitle = {Proceedings of the Second International Workshop on Spoken Language Translation},
  year      = {2005},
}

@inproceedings{zhang2019lattice,
  author    = {Zhang, Pei and Ge, Niyu and Chen, Boxing and Fan, Kai},
  title     = {Lattice Transformer for Speech Translation},
  booktitle = {Proc. of ACL},
  pages     = {6475--6484},
  year      = {2019},
}

@inproceedings{zhang2020adaptive,
  author    = {Zhang, Biao and Titov, Ivan and Haddow, Barry and Sennrich, Rico},
  title     = {Adaptive Feature Selection for End-to-End Speech Translation},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  pages     = {2533--2544},
  year      = {2020},
}

@inproceedings{zhang2020improving,
  author    = {Zhang, Biao and Williams, Philip and Titov, Ivan and Sennrich, Rico},
  title     = {Improving Massively Multilingual Neural Machine Translation and Zero-Shot Translation},
  booktitle = {Proc. of ACL},
  pages     = {1628--1639},
  year      = {2020},
}

@inproceedings{zhou2018syllable,
  author    = {Shiyu Zhou and Linhao Dong and Shuang Xu and Bo Xu},
  title     = {Syllable-Based Sequence-to-Sequence Speech Recognition with the Transformer in Mandarin Chinese},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {B. Yegnanarayana},
  pages     = {791--795},
  year      = {2018},
}

@inproceedings{zhu2020language,
  author    = {Zhu, Changfeng and Yu, Heng and Cheng, Shanbo and Luo, Weihua},
  title     = {Language-aware Interlingua for Multilingual Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1650--1655},
  year      = {2020},
}

@inproceedings{chen2020distilling,
  author    = {Chen, Yen-Chun and Gan, Zhe and Cheng, Yu and Liu, Jingzhou and Liu, Jingjing},
  title     = {Distilling Knowledge Learned in {BERT} for Text Generation},
  booktitle = {Proc. of ACL},
  pages     = {7893--7905},
  year      = {2020},
}

@inproceedings{chuang2020speechbert,
  author    = {Yung{-}Sung Chuang and Chi{-}Liang Liu and Hung{-}yi Lee and Lin{-}Shan Lee},
  title     = {{SpeechBERT}: An Audio-and-Text Jointly Learned Language Model for End-to-End Spoken Question Answering},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {4168--4172},
  year      = {2020},
}

@inproceedings{gu2018universal,
  author    = {Gu, Jiatao and Hassan, Hany and Devlin, Jacob and Li, Victor O. K.},
  title     = {Universal Neural Machine Translation for Extremely Low Resource Languages},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {344--354},
  year      = {2018},
}

@inproceedings{he2019rethinking,
  author    = {Kaiming He and Ross B. Girshick and Piotr Doll{\'{a}}r},
  title     = {Rethinking {ImageNet} Pre-Training},
  booktitle = {2019 {IEEE/CVF} International Conference on Computer Vision, {ICCV} 2019, Seoul, Korea (South), October 27 - November 2, 2019},
  pages     = {4917--4926},
  year      = {2019},
}

@inproceedings{huang2016attention,
  author    = {Huang, Po-Yao and Liu, Frederick and Shiang, Sz-Rung and Oh, Jean and Dyer, Chris},
  title     = {Attention-based Multimodal Neural Machine Translation},
  booktitle = {Proceedings of the First Conference on Machine Translation: Volume 2, Shared Task Papers},
  pages     = {639--645},
  year      = {2016},
}

@inproceedings{huang2019unicoder,
  author    = {Huang, Haoyang and Liang, Yaobo and Duan, Nan and Gong, Ming and Shou, Linjun and Jiang, Daxin and Zhou, Ming},
  title     = {{U}nicoder: A Universal Language Encoder by Pre-training with Multiple Cross-lingual Tasks},
  booktitle = {Proc. of EMNLP},
  pages     = {2485--2494},
  year      = {2019},
}

@inproceedings{huang2021m3p,
  author    = {Huang, Haoyang and Su, Lin and Qi, Di and Duan, Nan and Cui, Edward and Bharti, Taroon and Zhang, Lei and Wang, Lijuan and Gao, Jianfeng and Liu, Bei and others},
  title     = {{M3P}: Learning Universal Representations via Multitask Multilingual Multimodal Pre-training},
  booktitle = {Proc. of CVPR},
  year      = {2021},
}

@article{johnson2017googles,
  author  = {Johnson, Melvin and Schuster, Mike and Le, Quoc V. and Krikun, Maxim and Wu, Yonghui and Chen, Zhifeng and Thorat, Nikhil and Vi{\'e}gas, Fernanda and Wattenberg, Martin and Corrado, Greg and Hughes, Macduff and Dean, Jeffrey},
  title   = {{G}oogle{'}s Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {5},
  pages   = {339--351},
  year    = {2017},
}

@inproceedings{lin2020pre,
  author    = {Lin, Zehui and Pan, Xiao and Wang, Mingxuan and Qiu, Xipeng and Feng, Jiangtao and Zhou, Hao and Li, Lei},
  title     = {Pre-training Multilingual Neural Machine Translation by Leveraging Alignment Information},
  booktitle = {Proc. of EMNLP},
  pages     = {2649--2663},
  year      = {2020},
}

@article{liu2019roberta,
  author  = {Yinhan Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and Mike Lewis and Luke S. Zettlemoyer and Veselin Stoyanov},
  title   = {{RoBERTa}: A Robustly Optimized {BERT} Pretraining Approach},
  journal = {ArXiv preprint},
  volume  = {abs/1907.11692},
  year    = {2019},
}

@article{liu2020multilingual,
  author  = {Liu, Yinhan and Gu, Jiatao and Goyal, Naman and Li, Xian and Edunov, Sergey and Ghazvininejad, Marjan and Lewis, Mike and Zettlemoyer, Luke},
  title   = {Multilingual Denoising Pre-training for Neural Machine Translation},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {8},
  pages   = {726--742},
  year    = {2020},
}

@article{mikolov2013exploiting,
  author  = {Mikolov, Tomas and
             Le, Quoc V and
             Sutskever, Ilya},
  title   = {Exploiting similarities among languages for machine translation},
  journal = {ArXiv preprint},
  volume  = {abs/1309.4168},
  year    = {2013},
}

@inproceedings{park2019specaugmenta,
  author    = {Daniel S. Park and William Chan and Yu Zhang and Chung-Cheng Chiu and Barret Zoph and Ekin Dogus Cubuk and Quoc V. Le},
  title     = {{SpecAugment}: A Simple Data Augmentation Method for Automatic Speech Recognition},
  booktitle = {Proc. of INTERSPEECH},
  pages     = {2613--2617},
  year      = {2019},
}

@inproceedings{pires2019how,
  author    = {Pires, Telmo and Schlinger, Eva and Garrette, Dan},
  title     = {How Multilingual is Multilingual {BERT}?},
  booktitle = {Proc. of ACL},
  pages     = {4996--5001},
  year      = {2019},
}

@inproceedings{long2021generative,
  author    = {Long, Quanyu and Wang, Mingxuan and Li, Lei},
  title     = {Generative Imagination Elevates Machine Translation},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {5738--5748},
  year      = {2021},
}

@inproceedings{ramachandran2017unsupervised,
  author    = {Ramachandran, Prajit and Liu, Peter and Le, Quoc},
  title     = {Unsupervised Pretraining for Sequence to Sequence Learning},
  booktitle = {Proc. of EMNLP},
  pages     = {383--391},
  year      = {2017},
}

@inproceedings{song2019mass,
  author    = {Kaitao Song and Xu Tan and Tao Qin and Jianfeng Lu and Tie{-}Yan Liu},
  title     = {{MASS:} Masked Sequence to Sequence Pre-training for Language Generation},
  booktitle = {Proc. of ICML},
  editor    = {Kamalika Chaudhuri and Ruslan Salakhutdinov},
  series    = {Proceedings of Machine Learning Research},
  volume    = {97},
  pages     = {5926--5936},
  year      = {2019},
}

@inproceedings{wang2019vatex,
  author    = {Xin Wang and Jiawei Wu and Junkun Chen and Lei Li and Yuan{-}Fang Wang and William Yang Wang},
  title     = {{VaTeX}: {A} Large-Scale, High-Quality Multilingual Dataset for Video-and-Language Research},
  booktitle = {2019 {IEEE/CVF} International Conference on Computer Vision, {ICCV} 2019, Seoul, Korea (South), October 27 - November 2, 2019},
  pages     = {4580--4590},
  year      = {2019},
}

@inproceedings{xie2020self,
  author    = {Qizhe Xie and Minh{-}Thang Luong and Eduard H. Hovy and Quoc V. Le},
  title     = {Self-Training With Noisy Student Improves {ImageNet} Classification},
  booktitle = {2020 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2020, Seattle, WA, USA, June 13-19, 2020},
  pages     = {10684--10695},
  year      = {2020},
}

@inproceedings{yang2020towards,
  author    = {Jiacheng Yang and Mingxuan Wang and Hao Zhou and Chengqi Zhao and Weinan Zhang and Yong Yu and Lei Li},
  title     = {Towards Making the Most of {BERT} in Neural Machine Translation},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {9378--9385},
  year      = {2020},
}

@inproceedings{zhu2020incorporating,
  author    = {Jinhua Zhu and Yingce Xia and Lijun Wu and Di He and Tao Qin and Wengang Zhou and Houqiang Li and Tie{-}Yan Liu},
  title     = {Incorporating {BERT} into Neural Machine Translation},
  booktitle = {Proc. of ICLR},
  year      = {2020},
}

@inproceedings{lin2021learning,
  author    = {Lin, Zehui and Wu, Liwei and Wang, Mingxuan and Li, Lei},
  title     = {Learning Language Specific Sub-network for Multilingual Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {293--305},
  year      = {2021},
}

@inproceedings{pan2021contrastive,
  author    = {Pan, Xiao and Wang, Mingxuan and Wu, Liwei and Li, Lei},
  title     = {Contrastive Learning for Many-to-many Multilingual Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {244--258},
  year      = {2021},
}

@inproceedings{qian2021glancing,
  author    = {Qian, Lihua and Zhou, Hao and Bao, Yu and Wang, Mingxuan and Qiu, Lin and Zhang, Weinan and Yu, Yong and Li, Lei},
  title     = {Glancing Transformer for Non-Autoregressive Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1993--2003},
  year      = {2021},
}

@inproceedings{wang2021unire,
  author    = {Wang, Yijun and Sun, Changzhi and Wu, Yuanbin and Zhou, Hao and Li, Lei and Yan, Junchi},
  title     = {{U}ni{RE}: A Unified Label Space for Entity Relation Extraction},
  booktitle = {Proc. of ACL},
  pages     = {220--231},
  year      = {2021},
}

@inproceedings{xu2021document,
  author    = {Xu, Runxin and Liu, Tianyu and Li, Lei and Chang, Baobao},
  title     = {Document-level Event Extraction via Heterogeneous Graph-based Interaction Model with a Tracker},
  booktitle = {Proc. of ACL},
  pages     = {3533--3546},
  year      = {2021},
}

@inproceedings{xu2021vocabulary,
  author    = {Xu, Jingjing and Zhou, Hao and Gan, Chun and Zheng, Zaixiang and Li, Lei},
  title     = {Vocabulary Learning via Optimal Transport for Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {7361--7373},
  year      = {2021},
}

@inproceedings{han2021learning,
  author    = {Han, Chi and Wang, Mingxuan and Ji, Heng and Li, Lei},
  title     = {Learning Shared Semantic Space for Speech-to-Text Translation},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {2214--2225},
  year      = {2021},
}

@inproceedings{sun2021probabilistic,
  author    = {Sun, Changzhi and Zhang, Xinbo and Chen, Jiangjie and Gan, Chun and Wu, Yuanbin and Chen, Jiaze and Zhou, Hao and Li, Lei},
  title     = {Probabilistic Graph Reasoning for Natural Proof Generation},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {3140--3151},
  year      = {2021},
}

@inproceedings{wang2021contrastive,
  author    = {Wang, Danqing and Chen, Jiaze and Zhou, Hao and Qiu, Xipeng and Li, Lei},
  title     = {Contrastive Aligned Joint Learning for Multilingual Summarization},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {2739--2750},
  year      = {2021},
}

@inproceedings{wu2021language,
  author    = {Wu, Liwei and Cheng, Shanbo and Wang, Mingxuan and Li, Lei},
  title     = {Language Tags Matter for Zero-Shot Neural Machine Translation},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {3001--3007},
  year      = {2021},
}

@inproceedings{zhao2021neurst,
  author    = {Zhao, Chengqi and Wang, Mingxuan and Dong, Qianqian and Ye, Rong and Li, Lei},
  title     = {{N}eur{ST}: Neural Speech Translation Toolkit},
  booktitle = {Proc. of ACL},
  pages     = {55--62},
  year      = {2021},
}

@inproceedings{jing2021adversarial,
  author    = {Mingxuan Jing and Wenbing Huang and Fuchun Sun and Xiaojian Ma and Tao Kong and Chuang Gan and Lei Li},
  title     = {Adversarial Option-Aware Hierarchical Imitation Learning},
  booktitle = {Proc. of ICML},
  editor    = {Marina Meila and Tong Zhang},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {5097--5106},
  year      = {2021},
}

@inproceedings{wang2021cross,
  author    = {Wang, Mingxuan and Bai, Hongxiao and Li, Lei and Zhao, Hai},
  title     = {Cross-lingual Supervision Improves Unsupervised Neural Machine Translation},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers},
  pages     = {89--96},
  year      = {2021},
}

@inproceedings{wang2021lightseq,
  author    = {Wang, Xiaohui and Xiong, Ying and Wei, Yang and Wang, Mingxuan and Li, Lei},
  title     = {{L}ight{S}eq: A High Performance Inference Library for Transformers},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers},
  pages     = {113--120},
  year      = {2021},
}

@article{liang2020finding,
  author  = {Jianze Liang and Chengqi Zhao and Mingxuan Wang and Xipeng Qiu and Lei Li},
  title   = {Finding Sparse Structure for Domain Specific Neural Machine Translation},
  journal = {ArXiv preprint},
  volume  = {abs/2012.10586},
  year    = {2020},
}

@inproceedings{wu2020volctrans,
  author    = {Wu, Liwei and Pan, Xiao and Lin, Zehui and Zhu, Yaoming and Wang, Mingxuan and Li, Lei},
  title     = {The {Volctrans} Machine Translation System for {WMT}20},
  booktitle = {Proceedings of the Fifth Conference on Machine Translation},
  pages     = {305--312},
  year      = {2020},
}

@inproceedings{xu2020volctrans,
  author    = {Xu, Runxin and Zhi, Zhuo and Cao, Jun and Wang, Mingxuan and Li, Lei},
  title     = {{Volctrans} Parallel Corpus Filtering System for {WMT} 2020},
  booktitle = {Proceedings of the Fifth Conference on Machine Translation},
  pages     = {985--990},
  year      = {2020},
}

@inproceedings{li2020sentence,
  author    = {Li, Bohan and Zhou, Hao and He, Junxian and Wang, Mingxuan and Yang, Yiming and Li, Lei},
  title     = {On the Sentence Embeddings from Pre-trained Language Models},
  booktitle = {Proc. of EMNLP},
  pages     = {9119--9130},
  year      = {2020},
}

@inproceedings{zeng2020double,
  author    = {Zeng, Shuang and Xu, Runxin and Chang, Baobao and Li, Lei},
  title     = {Double Graph Based Reasoning for Document-level Relation Extraction},
  booktitle = {Proc. of EMNLP},
  pages     = {1630--1640},
  year      = {2020},
}

@inproceedings{ru2020active,
  author    = {Ru, Dongyu and Feng, Jiangtao and Qiu, Lin and Zhou, Hao and Wang, Mingxuan and Zhang, Weinan and Yu, Yong and Li, Lei},
  title     = {Active Sentence Learning by Adversarial Uncertainty Sampling in Discrete Space},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  pages     = {4908--4917},
  year      = {2020},
}

@inproceedings{zhang2020language,
  author    = {Zhang, Maosen and Jiang, Nan and Li, Lei and Xue, Yexiang},
  title     = {Language Generation via Combinatorial Constraint Satisfaction: A Tree Search Enhanced {M}onte-{C}arlo Approach},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  pages     = {1286--1298},
  year      = {2020},
}

@inproceedings{song2020improving,
  author    = {Yuxuan Song and Ning Miao and Hao Zhou and Lantao Yu and Mingxuan Wang and Lei Li},
  title     = {Improving Maximum Likelihood Training for Text Generation with Density Ratio Estimation},
  booktitle = {The 23rd International Conference on Artificial Intelligence and Statistics, {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  editor    = {Silvia Chiappa and Roberto Calandra},
  series    = {Proceedings of Machine Learning Research},
  volume    = {108},
  pages     = {122--132},
  year      = {2020},
}

@inproceedings{shi2020dispersed,
  author    = {Wenxian Shi and Hao Zhou and Ning Miao and Lei Li},
  title     = {Dispersed Exponential Family Mixture {VAEs} for Interpretable Text Generation},
  booktitle = {Proc. of ICML},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {8840--8851},
  year      = {2020},
}

@inproceedings{miao2020do,
  author    = {Miao, Ning and Song, Yuxuan and Zhou, Hao and Li, Lei},
  title     = {Do you have the right scissors? Tailoring Pre-trained Language Models via {Monte-Carlo} Methods},
  booktitle = {Proc. of ACL},
  pages     = {3436--3441},
  year      = {2020},
}

@inproceedings{xu2020xiaomingbot,
  author    = {Xu, Runxin and Cao, Jun and Wang, Mingxuan and Chen, Jiaze and Zhou, Hao and Zeng, Ying and Wang, Yuping and Chen, Li and Yin, Xiang and Zhang, Xijin and Jiang, Songcheng and Wang, Yuxuan and Li, Lei},
  title     = {{Xiaomingbot}: A Multilingual Robot News Reporter},
  booktitle = {Proc. of ACL},
  pages     = {1--8},
  year      = {2020},
}

@inproceedings{ye2020variational,
  author    = {Rong Ye and Wenxian Shi and Hao Zhou and Zhongyu Wei and Lei Li},
  title     = {Variational Template Machine for Data-to-Text Generation},
  booktitle = {Proc. of ICLR},
  year      = {2020},
}

@inproceedings{zheng2020mirror,
  author    = {Zaixiang Zheng and Hao Zhou and Shujian Huang and Lei Li and Xinyu Dai and Jiajun Chen},
  title     = {Mirror Generative Models for Neural Machine Translation},
  booktitle = {Proc. of ICLR},
  year      = {2020},
  url       = {https://openreview.net/forum?id=HkxQRTNYPH},
  addendum  = {Oral, 1.9\% acceptance rate},
  video     = {https://iclr.cc/virtual_2020/poster_HkxQRTNYPH.html},
  owner     = {lilei.02},
  abstract  = {Training neural machine translation models (NMT) requires a large amount of parallel corpus, which is scarce for many language pairs. However, raw non-parallel corpora are often easy to obtain. Existing approaches have not exploited the full potential of non-parallel bilingual data either in training or decoding. In this paper, we propose the mirror-generative NMT (MGNMT), a single unified architecture that simultaneously integrates the source to target translation model, the target to source translation model, and two language models. Both translation models and language models share the same latent semantic space, therefore both translation directions can learn from non-parallel data more effectively. Besides, the translation models and language models can collaborate together during decoding. Our experiments show that the proposed MGNMT consistently outperforms existing approaches in all a variety of scenarios and language pairs, including resource-rich and low-resource languages.},
}

@inproceedings{wu2020importance,
  author    = {Qingyang Wu and Lei Li and Hao Zhou and Ying Zeng and Zhou Yu},
  title     = {Importance-Aware Learning for Neural Headline Editing},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {9282--9289},
  year      = {2020},
}

@inproceedings{xu2021stacked,
  author    = {Xu, Chen and Hu, Bojie and Li, Yanyang and Zhang, Yuhao and Huang, Shen and Ju, Qi and Xiao, Tong and Zhu, Jingbo},
  title     = {Stacked Acoustic-and-Textual Encoding: Integrating the Pre-trained Models into Speech Translation Encoders},
  booktitle = {Proc. of ACL},
  pages     = {2619--2630},
  year      = {2021},
}

@inproceedings{miao2019kernelized,
  author    = {Ning Miao and Hao Zhou and Chengqi Zhao and Wenxian Shi and Lei Li},
  title     = {Kernelized {Bayesian} Softmax for Text Generation},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  pages     = {12487--12497},
  year      = {2019},
}

@inproceedings{wang2019towards,
  author    = {Wang, Mingxuan and Xie, Jun and Tan, Zhixing and Su, Jinsong and Xiong, Deyi and Li, Lei},
  title     = {Towards Linear Time Neural Machine Translation with Capsule Networks},
  booktitle = {Proc. of EMNLP},
  pages     = {803--812},
  year      = {2019},
}

@inproceedings{potapczyk2020srpols,
  author    = {Potapczyk, Tomasz and Przybysz, Pawel},
  title     = {{SRPOL's} System for the {IWSLT} 2020 End-to-End Speech Translation Task},
  booktitle = {Proceedings of the 17th International Conference on Spoken Language Translation},
  pages     = {89--94},
  year      = {2020},
}

@inproceedings{ansari2020findings,
  author    = {Ansari, Ebrahim and Axelrod, Amittai and Bach, Nguyen and Bojar, Ond{\v{r}}ej and Cattoni, Roldano and Dalvi, Fahim and Durrani, Nadir and Federico, Marcello and Federmann, Christian and Gu, Jiatao and Huang, Fei and Knight, Kevin and Ma, Xutai and Nagesh, Ajay and Negri, Matteo and Niehues, Jan and Pino, Juan and Salesky, Elizabeth and Shi, Xing and St{\"u}ker, Sebastian and Turchi, Marco and Waibel, Alexander and Wang, Changhan},
  title     = {Findings of the {IWSLT} 2020 Evaluation Campaign},
  booktitle = {Proceedings of the 17th International Conference on Spoken Language Translation},
  pages     = {1--34},
  year      = {2020},
}

@inproceedings{weng2019correct,
  author    = {Rongxiang Weng and Hao Zhou and Shujian Huang and Lei Li and Yifan Xia and Jiajun Chen},
  title     = {Correct-and-Memorize: Learning to Translate from Interactive Revisions},
  booktitle = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16, 2019},
  editor    = {Sarit Kraus},
  pages     = {5255--5263},
  year      = {2019},
}

@inproceedings{bentivogli2021cascade,
  author    = {Bentivogli, Luisa and Cettolo, Mauro and Gaido, Marco and Karakanta, Alina and Martinelli, Alberto and Negri, Matteo and Turchi, Marco},
  title     = {Cascade versus Direct Speech Translation: Do the Differences Still Make a Difference?},
  booktitle = {Proc. of ACL},
  pages     = {2873--2887},
  year      = {2021},
}

@inproceedings{bao2019generating,
  author    = {Bao, Yu and Zhou, Hao and Huang, Shujian and Li, Lei and Mou, Lili and Vechtomova, Olga and Dai, Xin-yu and Chen, Jiajun},
  title     = {Generating Sentences from Disentangled Syntactic and Semantic Spaces},
  booktitle = {Proc. of ACL},
  pages     = {6008--6019},
  year      = {2019},
}

@inproceedings{qiu2019dynamically,
  author    = {Qiu, Lin and Xiao, Yunxuan and Qu, Yanru and Zhou, Hao and Li, Lei and Zhang, Weinan and Yu, Yong},
  title     = {Dynamically Fused Graph Network for Multi-hop Reasoning},
  booktitle = {Proc. of ACL},
  pages     = {6140--6150},
  year      = {2019},
}

@inproceedings{zhang2019generating,
  author    = {Zhang, Huangzhao and Zhou, Hao and Miao, Ning and Li, Lei},
  title     = {Generating Fluent Adversarial Examples for Natural Languages},
  booktitle = {Proc. of ACL},
  pages     = {5564--5569},
  year      = {2019},
}

@inproceedings{wu2019unified,
  author    = {Hao Wu and Jiayuan Mao and Yufeng Zhang and Yuning Jiang and Lei Li and Weiwei Sun and Wei{-}Ying Ma},
  title     = {Unified Visual-Semantic Embeddings: Bridging Vision and Language With Structured Meaning Representations},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2019, Long Beach, CA, USA, June 16-20, 2019},
  pages     = {6609--6618},
  year      = {2019},
}

@inproceedings{miao2019cgmh,
  author    = {Ning Miao and Hao Zhou and Lili Mou and Rui Yan and Lei Li},
  title     = {{CGMH}: Constrained Sentence Generation by {Metropolis-Hastings} Sampling},
  booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages     = {6834--6842},
  year      = {2019},
}

@inproceedings{shi2018tree,
  author    = {Shi, Haoyue and Zhou, Hao and Chen, Jiaze and Li, Lei},
  title     = {On Tree-Based Neural Sentence Modeling},
  booktitle = {Proc. of EMNLP},
  pages     = {4631--4641},
  year      = {2018},
}

@inproceedings{kingma2015adam,
  author    = {Diederik P. Kingma and Jimmy Ba},
  title     = {Adam: {A} Method for Stochastic Optimization},
  booktitle = {Proc. of ICLR},
  editor    = {Yoshua Bengio and Yann LeCun},
  year      = {2015},
}

@inproceedings{aharoni2019massively,
  author    = {Aharoni, Roee and Johnson, Melvin and Firat, Orhan},
  title     = {Massively Multilingual Neural Machine Translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {3874--3884},
  year      = {2019},
}

@inproceedings{alshedivat2019consistency,
  author    = {Al-Shedivat, Maruan and Parikh, Ankur},
  title     = {Consistency by Agreement in Zero-Shot Neural Machine Translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {1184--1197},
  year      = {2019},
}

@article{arivazhagan2019massively,
  author  = {Naveen Arivazhagan and Ankur Bapna and Orhan Firat and Dmitry Lepikhin and Melvin Johnson and Maxim Krikun and Mia Xu Chen and Yuan Cao and George F. Foster and Colin Cherry and Wolfgang Macherey and Zhifeng Chen and Yonghui Wu},
  title   = {Massively Multilingual Neural Machine Translation in the Wild: Findings and Challenges},
  journal = {ArXiv preprint},
  volume  = {abs/1907.05019},
  year    = {2019},
}

@inproceedings{bahdanau2015neural,
  author    = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  booktitle = {Proc. of ICLR},
  editor    = {Yoshua Bengio and Yann LeCun},
  year      = {2015},
}

@inproceedings{bapna2019simple,
  author    = {Bapna, Ankur and Firat, Orhan},
  title     = {Simple, Scalable Adaptation for Neural Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {1538--1548},
  year      = {2019},
}

@inproceedings{sennrich2016neural,
  author    = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title     = {Neural Machine Translation of Rare Words with Subword Units},
  booktitle = {Proc. of ACL},
  pages     = {1715--1725},
  year      = {2016},
}

@inproceedings{chen2017teacher,
  author    = {Chen, Yun and Liu, Yang and Cheng, Yong and Li, Victor O. K.},
  title     = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1925--1935},
  year      = {2017},
}

@inproceedings{chen2018zero,
  author    = {Yun Chen and Yang Liu and Victor O. K. Li},
  title     = {Zero-Resource Neural Machine Translation with Multi-Agent Communication Game},
  booktitle = {Proc. of AAAI},
  editor    = {Sheila A. McIlraith and Kilian Q. Weinberger},
  pages     = {5086--5093},
  year      = {2018},
}

@inproceedings{chen2020simple,
  author    = {Ting Chen and Simon Kornblith and Mohammad Norouzi and Geoffrey E. Hinton},
  title     = {A Simple Framework for Contrastive Learning of Visual Representations},
  booktitle = {Proc. of ICML},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {1597--1607},
  year      = {2020},
}

@inproceedings{choi2018improving,
  author    = {Choi, Gyu-Hyeon and Shin, Jong-Hun and Kim, Young-Kil},
  title     = {Improving a Multi-Source Neural Machine Translation Model with Corpus Extension for Low-Resource Languages},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  year      = {2018},
}

@article{chu2019multilingual,
  author  = {Chenhui Chu and Raj Dabre},
  title   = {Multilingual Multi-Domain Adaptation Approaches for Neural Machine Translation},
  journal = {ArXiv preprint},
  volume  = {abs/1906.07978},
  year    = {2019},
}

@inproceedings{tran2020cross,
  author    = {Chau Tran and Yuqing Tang and Xian Li and Jiatao Gu},
  title     = {Cross-lingual Retrieval for Iterative Self-Supervised Training},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  year      = {2020},
}

@inproceedings{currey2019zero,
  author    = {Currey, Anna and Heafield, Kenneth},
  title     = {Zero-Resource Neural Machine Translation with Monolingual Pivot Data},
  booktitle = {Proceedings of the 3rd Workshop on Neural Generation and Translation},
  pages     = {99--107},
  year      = {2019},
}

@article{dabre2017enabling,
  author  = {Raj Dabre and Fabien Cromier{\`{e}}s and Sadao Kurohashi},
  title   = {Enabling Multi-Source Neural Machine Translation By Concatenating Source Sentences In Multiple Languages},
  journal = {ArXiv preprint},
  volume  = {abs/1702.06135},
  year    = {2017},
}

@inproceedings{post2018call,
  author    = {Post, Matt},
  title     = {A Call for Clarity in Reporting {BLEU} Scores},
  booktitle = {Proceedings of the Third Conference on Machine Translation: Research Papers},
  pages     = {186--191},
  year      = {2018},
}

@inproceedings{wang2019learning,
  author    = {Wang, Qiang and Li, Bei and Xiao, Tong and Zhu, Jingbo and Li, Changliang and Wong, Derek F. and Chao, Lidia S.},
  title     = {Learning Deep Transformer Models for Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1810--1822},
  year      = {2019},
}

@inproceedings{dong2015multi,
  author    = {Dong, Daxiang and Wu, Hua and He, Wei and Yu, Dianhai and Wang, Haifeng},
  title     = {Multi-Task Learning for Multiple Language Translation},
  booktitle = {Proc. of ACL},
  pages     = {1723--1732},
  year      = {2015},
}

@article{escolano2020training,
  author  = {Carlos Escolano and Marta R. Costa{-}juss{\`{a}} and Jos{\'{e}} A. R. Fonollosa and Mikel Artetxe},
  title   = {Training Multilingual Machine Translation by Alternately Freezing Language-Specific Encoders-Decoders},
  journal = {ArXiv preprint},
  volume  = {abs/2006.01594},
  year    = {2020},
}

@article{fan2020english,
  author  = {Angela Fan and Shruti Bhosale and Holger Schwenk and Zhiyi Ma and Ahmed El{-}Kishky and Siddharth Goyal and Mandeep Baines and Onur Celebi and Guillaume Wenzek and Vishrav Chaudhary and Naman Goyal and Tom Birch and Vitaliy Liptchinsky and Sergey Edunov and Edouard Grave and Michael Auli and Armand Joulin},
  title   = {Beyond English-Centric Multilingual Machine Translation},
  journal = {ArXiv preprint},
  volume  = {abs/2010.11125},
  year    = {2020},
}

@article{fang2020cert,
  author  = {Hongchao Fang and Pengtao Xie},
  title   = {{CERT:} Contrastive Self-supervised Learning for Language Understanding},
  journal = {ArXiv preprint},
  volume  = {abs/2005.12766},
  year    = {2020},
}

@inproceedings{gu2019improved,
  author    = {Gu, Jiatao and Wang, Yong and Cho, Kyunghyun and Li, Victor O. K.},
  title     = {Improved Zero-shot Neural Machine Translation via Ignoring Spurious Correlations},
  booktitle = {Proc. of ACL},
  pages     = {1258--1268},
  year      = {2019},
}

@inproceedings{ha2017effective,
  author    = {Ha, Thanh-Le and Niehues, Jan and Waibel, Alexander},
  title     = {Effective Strategies in Zero-Shot Neural Machine Translation},
  booktitle = {Proceedings of the 14th International Conference on Spoken Language Translation},
  pages     = {105--112},
  year      = {2017},
}

@inproceedings{he2020momentum,
  author    = {Kaiming He and Haoqi Fan and Yuxin Wu and Saining Xie and Ross B. Girshick},
  title     = {Momentum Contrast for Unsupervised Visual Representation Learning},
  booktitle = {2020 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2020, Seattle, WA, USA, June 13-19, 2020},
  pages     = {9726--9735},
  year      = {2020},
}

@inproceedings{ji2020cross,
  author    = {Baijun Ji and Zhirui Zhang and Xiangyu Duan and Min Zhang and Boxing Chen and Weihua Luo},
  title     = {Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {115--122},
  year      = {2020},
}

@inproceedings{kim2019effective,
  author    = {Kim, Yunsu and Gao, Yingbo and Ney, Hermann},
  title     = {Effective Cross-lingual Transfer of Neural Machine Translation Models without Shared Vocabularies},
  booktitle = {Proc. of ACL},
  pages     = {1246--1257},
  year      = {2019},
}

@inproceedings{lample2018unsupervised,
  author    = {Guillaume Lample and Alexis Conneau and Ludovic Denoyer and Marc'Aurelio Ranzato},
  title     = {Unsupervised Machine Translation Using Monolingual Corpora Only},
  booktitle = {Proc. of ICLR},
  year      = {2018},
}

@inproceedings{misra2020self,
  author    = {Ishan Misra and Laurens van der Maaten},
  title     = {Self-Supervised Learning of Pretext-Invariant Representations},
  booktitle = {2020 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2020, Seattle, WA, USA, June 13-19, 2020},
  pages     = {6706--6716},
  year      = {2020},
}

@inproceedings{siddhant2020leveraging,
  author    = {Siddhant, Aditya and Bapna, Ankur and Cao, Yuan and Firat, Orhan and Chen, Mia and Kudugunta, Sneha and Arivazhagan, Naveen and Wu, Yonghui},
  title     = {Leveraging Monolingual Data with Self-Supervision for Multilingual Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {2827--2835},
  year      = {2020},
}

@inproceedings{tan2019multilingual,
  author    = {Xu Tan and Yi Ren and Di He and Tao Qin and Zhou Zhao and Tie{-}Yan Liu},
  title     = {Multilingual Neural Machine Translation with Knowledge Distillation},
  booktitle = {Proc. of ICLR},
  year      = {2019},
}

@article{artetxe2019massively,
  author  = {Artetxe, Mikel and Schwenk, Holger},
  title   = {Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {7},
  pages   = {597--610},
  year    = {2019},
}

@inproceedings{tian2020contrastive,
  author    = {Yonglong Tian and Dilip Krishnan and Phillip Isola},
  title     = {Contrastive Multiview Coding},
  booktitle = {Proc. of ECCV},
  editor    = {Andrea Vedaldi and Horst Bischof and Thomas Brox and Jan{-}Michael Frahm},
  series    = {Lecture Notes in Computer Science},
  volume    = {12356},
  pages     = {776--794},
  year      = {2020},
}

@inproceedings{wang2019compact,
  author    = {Wang, Yining and Zhou, Long and Zhang, Jiajun and Zhai, Feifei and Xu, Jingfang and Zong, Chengqing},
  title     = {A Compact and Language-Sensitive Multilingual Translation Method},
  booktitle = {Proc. of ACL},
  pages     = {1213--1223},
  year      = {2019},
}

@article{wu2016googles,
  author  = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V. and Norouzi, Mohammad and Macherey, Wolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and others},
  title   = {Google's neural machine translation system: Bridging the gap between human and machine translation},
  journal = {ArXiv preprint},
  volume  = {abs/1609.08144},
  year    = {2016},
}

@article{wu2020clear,
  author  = {Zhuofeng Wu and Sinong Wang and Jiatao Gu and Madian Khabsa and Fei Sun and Hao Ma},
  title   = {{CLEAR:} Contrastive Learning for Sentence Representation},
  journal = {ArXiv preprint},
  volume  = {abs/2012.15466},
  year    = {2020},
}

@inproceedings{zhuang2019local,
  author    = {Chengxu Zhuang and Alex Lin Zhai and Daniel Yamins},
  title     = {Local Aggregation for Unsupervised Learning of Visual Embeddings},
  booktitle = {2019 {IEEE/CVF} International Conference on Computer Vision, {ICCV} 2019, Seoul, Korea (South), October 27 - November 2, 2019},
  pages     = {6001--6011},
  year      = {2019},
}

@inproceedings{alinejad2020effectively,
  author    = {Alinejad, Ashkan and Sarkar, Anoop},
  title     = {Effectively pretraining a speech translation decoder with Machine Translation data},
  booktitle = {Proc. of EMNLP},
  pages     = {8014--8020},
  year      = {2020},
}

@inproceedings{baevski2020vq,
  author    = {Alexei Baevski and Steffen Schneider and Michael Auli},
  title     = {vq-wav2vec: Self-Supervised Learning of Discrete Speech Representations},
  booktitle = {Proc. of ICLR},
  year      = {2020},
}

@inproceedings{berard2016listena,
  author    = {B{\'e}rard, Alexandre and Pietquin, Olivier and Besacier, Laurent and Servan, Christophe},
  title     = {Listen and Translate: A Proof of Concept for End-to-End Speech-to-Text Translation},
  booktitle = {NIPS Workshop on end-to-end learning for speech and audio processing},
  year      = {2016},
}

@inproceedings{kano2017structureda,
  author    = {Takatomo Kano and Sakriani Sakti and Satoshi Nakamura},
  title     = {Structured-Based Curriculum Learning for End-to-End English-Japanese Speech Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Francisco Lacerda},
  pages     = {2630--2634},
  year      = {2017},
}

@inproceedings{kudo2018sentencepiece,
  author    = {Kudo, Taku and Richardson, John},
  title     = {{SentencePiece}: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing},
  booktitle = {Proc. of EMNLP},
  pages     = {66--71},
  year      = {2018},
}

@inproceedings{lewis2020bart,
  author    = {Lewis, Mike and Liu, Yinhan and Goyal, Naman and Ghazvininejad, Marjan and Mohamed, Abdelrahman and Levy, Omer and Stoyanov, Veselin and Zettlemoyer, Luke},
  title     = {{BART}: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
  booktitle = {Proc. of ACL},
  pages     = {7871--7880},
  year      = {2020},
}

@inproceedings{nguyen2020investigating,
  author    = {Ha Nguyen and Fethi Bougares and Natalia A. Tomashenko and Yannick Est{\`{e}}ve and Laurent Besacier},
  title     = {Investigating Self-Supervised Pre-Training for End-to-End Speech Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {1466--1470},
  year      = {2020},
}

@inproceedings{panayotov2015librispeech,
  author    = {Vassil Panayotov and Guoguo Chen and Daniel Povey and Sanjeev Khudanpur},
  title     = {Librispeech: An {ASR} corpus based on public domain audio books},
  booktitle = {2015 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia, April 19-24, 2015},
  pages     = {5206--5210},
  year      = {2015},
}

@inproceedings{pino2020selfa,
  author    = {Juan Miguel Pino and Qiantong Xu and Xutai Ma and Mohammad Javad Dousti and Yun Tang},
  title     = {Self-Training for End-to-End Speech Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {1476--1480},
  year      = {2020},
}

@inproceedings{schneider2019wav2vec,
  author    = {Steffen Schneider and Alexei Baevski and Ronan Collobert and Michael Auli},
  title     = {wav2vec: Unsupervised Pre-Training for Speech Recognition},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Gernot Kubin and Zdravko Kacic},
  pages     = {3465--3469},
  year      = {2019},
}

@article{tang2020multilingual,
  author  = {Tang, Yuqing and Tran, Chau and Li, Xian and Chen, Peng-Jen and Goyal, Naman and Chaudhary, Vishrav and Gu, Jiatao and Fan, Angela},
  title   = {Multilingual translation with extensible multilingual pretraining and finetuning},
  journal = {ArXiv preprint},
  volume  = {abs/2008.00401},
  year    = {2020},
}

@inproceedings{li2021multilingual,
  author    = {Xian Li and Changhan Wang and Yun Tang and Chau Tran and Yuqing Tang and Juan Pino and Alexei Baevski and Alexis Conneau and Michael Auli},
  title     = {Multilingual Speech Translation with Efficient Finetuning of Pretrained Models},
  booktitle = {Proc. of ACL},
  year      = {2021},
}

@inproceedings{wang2021unispeech,
  author    = {Chengyi Wang and Yu Wu and Yao Qian and Ken'ichi Kumatani and Shujie Liu and Furu Wei and Michael Zeng and Xuedong Huang},
  title     = {{UniSpeech}: Unified Speech Representation Learning with Labeled and Unlabeled Data},
  booktitle = {Proc. of ICML},
  editor    = {Marina Meila and Tong Zhang},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {10937--10947},
  year      = {2021},
}

@inproceedings{wu2020self,
  author    = {Anne Wu and Changhan Wang and Juan Miguel Pino and Jiatao Gu},
  title     = {Self-Supervised Representations Improve End-to-End Speech Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {1491--1495},
  year      = {2020},
}

@article{yi2021applying,
  author        = {Cheng Yi and Jianzhong Wang and Ning Cheng and Shiyu Zhou and Bo Xu},
  title         = {Applying Wav2vec2.0 to Speech Recognition in Various Low-resource Languages},
  journal       = {ArXiv preprint},
  volume        = {abs/2012.12121},
  year          = {2021},
  archiveprefix = {arXiv},
  eprint        = {2012.12121},
  primaryclass  = {cs.CL},
}

@inproceedings{zheng2021fused,
  author    = {Renjie Zheng and Junkun Chen and Mingbo Ma and Liang Huang},
  title     = {Fused Acoustic and Text Encoding for Multimodal Bilingual Pretraining and Speech Translation},
  booktitle = {Proc. of ICML},
  editor    = {Marina Meila and Tong Zhang},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {12736--12746},
  year      = {2021},
}

@article{kschischang2001factor,
  author  = {Kschischang, F. R. and Frey, B. J. and Loeliger, H.-A.},
  title   = {Factor graphs and the sum-product algorithm},
  journal = {IEEE Transactions on Information Theory},
  volume  = {47},
  number  = {2},
  pages   = {498--519},
  year    = {2001},
}

@inproceedings{abboud2020learning,
  author    = {Ralph Abboud and {\.I}smail {\.I}lkan Ceylan and Thomas Lukasiewicz},
  title     = {Learning to Reason: Leveraging Neural Networks for Approximate {DNF} Counting},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {3097--3104},
  year      = {2020},
}

@article{abdelaziz2020experimental,
  author  = {Abdelaziz, Ibrahim and Thost, Veronika and Crouse, Maxwell and Fokoue, Achille},
  title   = {An Experimental Study of Formula Embeddings for Automated Theorem Proving in First-Order Logic},
  journal = {ArXiv preprint},
  volume  = {abs/2002.00423},
  year    = {2020},
}

@inproceedings{bengio2015scheduled,
  author    = {Samy Bengio and Oriol Vinyals and Navdeep Jaitly and Noam Shazeer},
  title     = {Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  editor    = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  pages     = {1171--1179},
  year      = {2015},
}

@inproceedings{berant2013semantic,
  author    = {Berant, Jonathan and Chou, Andrew and Frostig, Roy and Liang, Percy},
  title     = {Semantic Parsing on {Freebase} from Question-Answer Pairs},
  booktitle = {Proc. of EMNLP},
  pages     = {1533--1544},
  year      = {2013},
}

@inproceedings{berant2014semantic,
  author    = {Berant, Jonathan and Liang, Percy},
  title     = {Semantic Parsing via Paraphrasing},
  booktitle = {Proc. of ACL},
  pages     = {1415--1425},
  year      = {2014},
}

@article{ceri1989what,
  author  = {Ceri, Stefano and Gottlob, Georg and Tanca, Letizia},
  title   = {What you always wanted to know about {Datalog} (and never dared to ask)},
  journal = {TKDE},
  volume  = {1},
  number  = {1},
  pages   = {146--166},
  year    = {1989},
}

@incollection{clark1978negation,
  author    = {Clark, Keith L.},
  title     = {Negation as Failure},
  booktitle = {Logic and Data Bases},
  editor    = {Gallaire, Herv{\'e} and Minker, Jack},
  pages     = {293--322},
  year      = {1978},
  isbn      = {978-1-4684-3384-5},
}

@inproceedings{clark2020transformers,
  author    = {Peter Clark and Oyvind Tafjord and Kyle Richardson},
  title     = {Transformers as Soft Reasoners over Language},
  booktitle = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, {IJCAI} 2020},
  editor    = {Christian Bessiere},
  pages     = {3882--3890},
  year      = {2020},
}

@article{dagan2013recognizing,
  author  = {Dagan, Ido and Roth, Dan and Sammons, Mark and Zanzotto, Fabio Massimo},
  title   = {Recognizing textual entailment: Models and applications},
  journal = {Synthesis Lectures on Human Language Technologies},
  volume  = {6},
  number  = {4},
  pages   = {1--220},
  year    = {2013},
}

@inproceedings{salakhutdinov2014deep,
  author    = {Ruslan Salakhutdinov},
  title     = {Deep learning},
  booktitle = {Proc. of KDD},
  editor    = {Sofus A. Macskassy and Claudia Perlich and Jure Leskovec and Wei Wang and Rayid Ghani},
  pages     = {1973},
  year      = {2014},
}

@book{koller2009probabilistic,
  author    = {Koller, Daphne and Friedman, Nir},
  title     = {Probabilistic graphical models: principles and techniques},
  publisher = {MIT Press},
  year      = {2009},
}

@inproceedings{lin2019reasoning,
  author    = {Lin, Kevin and Tafjord, Oyvind and Clark, Peter and Gardner, Matt},
  title     = {Reasoning Over Paragraph Effects in Situations},
  booktitle = {Proceedings of the 2nd Workshop on Machine Reading for Question Answering},
  pages     = {58--62},
  year      = {2019},
}

@inproceedings{maccartney2009extended,
  author    = {MacCartney, Bill and Manning, Christopher D.},
  title     = {An extended model of natural logic},
  booktitle = {Proceedings of the Eighth International Conference on Computational Semantics},
  pages     = {140--156},
  year      = {2009},
}

@incollection{maccartney2014natural,
  author    = {MacCartney, Bill and Manning, Christopher D.},
  title     = {Natural logic and natural language inference},
  booktitle = {Computing meaning},
  pages     = {129--147},
  year      = {2014},
}

@book{mccarthy1960programs,
  author = {McCarthy, John and others},
  title  = {Programs with common sense},
  year   = {1960},
}

@inproceedings{murphy1999loopy,
  author    = {Murphy, Kevin and Weiss, Yair and Jordan, Michael I.},
  title     = {Loopy belief propagation for approximate inference: An empirical study},
  booktitle = {Proc. of UAI},
  year      = {1999},
}

@incollection{musen1988brittleness,
  author    = {Musen, Mark A. and Van der Lei, Johan},
  title     = {Of brittleness and bottlenecks: Challenges in the creation of pattern-recognition and expert-system models},
  booktitle = {Machine Intelligence and Pattern Recognition},
  volume    = {7},
  pages     = {335--352},
  year      = {1988},
}

@incollection{neal1998view,
  author    = {Neal, Radford M. and Hinton, Geoffrey E.},
  title     = {A view of the {EM} algorithm that justifies incremental, sparse, and other variants},
  booktitle = {Learning in graphical models},
  pages     = {355--368},
  year      = {1998},
}

@inproceedings{neelakantan2016neural,
  author    = {Arvind Neelakantan and Quoc V. Le and Ilya Sutskever},
  title     = {Neural Programmer: Inducing Latent Programs with Gradient Descent},
  booktitle = {Proc. of ICLR},
  editor    = {Yoshua Bengio and Yann LeCun},
  year      = {2016},
}

@article{newell1956logic,
  author  = {Newell, Allen and Simon, Herbert},
  title   = {The logic theory machine---A complex information processing system},
  journal = {IRE Transactions on Information Theory},
  volume  = {2},
  number  = {3},
  pages   = {61--79},
  year    = {1956},
}

@book{opper2001advanced,
  author    = {Opper, Manfred and Saad, David},
  title     = {Advanced mean field methods: Theory and practice},
  publisher = {MIT Press},
  year      = {2001},
}

@inproceedings{reed2016neural,
  author    = {Scott E. Reed and Nando de Freitas},
  title     = {Neural Programmer-Interpreters},
  booktitle = {Proc. of ICLR},
  editor    = {Yoshua Bengio and Yann LeCun},
  year      = {2016},
}

@article{richardson2006markov,
  author  = {Richardson, Matthew and Domingos, Pedro},
  title   = {{Markov} logic networks},
  journal = {Machine Learning},
  volume  = {62},
  number  = {1--2},
  pages   = {107--136},
  year    = {2006}
}

@inproceedings{saha2020prover,
  author    = {Saha, Swarnadeep and Ghosh, Sayan and Srivastava, Shashank and Bansal, Mohit},
  title     = {{PR}over: Proof Generation for Interpretable Reasoning over Rules},
  booktitle = {Proc. of EMNLP},
  pages     = {122--136},
  year      = {2020}
}

@inproceedings{selsam2019learning,
  author    = {Daniel Selsam and Matthew Lamm and Benedikt B{\"{u}}nz and Percy Liang and Leonardo de Moura and David L. Dill},
  title     = {Learning a {SAT} Solver from Single-Bit Supervision},
  booktitle = {Proc. of ICLR},
  year      = {2019}
}

@inproceedings{tafjord2019quartz,
  author    = {Tafjord, Oyvind and Gardner, Matt and Lin, Kevin and Clark, Peter},
  title     = {{Q}ua{RT}z: An Open-Domain Dataset of Qualitative Relationship Questions},
  booktitle = {Proc. of EMNLP},
  pages     = {5941--5946},
  year      = {2019}
}

@inproceedings{tafjord2021proofwriter,
  author    = {Tafjord, Oyvind and Dalvi, Bhavana and Clark, Peter},
  title     = {{P}roof{W}riter: Generating Implications, Proofs, and Abductive Statements over Natural Language},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {3621--3634},
  year      = {2021}
}

@inproceedings{weber2019nlprolog,
  author    = {Weber, Leon and Minervini, Pasquale and M{\"u}nchmeyer, Jannes and Leser, Ulf and Rockt{\"a}schel, Tim},
  title     = {{NLP}rolog: Reasoning with Weak Unification for Question Answering in Natural Language},
  booktitle = {Proc. of ACL},
  pages     = {6151--6161},
  year      = {2019}
}

@inproceedings{weston2016towards,
  author    = {Jason Weston and Antoine Bordes and Sumit Chopra and Tom{\'{a}}s Mikolov},
  title     = {Towards {AI}-Complete Question Answering: {A} Set of Prerequisite Toy Tasks},
  booktitle = {Proc. of ICLR},
  editor    = {Yoshua Bengio and Yann LeCun},
  year      = {2016}
}

@inproceedings{yang2018hotpotqa,
  author    = {Yang, Zhilin and Qi, Peng and Zhang, Saizheng and Bengio, Yoshua and Cohen, William and Salakhutdinov, Ruslan and Manning, Christopher D.},
  title     = {{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering},
  booktitle = {Proc. of EMNLP},
  pages     = {2369--2380},
  year      = {2018}
}

@inproceedings{zettlemoyer2005learning,
  author    = {Zettlemoyer, Luke S and Collins, Michael},
  title     = {Learning to map sentences to logical form: Structured classification with probabilistic categorial grammars},
  booktitle = {Proc. of UAI},
  pages     = {658--666},
  year      = {2005}
}

@article{ando2005framework,
  author     = {Ando, Rie Kubota and Zhang, Tong},
  title      = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
  journal    = {JMLR},
  volume     = {6},
  pages      = {1817--1853},
  year       = {2005},
  issn       = {1532-4435},
  acmid      = {1194905},
  issue_date = {12/1/2005},
  numpages   = {37}
}

@inproceedings{andrew2007scalable,
  author    = {Andrew, Galen and Gao, Jianfeng},
  title     = {Scalable training of {L1}-regularized log-linear models},
  booktitle = {Proc. of ICML},
  pages     = {33--40},
  year      = {2007}
}

@incollection{bengio2007scaling,
  author    = {Bengio, Yoshua and LeCun, Yann},
  title     = {Scaling Learning Algorithms Towards {AI}},
  booktitle = {Large Scale Kernel Machines},
  year      = {2007}
}

@inproceedings{deng2020cascaded,
  author    = {Yuntian Deng and Alexander M. Rush},
  title     = {Cascaded Text Generation with {Markov} Transformers},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  year      = {2020}
}

@article{cho2015describing,
  author  = {Cho, Kyunghyun and Courville, Aaron and Bengio, Yoshua},
  title   = {Describing multimedia content using attention-based encoder-decoder networks},
  journal = {IEEE Transactions on Multimedia},
  volume  = {17},
  number  = {11},
  pages   = {1875--1886},
  year    = {2015}
}

@article{cho2016noisy,
  author  = {Cho, Kyunghyun},
  title   = {Noisy parallel approximate decoding for conditional recurrent language model},
  journal = {ArXiv preprint},
  volume  = {abs/1605.03835},
  year    = {2016}
}

@inproceedings{bao2021non,
  author    = {Bao, Yu and Huang, Shujian and Xiao, Tong and Wang, Dongqi and Dai, Xinyu and Chen, Jiajun},
  title     = {Non-Autoregressive Translation by Learning Target Categorical Codes},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {5749--5759},
  year      = {2021}
}

@inproceedings{krizhevsky2012imagenet,
  author    = {Alex Krizhevsky and Ilya Sutskever and Geoffrey E. Hinton},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  editor    = {Peter L. Bartlett and Fernando C. N. Pereira and Christopher J. C. Burges and L{\'{e}}on Bottou and Kilian Q. Weinberger},
  pages     = {1106--1114},
  year      = {2012}
}

@inproceedings{gehring2017convolutional,
  author    = {Jonas Gehring and Michael Auli and David Grangier and Denis Yarats and Yann N. Dauphin},
  title     = {Convolutional Sequence to Sequence Learning},
  booktitle = {Proc. of ICML},
  editor    = {Doina Precup and Yee Whye Teh},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {1243--1252},
  year      = {2017}
}

@inproceedings{bengio2009curriculum,
  author    = {Yoshua Bengio and J{\'{e}}r{\^{o}}me Louradour and Ronan Collobert and Jason Weston},
  title     = {Curriculum learning},
  booktitle = {Proc. of ICML},
  editor    = {Andrea Pohoreckyj Danyluk and L{\'{e}}on Bottou and Michael L. Littman},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {382},
  pages     = {41--48},
  year      = {2009}
}

@inproceedings{li2017dailydialog,
  author    = {Li, Yanran and Su, Hui and Shen, Xiaoyu and Li, Wenjie and Cao, Ziqiang and Niu, Shuzi},
  title     = {{D}aily{D}ialog: A Manually Labelled Multi-turn Dialogue Dataset},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  pages     = {986--995},
  year      = {2017}
}

@inproceedings{kasai2020non,
  author    = {Jungo Kasai and James Cross and Marjan Ghazvininejad and Jiatao Gu},
  title     = {Non-autoregressive Machine Translation with Disentangled Context Transformer},
  booktitle = {Proc. of ICML},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {5144--5155},
  year      = {2020}
}

@inproceedings{guo2019non,
  author    = {Junliang Guo and Xu Tan and Di He and Tao Qin and Linli Xu and Tie{-}Yan Liu},
  title     = {Non-Autoregressive Neural Machine Translation with Enhanced Decoder Input},
  booktitle = {Proc. of AAAI},
  pages     = {3723--3730},
  year      = {2019}
}

@inproceedings{ma2019flowseq,
  author    = {Ma, Xuezhe and Zhou, Chunting and Li, Xian and Neubig, Graham and Hovy, Eduard},
  title     = {{F}low{S}eq: Non-Autoregressive Conditional Sequence Generation with Generative Flow},
  booktitle = {Proc. of EMNLP},
  pages     = {4282--4292},
  year      = {2019}
}

@inproceedings{graves2013speech,
  author       = {Graves, Alex and Mohamed, Abdel-rahman and Hinton, Geoffrey},
  title        = {Speech recognition with deep recurrent neural networks},
  booktitle    = {Proc. of ICASSP},
  organization = {IEEE},
  pages        = {6645--6649},
  year         = {2013}
}

@inproceedings{gu2016incorporating,
  author    = {Gu, Jiatao and Lu, Zhengdong and Li, Hang and Li, Victor O. K.},
  title     = {Incorporating Copying Mechanism in Sequence-to-Sequence Learning},
  booktitle = {Proc. of ACL},
  pages     = {1631--1640},
  year      = {2016}
}

@inproceedings{gupta2018deep,
  author    = {Ankush Gupta and Arvind Agarwal and Prawaan Singh and Piyush Rai},
  title     = {A Deep Generative Framework for Paraphrase Generation},
  booktitle = {Proc. of AAAI},
  editor    = {Sheila A. McIlraith and Kilian Q. Weinberger},
  pages     = {5149--5156},
  year      = {2018}
}

@article{hamming1950error,
  author  = {Hamming, Richard W},
  title   = {Error detecting and error correcting codes},
  journal = {The Bell System Technical Journal},
  volume  = {29},
  number  = {2},
  pages   = {147--160},
  year    = {1950}
}

@inproceedings{li2019hint,
  author    = {Li, Zhuohan and He, Di and Tian, Fei and Qin, Tao and Wang, Liwei and Liu, Tie-Yan},
  title     = {Hint-based Training for Non-Autoregressive Translation},
  booktitle = {Proc. of EMNLP},
  year      = {2019}
}

@article{hinton2006fast,
  author  = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee Whye},
  title   = {A Fast Learning Algorithm for Deep Belief Nets},
  journal = {Neural Computation},
  volume  = {18},
  pages   = {1527--1554},
  year    = {2006}
}

@inproceedings{hinton2015distilling,
  author    = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  title     = {Distilling the knowledge in a neural network},
  booktitle = {NIPS Deep Learning and Representation Learning Workshop},
  year      = {2015}
}

@inproceedings{wei2019imitation,
  author    = {Wei, Bingzhen and Wang, Mingxuan and Zhou, Hao and Lin, Junyang and Sun, Xu},
  title     = {Imitation Learning for Non-Autoregressive Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1304--1312},
  year      = {2019}
}

@inproceedings{saharia2020non,
  author    = {Saharia, Chitwan and Chan, William and Saxena, Saurabh and Norouzi, Mohammad},
  title     = {Non-Autoregressive Machine Translation with Latent Alignments},
  booktitle = {Proc. of EMNLP},
  pages     = {1098--1108},
  year      = {2020}
}

@inproceedings{lee2018deterministic,
  author    = {Lee, Jason and Mansimov, Elman and Cho, Kyunghyun},
  title     = {Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement},
  booktitle = {Proc. of EMNLP},
  pages     = {1173--1182},
  year      = {2018}
}

@inproceedings{guo2020jointly,
  author    = {Guo, Junliang and Xu, Linli and Chen, Enhong},
  title     = {Jointly Masked Sequence-to-Sequence Model for Non-Autoregressive Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {376--385},
  year      = {2020}
}

@inproceedings{kim2016sequence,
  author    = {Kim, Yoon and Rush, Alexander M.},
  title     = {Sequence-Level Knowledge Distillation},
  booktitle = {Proc. of EMNLP},
  pages     = {1317--1327},
  year      = {2016}
}

@article{li2020lava,
  author  = {Li, Xiaoya and Meng, Yuxian and Yuan, Arianna and Wu, Fei and Li, Jiwei},
  title   = {{LAVA} {NAT}: A non-autoregressive translation model with look-around decoding and vocabulary attention},
  journal = {ArXiv preprint},
  volume  = {abs/2002.03084},
  year    = {2020}
}

@inproceedings{hu2015lcsts,
  author    = {Hu, Baotian and Chen, Qingcai and Zhu, Fangze},
  title     = {{LCSTS}: A Large Scale {C}hinese Short Text Summarization Dataset},
  booktitle = {Proc. of EMNLP},
  pages     = {1967--1972},
  year      = {2015}
}

% Soviet Physics Doklady is a journal, so this is an article, not a proceedings paper.
@article{levenshtein1966binary,
  author  = {Levenshtein, Vladimir I},
  title   = {Binary codes capable of correcting deletions, insertions, and reversals},
  journal = {Soviet Physics Doklady},
  volume  = {10},
  number  = {8},
  pages   = {707--710},
  year    = {1966}
}

@inproceedings{gu2019levenshtein,
  author    = {Jiatao Gu and Changhan Wang and Junbo Zhao},
  title     = {{Levenshtein} Transformer},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  pages     = {11179--11189},
  year      = {2019}
}

@inproceedings{kaiser2018fast,
  author    = {Lukasz Kaiser and Samy Bengio and Aurko Roy and Ashish Vaswani and Niki Parmar and Jakob Uszkoreit and Noam Shazeer},
  title     = {Fast Decoding in Sequence Models Using Discrete Latent Variables},
  booktitle = {Proc. of ICML},
  editor    = {Jennifer G. Dy and Andreas Krause},
  series    = {Proceedings of Machine Learning Research},
  volume    = {80},
  pages     = {2395--2404},
  year      = {2018}
}

% NOTE(review): same work as the entry keyed shu2020latent; both keys kept because either may be cited.
@inproceedings{shu2020latenta,
  author    = {Raphael Shu and Jason Lee and Hideki Nakayama and Kyunghyun Cho},
  title     = {Latent-Variable Non-Autoregressive Neural Machine Translation with Deterministic Inference Using a Delta Posterior},
  booktitle = {Proc. of AAAI},
  pages     = {8846--8853},
  year      = {2020}
}

@inproceedings{ghazvininejad2019mask,
  author    = {Ghazvininejad, Marjan and Levy, Omer and Liu, Yinhan and Zettlemoyer, Luke},
  title     = {Mask-Predict: Parallel Decoding of Conditional Masked Language Models},
  booktitle = {Proc. of EMNLP},
  pages     = {6112--6121},
  year      = {2019}
}

@inproceedings{mihaylova2019scheduled,
  author    = {Mihaylova, Tsvetomila and Martins, Andr{\'e} F. T.},
  title     = {Scheduled Sampling for Transformers},
  booktitle = {Proc. of ACL},
  pages     = {351--356},
  year      = {2019}
}

@inproceedings{koehn2007moses,
  author    = {Koehn, Philipp and Hoang, Hieu and Birch, Alexandra and Callison-Burch, Chris and
               Federico, Marcello and Bertoldi, Nicola and Cowan, Brooke and Shen, Wade and
               Moran, Christine and Zens, Richard and Dyer, Chris and Bojar, Ond{\v{r}}ej and
               Constantin, Alexandra and Herbst, Evan},
  title     = {{M}oses: Open Source Toolkit for Statistical Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {177--180},
  year      = {2007}
}

@inproceedings{gu2018non,
  author    = {Jiatao Gu and James Bradbury and Caiming Xiong and Victor O. K. Li and Richard Socher},
  title     = {Non-Autoregressive Neural Machine Translation},
  booktitle = {Proc. of ICLR},
  year      = {2018}
}

@inproceedings{sun2019fast,
  author    = {Zhiqing Sun and Zhuohan Li and Haoqing Wang and Di He and Zi Lin and Zhi{-}Hong Deng},
  title     = {Fast Structured Decoding for Sequence Models},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  pages     = {3011--3020},
  year      = {2019}
}

@inproceedings{ghazvininejad2020aligned,
  author    = {Marjan Ghazvininejad and Vladimir Karpukhin and Luke Zettlemoyer and Omer Levy},
  title     = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
  booktitle = {Proc. of ICML},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {3515--3523},
  year      = {2020}
}

@inproceedings{shao2020minimizing,
  author    = {Chenze Shao and Jinchao Zhang and Yang Feng and Fandong Meng and Jie Zhou},
  title     = {Minimizing the Bag-of-Ngrams Difference for Non-Autoregressive Neural Machine Translation},
  booktitle = {Proc. of AAAI},
  pages     = {198--205},
  year      = {2020}
}

@inproceedings{libovicky2018end,
  author    = {Libovick{\'y}, Jind{\v{r}}ich and Helcl, Jind{\v{r}}ich},
  title     = {End-to-End Non-Autoregressive Neural Machine Translation with Connectionist Temporal Classification},
  booktitle = {Proc. of EMNLP},
  pages     = {3016--3021},
  year      = {2018}
}

@inproceedings{sun2020em,
  author    = {Zhiqing Sun and Yiming Yang},
  title     = {An {EM} Approach to Non-autoregressive Conditional Sequence Generation},
  booktitle = {Proc. of ICML},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {9249--9258},
  year      = {2020}
}

@inproceedings{guo2020fine,
  author    = {Junliang Guo and Xu Tan and Linli Xu and Tao Qin and Enhong Chen and Tie{-}Yan Liu},
  title     = {Fine-Tuning by Curriculum Learning for Non-Autoregressive Neural Machine Translation},
  booktitle = {Proc. of AAAI},
  pages     = {7839--7846},
  year      = {2020}
}

@inproceedings{wang2019non,
  author    = {Yiren Wang and Fei Tian and Di He and Tao Qin and ChengXiang Zhai and Tie{-}Yan Liu},
  title     = {Non-Autoregressive Machine Translation with Auxiliary Regularization},
  booktitle = {Proc. of AAAI},
  pages     = {5377--5384},
  year      = {2019}
}

@inproceedings{oord2018parallel,
  author    = {A{\"{a}}ron van den Oord and Yazhe Li and Igor Babuschkin and Karen Simonyan and
               Oriol Vinyals and Koray Kavukcuoglu and George van den Driessche and
               Edward Lockhart and Luis C. Cobo and Florian Stimberg and Norman Casagrande and
               Dominik Grewe and Seb Noury and Sander Dieleman and Erich Elsen and
               Nal Kalchbrenner and Heiga Zen and Alex Graves and Helen King and Tom Walters and
               Dan Belov and Demis Hassabis},
  title     = {Parallel {WaveNet}: Fast High-Fidelity Speech Synthesis},
  booktitle = {Proc. of ICML},
  editor    = {Jennifer G. Dy and Andreas Krause},
  series    = {Proceedings of Machine Learning Research},
  volume    = {80},
  pages     = {3915--3923},
  year      = {2018}
}

@article{bao2019non,
  author  = {Bao, Yu and Zhou, Hao and Feng, Jiangtao and Wang, Mingxuan and Huang, Shujian and Chen, Jiajun and Li, Lei},
  title   = {Non-autoregressive Transformer by Position Learning},
  journal = {ArXiv preprint},
  volume  = {abs/1911.10677},
  year    = {2019}
}

@inproceedings{vinyals2015pointer,
  author    = {Oriol Vinyals and Meire Fortunato and Navdeep Jaitly},
  title     = {Pointer Networks},
  booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  editor    = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  pages     = {2692--2700},
  year      = {2015}
}

@article{rasooli2015yara,
  author  = {Mohammad Sadegh Rasooli and Joel R. Tetreault},
  title   = {Yara Parser: {A} Fast and Accurate Dependency Parser},
  journal = {ArXiv preprint},
  volume  = {abs/1503.06733},
  year    = {2015}
}

@article{ran2019guiding,
  author  = {Ran, Qiu and Lin, Yankai and Li, Peng and Zhou, Jie},
  title   = {Guiding Non-Autoregressive Neural Machine Translation Decoding with Reordering Information},
  journal = {ArXiv preprint},
  volume  = {abs/1911.02215},
  year    = {2019}
}

@inproceedings{shao2019retrieving,
  author    = {Shao, Chenze and Feng, Yang and Zhang, Jinchao and Meng, Fandong and Chen, Xilin and Zhou, Jie},
  title     = {Retrieving Sequential Information for Non-Autoregressive Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {3013--3024},
  year      = {2019}
}

@inproceedings{shaw2018self,
  author    = {Shaw, Peter and Uszkoreit, Jakob and Vaswani, Ashish},
  title     = {Self-Attention with Relative Position Representations},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {464--468},
  year      = {2018}
}

@inproceedings{shen2017conditional,
  author    = {Shen, Xiaoyu and Su, Hui and Li, Yanran and Li, Wenjie and Niu, Shuzi and Zhao, Yang and Aizawa, Akiko and Long, Guoping},
  title     = {A Conditional Variational Framework for Dialog Generation},
  booktitle = {Proc. of ACL},
  pages     = {504--509},
  year      = {2017}
}

@inproceedings{shen2018straight,
  author    = {Shen, Yikang and Lin, Zhouhan and Jacob, Athul Paul and Sordoni, Alessandro and Courville, Aaron and Bengio, Yoshua},
  title     = {Straight to the Tree: Constituency Parsing with Neural Syntactic Distance},
  booktitle = {Proc. of ACL},
  pages     = {1171--1180},
  year      = {2018}
}

% NOTE(review): same work as the entry keyed shu2020latenta; both keys kept because either may be cited.
@inproceedings{shu2020latent,
  author    = {Raphael Shu and Jason Lee and Hideki Nakayama and Kyunghyun Cho},
  title     = {Latent-Variable Non-Autoregressive Neural Machine Translation with Deterministic Inference Using a Delta Posterior},
  booktitle = {Proc. of AAAI},
  pages     = {8846--8853},
  year      = {2020}
}

@article{ghazvininejad2020semi,
  author  = {Ghazvininejad, Marjan and Levy, Omer and Zettlemoyer, Luke},
  title   = {Semi-autoregressive training improves mask-predict decoding},
  journal = {ArXiv preprint},
  volume  = {abs/2001.08785},
  year    = {2020}
}

@inproceedings{sundermeyer2012lstm,
  author    = {Sundermeyer, Martin and Schl{\"u}ter, Ralf and Ney, Hermann},
  title     = {{LSTM} neural networks for language modeling},
  booktitle = {Proc. of INTERSPEECH},
  year      = {2012}
}

% NOTE(review): no arXiv identifier recorded here; add volume = {abs/...} once confirmed.
@article{roy2018towards,
  author  = {Roy, Aurko and Vaswani, Ashish and Parmar, Niki and Neelakantan, Arvind},
  title   = {Towards a better understanding of Vector Quantized Autoencoders},
  journal = {ArXiv preprint},
  year    = {2018}
}

% Two-author monograph; the exported "and others" made styles print "et al.".
@article{wainwright2008graphical,
  author  = {Wainwright, Martin J and Jordan, Michael I},
  title   = {Graphical models, exponential families, and variational inference},
  journal = {Foundations and Trends{\textregistered} in Machine Learning},
  volume  = {1},
  number  = {1--2},
  pages   = {1--305},
  year    = {2008}
}

@inproceedings{ye2021end,
  author    = {Rong Ye and Mingxuan Wang and Lei Li},
  title     = {End-to-End Speech Translation via Cross-Modal Progressive Training},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Hynek Hermansky and Honza Cernock{\'{y}} and Luk{\'{a}}s Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}cek},
  pages     = {2267--2271},
  year      = {2021}
}

@inproceedings{cao2018faithful,
  author    = {Ziqiang Cao and Furu Wei and Wenjie Li and Sujian Li},
  title     = {Faithful to the Original: Fact Aware Neural Abstractive Summarization},
  booktitle = {Proc. of AAAI},
  editor    = {Sheila A. McIlraith and Kilian Q. Weinberger},
  pages     = {4784--4791},
  year      = {2018}
}

@article{chandra1981alternation,
  author  = {Ashok K. Chandra and Dexter C. Kozen and Larry J. Stockmeyer},
  title   = {Alternation},
  journal = {JACM},
  volume  = {28},
  number  = {1},
  pages   = {114--133},
  year    = {1981}
}

@inproceedings{chen2016thorough,
  author    = {Chen, Danqi and Bolton, Jason and Manning, Christopher D.},
  title     = {A Thorough Examination of the {CNN}/{D}aily {M}ail Reading Comprehension Task},
  booktitle = {Proc. of ACL},
  pages     = {2358--2367},
  year      = {2016}
}

@inproceedings{cho2014learning,
  author    = {Cho, Kyunghyun and van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title     = {Learning Phrase Representations using {RNN} Encoder{--}Decoder for Statistical Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {1724--1734},
  year      = {2014}
}

@inproceedings{durmus2020feqa,
  author    = {Durmus, Esin and He, He and Diab, Mona},
  title     = {{FEQA}: A Question Answering Evaluation Framework for Faithfulness Assessment in Abstractive Summarization},
  booktitle = {Proc. of ACL},
  pages     = {5055--5070},
  year      = {2020}
}

@inproceedings{durrett2016learning,
  author    = {Durrett, Greg and Berg-Kirkpatrick, Taylor and Klein, Dan},
  title     = {Learning-Based Single-Document Summarization with Compression and Anaphoricity Constraints},
  booktitle = {Proc. of ACL},
  pages     = {1998--2008},
  year      = {2016}
}

@inproceedings{filippova2020controlled,
  author    = {Filippova, Katja},
  title     = {Controlled Hallucinations: Learning to Generate Faithfully from Noisy Data},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  pages     = {864--870},
  year      = {2020}
}

@inproceedings{gao2019abstractive,
  author    = {Shen Gao and Xiuying Chen and Piji Li and Zhaochun Ren and Lidong Bing and Dongyan Zhao and Rui Yan},
  title     = {Abstractive Text Summarization by Incorporating Reader Comments},
  booktitle = {Proc. of AAAI},
  pages     = {6399--6406},
  year      = {2019}
}

@inproceedings{gao2019how,
  author    = {Gao, Shen and Chen, Xiuying and Li, Piji and Chan, Zhangming and Zhao, Dongyan and Yan, Rui},
  title     = {How to Write Summaries with Patterns? Learning towards Abstractive Summarization through Prototype Editing},
  booktitle = {Proc. of EMNLP},
  pages     = {3741--3751},
  year      = {2019}
}

@inproceedings{grusky2018newsroom,
  author    = {Grusky, Max and Naaman, Mor and Artzi, Yoav},
  title     = {{N}ewsroom: A Dataset of 1.3 Million Summaries with Diverse Extractive Strategies},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {708--719},
  year      = {2018}
}

@book{gusfield1997algorithms,
  author    = {Dan Gusfield},
  title     = {Algorithms on Strings, Trees and Sequences},
  publisher = {Cambridge University Press},
  year      = {1997}
}

@inproceedings{hermann2015teaching,
  author    = {Karl Moritz Hermann and Tom{\'{a}}s Kocisk{\'{y}} and Edward Grefenstette and Lasse Espeholt and Will Kay and Mustafa Suleyman and Phil Blunsom},
  title     = {Teaching Machines to Read and Comprehend},
  booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  editor    = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  pages     = {1693--1701},
  year      = {2015}
}

@inproceedings{hua2017overview,
  author    = {Hua, Lifeng and Wan, Xiaojun and Li, Lei},
  title     = {Overview of the {NLPCC} 2017 shared task: single document summarization},
  booktitle = {Proc. of NLPCC},
  publisher = {Springer},
  pages     = {942--947},
  year      = {2017}
}

@inproceedings{huang2020generating,
  author    = {Huang, Kuan-Hao and Li, Chen and Chang, Kai-Wei},
  title     = {Generating Sports News from Live Commentary: A {C}hinese Dataset for Sports Game Summarization},
  booktitle = {Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing},
  pages     = {609--615},
  year      = {2020}
}

@inproceedings{kryscinski2019neural,
  author    = {Kryscinski, Wojciech and Keskar, Nitish Shirish and McCann, Bryan and Xiong, Caiming and Socher, Richard},
  title     = {Neural Text Summarization: A Critical Evaluation},
  booktitle = {Proc. of EMNLP},
  pages     = {540--551},
  year      = {2019}
}

@inproceedings{kryscinski2020evaluating,
  author    = {Kryscinski, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard},
  title     = {Evaluating the Factual Consistency of Abstractive Text Summarization},
  booktitle = {Proc. of EMNLP},
  pages     = {9332--9346},
  year      = {2020}
}

% "Text Summarization Branches Out" is a workshop proceedings volume, so this is typed as a proceedings paper.
@inproceedings{lin2004rouge,
  author    = {Lin, Chin-Yew},
  title     = {{ROUGE}: A Package for Automatic Evaluation of Summaries},
  booktitle = {Text Summarization Branches Out},
  pages     = {74--81},
  year      = {2004}
}

% Venue added in the arXiv convention used elsewhere in this file (journal + abs/ volume).
@article{liu2019robertaa,
  author  = {Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis, Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
  title   = {{RoBERTa}: A Robustly Optimized {BERT} Pretraining Approach},
  journal = {ArXiv preprint},
  volume  = {abs/1907.11692},
  year    = {2019}
}

@inproceedings{liu2019text,
  author    = {Liu, Yang and Lapata, Mirella},
  title     = {Text Summarization with Pretrained Encoders},
  booktitle = {Proc. of EMNLP},
  pages     = {3730--3740},
  year      = {2019}
}

@inproceedings{liu2020clts,
  author    = {Xiaojun Liu and Chuang Zhang and X. Chen and Yanan Cao and Jinpeng Li},
  title     = {{CLTS}: A New {Chinese} Long Text Summarization Dataset},
  booktitle = {Proc. of NLPCC},
  year      = {2020}
}

@inproceedings{maynez2020faithfulness,
  author    = {Maynez, Joshua and Narayan, Shashi and Bohnet, Bernd and McDonald, Ryan},
  title     = {On Faithfulness and Factuality in Abstractive Summarization},
  booktitle = {Proc. of ACL},
  pages     = {1906--1919},
  year      = {2020}
}

@inproceedings{mihalcea2004textrank,
  author    = {Mihalcea, Rada and Tarau, Paul},
  title     = {{T}ext{R}ank: Bringing Order into Text},
  booktitle = {Proc. of EMNLP},
  pages     = {404--411},
  year      = {2004}
}

@inproceedings{nallapati2017summarunner,
  author    = {Ramesh Nallapati and Feifei Zhai and Bowen Zhou},
  title     = {{SummaRuNNer}: {A} Recurrent Neural Network Based Sequence Model for Extractive Summarization of Documents},
  booktitle = {Proc. of AAAI},
  editor    = {Satinder P. Singh and Shaul Markovitch},
  pages     = {3075--3081},
  year      = {2017}
}

@inproceedings{narayan2018dont,
  author    = {Narayan, Shashi and Cohen, Shay B. and Lapata, Mirella},
  title     = {Don{'}t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization},
  booktitle = {Proc. of EMNLP},
  pages     = {1797--1807},
  year      = {2018}
}

@article{narayan2019what,
  author  = {Narayan, Shashi and Cohen, Shay B and Lapata, Mirella},
  title   = {What is this Article about? Extreme Summarization with Topic-aware Convolutional Neural Networks},
  journal = {JAIR},
  volume  = {66},
  pages   = {243--278},
  year    = {2019}
}

@inproceedings{paulus2018deep,
  author    = {Romain Paulus and Caiming Xiong and Richard Socher},
  title     = {A Deep Reinforced Model for Abstractive Summarization},
  booktitle = {Proc. of ICLR},
  year      = {2018}
}

% This is an LDC corpus release, not a journal article; the exported volume/number/pages
% did not describe the corpus and were dropped. LDC catalog number recorded in note.
@misc{sandhaus2008new,
  author       = {Sandhaus, Evan},
  title        = {The {New York Times} Annotated Corpus},
  howpublished = {Linguistic Data Consortium, Philadelphia},
  note         = {LDC2008T19},
  year         = {2008}
}

@inproceedings{see2017get,
  author    = {See, Abigail and Liu, Peter J. and Manning, Christopher D.},
  title     = {Get To The Point: Summarization with Pointer-Generator Networks},
  booktitle = {Proc. of ACL},
  pages     = {1073--1083},
  year      = {2017}
}

% pages is derived from articleno (84) and numpages (17), which standard styles do not print.
@article{xi2020global,
  author     = {Xi, Xuefeng and Pi, Zhou and Zhou, Guodong},
  title      = {Global Encoding for Long {Chinese} Text Summarization},
  journal    = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume     = {19},
  number     = {6},
  pages      = {84:1--84:17},
  year       = {2020},
  issn       = {2375-4699},
  articleno  = {84},
  numpages   = {17},
  issue_date = {November 2020},
  keywords   = {corpus building, Text summarization, long Chinese text}
}

@inproceedings{xu2020clue,
  author    = {Xu, Liang and Hu, Hai and Zhang, Xuanwei and Li, Lu and Cao, Chenjie and
               Li, Yudong and Xu, Yechen and Sun, Kai and Yu, Dian and Yu, Cong and
               Tian, Yin and Dong, Qianqian and Liu, Weitang and Shi, Bo and Cui, Yiming and
               Li, Junyi and Zeng, Jun and Wang, Rongzhao and Xie, Weijian and Li, Yanting and
               Patterson, Yina and Tian, Zuoyu and Zhang, Yiwen and Zhou, He and
               Liu, Shaoweihua and Zhao, Zhe and Zhao, Qipeng and Yue, Cong and
               Zhang, Xinrui and Yang, Zhengliang and Richardson, Kyle and Lan, Zhenzhong},
  title     = {{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  pages     = {4762--4772},
  year      = {2020}
}

@inproceedings{zhang2018abstractiveness,
  author    = {Zhang, Fangfang and Yao, Jin-ge and Yan, Rui},
  title     = {On the Abstractiveness of Neural Document Summarization},
  booktitle = {Proc. of EMNLP},
  pages     = {785--790},
  year      = {2018},
}

@inproceedings{zhong2020extractive,
  author    = {Zhong, Ming and Liu, Pengfei and Chen, Yiran and Wang, Danqing and Qiu, Xipeng and Huang, Xuanjing},
  title     = {Extractive Summarization as Text Matching},
  booktitle = {Proc. of ACL},
  pages     = {6197--6208},
  year      = {2020},
}

@inproceedings{zhou2018neural,
  author    = {Zhou, Qingyu and Yang, Nan and Wei, Furu and Huang, Shaohan and Zhou, Ming and Zhao, Tiejun},
  title     = {Neural Document Summarization by Jointly Learning to Score and Select Sentences},
  booktitle = {Proc. of ACL},
  pages     = {654--663},
  year      = {2018},
}

@inproceedings{firat2016multi,
  author    = {Firat, Orhan and Cho, Kyunghyun and Bengio, Yoshua},
  title     = {Multi-Way, Multilingual Neural Machine Translation with a Shared Attention Mechanism},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {866--875},
  year      = {2016},
}

@inproceedings{firat2016zero,
  author    = {Firat, Orhan and Sankaran, Baskaran and Al-onaizan, Yaser and Yarman Vural, Fatos T. and Cho, Kyunghyun},
  title     = {Zero-Resource Translation with Multi-Lingual Neural Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {268--277},
  year      = {2016},
}

@inproceedings{blackwood2018multilingual,
  author    = {Blackwood, Graeme and Ballesteros, Miguel and Ward, Todd},
  title     = {Multilingual Neural Machine Translation with Task-Specific Attention},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  pages     = {3112--3122},
  year      = {2018},
}

@inproceedings{platanios2018contextual,
  author    = {Platanios, Emmanouil Antonios and Sachan, Mrinmaya and Neubig, Graham and Mitchell, Tom},
  title     = {Contextual Parameter Generation for Universal Neural Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {425--435},
  year      = {2018},
}

@inproceedings{wang2019multilingual,
  author    = {Xinyi Wang and Hieu Pham and Philip Arthur and Graham Neubig},
  title     = {Multilingual Neural Machine Translation With Soft Decoupled Encoding},
  booktitle = {Proc. of ICLR},
  year      = {2019},
}

@inproceedings{tan2019multilinguala,
  author    = {Tan, Xu and Chen, Jiale and He, Di and Xia, Yingce and Qin, Tao and Liu, Tie-Yan},
  title     = {Multilingual Neural Machine Translation with Language Clustering},
  booktitle = {Proc. of EMNLP},
  pages     = {963--973},
  year      = {2019},
}

@inproceedings{wang2020multi,
  author    = {Wang, Yiren and Zhai, ChengXiang and Hassan, Hany},
  title     = {Multi-task Learning for Multilingual Neural Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {1022--1034},
  year      = {2020},
}

@inproceedings{vincent2008extracting,
  author    = {Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre{-}Antoine Manzagol},
  title     = {Extracting and composing robust features with denoising autoencoders},
  booktitle = {Proc. of ICML},
  editor    = {William W. Cohen and Andrew McCallum and Sam T. Roweis},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {307},
  pages     = {1096--1103},
  year      = {2008},
}

@article{qiu2020pre,
  author  = {Qiu, Xipeng and
             Sun, Tianxiang and
             Xu, Yige and
             Shao, Yunfan and
             Dai, Ning and
             Huang, Xuanjing},
  title   = {Pre-trained models for natural language processing: A survey},
  journal = {Science China Technological Sciences},
  year    = {2020},
}

@misc{radford2018improvinga,
  author       = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
  title        = {Improving language understanding with unsupervised learning},
  howpublished = {{OpenAI} Blog},
  year         = {2018},
}

@misc{radford2019languagea,
  author       = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title        = {Language Models are Unsupervised Multitask Learners},
  howpublished = {{OpenAI} Blog},
  year         = {2019},
}

@inproceedings{brown2020language,
  author    = {Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and
               Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and
               Girish Sastry and Amanda Askell and Sandhini Agarwal and Ariel Herbert{-}Voss and
               Gretchen Krueger and Tom Henighan and Rewon Child and Aditya Ramesh and
               Daniel M. Ziegler and Jeffrey Wu and Clemens Winter and Christopher Hesse and
               Mark Chen and Eric Sigler and Mateusz Litwin and Scott Gray and
               Benjamin Chess and Jack Clark and Christopher Berner and Sam McCandlish and
               Alec Radford and Ilya Sutskever and Dario Amodei},
  title     = {Language Models are Few-Shot Learners},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  year      = {2020},
}

@inproceedings{conneau2020unsupervised,
  author    = {Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and
               Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and
               Zettlemoyer, Luke and Stoyanov, Veselin},
  title     = {Unsupervised Cross-lingual Representation Learning at Scale},
  booktitle = {Proc. of ACL},
  pages     = {8440--8451},
  year      = {2020},
}

@article{raffel2020exploring,
  author  = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
  title   = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  journal = {JMLR},
  volume  = {21},
  number  = {140},
  pages   = {1--67},
  year    = {2020},
}

@inproceedings{xue2021mt5,
  author    = {Xue, Linting and Constant, Noah and Roberts, Adam and Kale, Mihir and
               Al-Rfou, Rami and Siddhant, Aditya and Barua, Aditya and Raffel, Colin},
  title     = {m{T}5: A Massively Multilingual Pre-trained Text-to-Text Transformer},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {483--498},
  year      = {2021},
}

@article{caruana1997multitask,
  author  = {Caruana, Rich},
  title   = {Multitask learning},
  journal = {Machine Learning},
  volume  = {28},
  number  = {1},
  pages   = {41--75},
  year    = {1997},
}

@article{goodfellow2013empirical,
  author  = {Goodfellow, Ian J and
             Mirza, Mehdi and
             Xiao, Da and
             Courville, Aaron and
             Bengio, Yoshua},
  title   = {An empirical investigation of catastrophic forgetting in gradient-based neural networks},
  journal = {ArXiv preprint},
  volume  = {abs/1312.6211},
  year    = {2013},
}

@inproceedings{yang2020cspcode,
  author    = {Yang, Zhen and Hu, Bojie and Han, Ambyera and Huang, Shen and Ju, Qi},
  title     = {{CSP}: Code-Switching Pre-training for Neural Machine Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {2624--2636},
  year      = {2020},
}

@inproceedings{pan2021contrastivea,
  author    = {Pan, Xiao and Wang, Mingxuan and Wu, Liwei and Li, Lei},
  title     = {Contrastive Learning for Many-to-many Multilingual Neural Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {244--258},
  year      = {2021},
}

@inproceedings{guo2020incorporating,
  author    = {Junliang Guo and Zhirui Zhang and Linli Xu and Hao{-}Ran Wei and Boxing Chen and Enhong Chen},
  title     = {Incorporating {BERT} into Parallel Sequence Decoding with Adapters},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  year      = {2020},
}

@article{rothe2020leveraging,
  author  = {Rothe, Sascha and Narayan, Shashi and Severyn, Aliaksei},
  title   = {Leveraging Pre-trained Checkpoints for Sequence Generation Tasks},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {8},
  pages   = {264--280},
  year    = {2020},
}

@inproceedings{weng2020acquiring,
  author    = {Rongxiang Weng and Heng Yu and Shujian Huang and Shanbo Cheng and Weihua Luo},
  title     = {Acquiring Knowledge from Pre-Trained Model to Neural Machine Translation},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {9266--9273},
  year      = {2020},
}

@inproceedings{luo2021veco,
  author    = {Luo, Fuli and Wang, Wei and Liu, Jiahao and Liu, Yijia and
               Bi, Bin and Huang, Songfang and Huang, Fei and Si, Luo},
  title     = {{VECO}: Variable and Flexible Cross-lingual Pre-training for Language Understanding and Generation},
  booktitle = {Proc. of ACL},
  pages     = {3980--3994},
  year      = {2021},
}

@article{ma2020xlm,
  author  = {Ma, Shuming and Yang, Jian and Huang, Haoyang and Chi, Zewen and Dong, Li and Zhang, Dongdong and Awadalla, Hany Hassan and Muzio, Alexandre and Eriguchi, Akiko and Singhal, Saksham and others},
  title   = {{XLM-T}: Scaling up Multilingual Machine Translation with Pretrained Cross-lingual Transformer Encoders},
  journal = {ArXiv preprint},
  volume  = {abs/2012.15547},
  year    = {2020},
}

@inproceedings{yang2020alternating,
  author    = {Jian Yang and Shuming Ma and Dongdong Zhang and Shuangzhi Wu and Zhoujun Li and Ming Zhou},
  title     = {Alternating Language Modeling for Cross-Lingual Pre-Training},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {9386--9393},
  year      = {2020},
}

@inproceedings{chen2021zero,
  author    = {Chen, Guanhua and Ma, Shuming and Chen, Yun and Dong, Li and
               Zhang, Dongdong and Pan, Jia and Wang, Wenping and Wei, Furu},
  title     = {Zero-Shot Cross-Lingual Transfer of Neural Machine Translation with Multilingual Pretrained Encoders},
  booktitle = {Proc. of EMNLP},
  pages     = {15--26},
  year      = {2021},
}

@inproceedings{sutskever2014sequence,
  author    = {Ilya Sutskever and Oriol Vinyals and Quoc V. Le},
  title     = {Sequence to Sequence Learning with Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada},
  editor    = {Zoubin Ghahramani and Max Welling and Corinna Cortes and Neil D. Lawrence and Kilian Q. Weinberger},
  pages     = {3104--3112},
  year      = {2014},
}

@inproceedings{sennrich2016improving,
  author    = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title     = {Improving Neural Machine Translation Models with Monolingual Data},
  booktitle = {Proc. of ACL},
  pages     = {86--96},
  year      = {2016},
}

@inproceedings{edunov2018understanding,
  author    = {Edunov, Sergey and Ott, Myle and Auli, Michael and Grangier, David},
  title     = {Understanding Back-Translation at Scale},
  booktitle = {Proc. of EMNLP},
  pages     = {489--500},
  year      = {2018},
}

@inproceedings{hoang2018iterative,
  author    = {Hoang, Vu Cong Duy and Koehn, Philipp and Haffari, Gholamreza and Cohn, Trevor},
  title     = {Iterative Back-Translation for Neural Machine Translation},
  booktitle = {Proceedings of the 2nd Workshop on Neural Machine Translation and Generation},
  pages     = {18--24},
  year      = {2018},
}

@inproceedings{xiong2020layer,
  author    = {Ruibin Xiong and Yunchang Yang and Di He and Kai Zheng and Shuxin Zheng and
               Chen Xing and Huishuai Zhang and Yanyan Lan and Liwei Wang and Tie{-}Yan Liu},
  title     = {On Layer Normalization in the Transformer Architecture},
  booktitle = {Proc. of ICML},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {10524--10533},
  year      = {2020},
}

@article{hendrycks2016gaussian,
  author  = {Hendrycks, Dan and
             Gimpel, Kevin},
  title   = {Gaussian error linear units ({GELU}s)},
  journal = {ArXiv preprint},
  volume  = {abs/1606.08415},
  year    = {2016},
}

@article{sergeev2018horovod,
  author  = {Sergeev, Alexander and Del Balso, Mike},
  title   = {Horovod: fast and easy distributed deep learning in {TensorFlow}},
  journal = {ArXiv preprint},
  volume  = {abs/1802.05799},
  year    = {2018},
}

@inproceedings{micikevicius2018mixed,
  author    = {Paulius Micikevicius and Sharan Narang and Jonah Alben and Gregory F. Diamos and
               Erich Elsen and David Garc{\'{\i}}a and Boris Ginsburg and Michael Houston and
               Oleksii Kuchaiev and Ganesh Venkatesh and Hao Wu},
  title     = {Mixed Precision Training},
  booktitle = {Proc. of ICLR},
  year      = {2018},
}

@inproceedings{szegedy2016rethinking,
  author    = {Christian Szegedy and Vincent Vanhoucke and Sergey Ioffe and Jonathon Shlens and Zbigniew Wojna},
  title     = {Rethinking the Inception Architecture for Computer Vision},
  booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2016, Las Vegas, NV, USA, June 27-30, 2016},
  pages     = {2818--2826},
  year      = {2016},
}

@article{srivastava2014dropout,
  author  = {Srivastava, Nitish and
             Hinton, Geoffrey and
             Krizhevsky, Alex and
             Sutskever, Ilya and
             Salakhutdinov, Ruslan},
  title   = {Dropout: a simple way to prevent neural networks from overfitting},
  journal = {JMLR},
  volume  = {15},
  number  = {1},
  pages   = {1929--1958},
  year    = {2014},
}

@inproceedings{he2016deep,
  author    = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} 2016, Las Vegas, NV, USA, June 27-30, 2016},
  pages     = {770--778},
  year      = {2016},
}

@article{berard2016listenb,
  author  = {B{\'e}rard, Alexandre and
             Pietquin, Olivier and
             Servan, Christophe and
             Besacier, Laurent},
  title   = {Listen and translate: A proof of concept for end-to-end speech-to-text translation},
  journal = {ArXiv preprint},
  volume  = {abs/1612.01744},
  year    = {2016},
}

@article{sak2015fasta,
  author  = {Sak, Ha{\c{s}}im and
             Senior, Andrew and
             Rao, Kanishka and
             Beaufays, Fran{\c{c}}oise},
  title   = {Fast and accurate recurrent neural network acoustic models for speech recognition},
  journal = {ArXiv preprint},
  volume  = {abs/1507.06947},
  year    = {2015},
}

@article{inaguma2019multilinguala,
  author  = {Inaguma, Hirofumi and
             Duh, Kevin and
             Kawahara, Tatsuya and
             Watanabe, Shinji},
  title   = {Multilingual end-to-end speech translation},
  journal = {ArXiv preprint},
  volume  = {abs/1910.00254},
  year    = {2019},
}

@inproceedings{ma2020simulmt,
  author    = {Ma, Xutai and Pino, Juan and Koehn, Philipp},
  title     = {{S}imul{MT} to {S}imul{ST}: Adapting Simultaneous Text Translation to End-to-End Simultaneous Speech Translation},
  booktitle = {Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing},
  pages     = {582--587},
  year      = {2020},
}

@inproceedings{dong2020cif,
  author    = {Linhao Dong and Bo Xu},
  title     = {{CIF:} Continuous Integrate-And-Fire for End-To-End Speech Recognition},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {6079--6083},
  year      = {2020},
}

@article{lapique1907recherches,
  author  = {Lapicque, Louis},
  title   = {Recherches quantitatives sur l'excitation {\'e}lectrique des nerfs trait{\'e}e comme une polarisation},
  journal = {Journal de Physiologie et de Pathologie G{\'e}n{\'e}rale},
  volume  = {9},
  pages   = {620--635},
  year    = {1907},
}

@article{abbott1999lapicque’s,
  author  = {Abbott, Larry F.},
  title   = {{Lapicque}'s introduction of the integrate-and-fire model neuron (1907)},
  journal = {Brain Research Bulletin},
  volume  = {50},
  number  = {5--6},
  pages   = {303--304},
  year    = {1999},
}

@inproceedings{ren2020simulspeech,
  author    = {Ren, Yi and Liu, Jinglin and Tan, Xu and Zhang, Chen and Qin, Tao and Zhao, Zhou and Liu, Tie-Yan},
  title     = {{S}imul{S}peech: End-to-End Simultaneous Speech to Text Translation},
  booktitle = {Proc. of ACL},
  pages     = {3787--3796},
  year      = {2020},
}

@inproceedings{ma2020simuleval,
  author    = {Ma, Xutai and Dousti, Mohammad Javad and Wang, Changhan and Gu, Jiatao and Pino, Juan},
  title     = {{SIMULEVAL}: An Evaluation Toolkit for Simultaneous Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {144--150},
  year      = {2020},
}

@article{cho2016can,
  author  = {Cho, Kyunghyun and
             Esipova, Masha},
  title   = {Can neural machine translation do simultaneous translation?},
  journal = {ArXiv preprint},
  volume  = {abs/1606.02012},
  year    = {2016},
}

@inproceedings{ma2019stacl,
  author    = {Ma, Mingbo and Huang, Liang and Xiong, Hao and Liu, Kaibo and Zhang, Chuanqiang and He, Zhongjun and Liu, Hairong and Li, Xing and Wang, Haifeng},
  title     = {{STACL}: Simultaneous translation with integrated anticipation and controllable latency},
  booktitle = {Proc. of ACL},
  pages     = {3025--3036},
  year      = {2019},
}

@article{cherry2019thinking,
  author  = {Cherry, Colin and
             Foster, George},
  title   = {Thinking slow about latency evaluation for simultaneous machine translation},
  journal = {ArXiv preprint},
  volume  = {abs/1906.00048},
  year    = {2019},
}

@article{san2021leveraging,
  author  = {San, Nay and
             Bartelds, Martijn and
             Browne, Mitchell and
             Clifford, Lily and
             Gibson, Fiona and
             Mansfield, John and
             Nash, David and
             Simpson, Jane and
             Turpin, Myfany and
             Vollmer, Maria and
             others},
  title   = {Leveraging neural representations for facilitating access to untranscribed speech from endangered languages},
  journal = {ArXiv preprint},
  volume  = {abs/2103.14583},
  year    = {2021},
}

@article{chen2020mam,
  author  = {Chen, Junkun and Ma, Mingbo and Zheng, Renjie and Huang, Liang},
  title   = {{MAM}: Masked Acoustic Modeling for End-to-End Speech-to-Text Translation},
  journal = {ArXiv preprint},
  volume  = {abs/2010.11445},
  year    = {2020},
}

@inproceedings{ma2021streaminga,
  author       = {Ma, Xutai and Wang, Yongqiang and Dousti, Mohammad Javad and Koehn, Philipp and Pino, Juan},
  title        = {Streaming Simultaneous Speech Translation with Augmented Memory Transformer},
  booktitle    = {ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  organization = {IEEE},
  pages        = {7523--7527},
  year         = {2021},
}

@inproceedings{indurthi2020end,
  author    = {Sathish Reddy Indurthi and Houjeung Han and Nikhil Kumar Lakumarapu and Beomseok Lee and
               Insoo Chung and Sangha Kim and Chanwoo Kim},
  title     = {End-end Speech-to-Text Translation with Modality Agnostic Meta-Learning},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7904--7908},
  year      = {2020},
}

@inproceedings{digangi2019data,
  author    = {Di Gangi, Mattia A. and
               Negri, Matteo and
               Nguyen, Viet Nhat and
               Tebbifakhr, Amirhossein and
               Turchi, Marco},
  title     = {Data Augmentation for End-to-End Speech Translation: {FBK}@{IWSLT} {`}19},
  booktitle = {Proceedings of the 16th International Conference on Spoken Language Translation},
  year      = {2019},
}

@inproceedings{mccarthy2020skinaugment,
  author    = {Arya D. McCarthy and Liezl Puzon and Juan Pino},
  title     = {{SkinAugment}: Auto-Encoding Speaker Conversions for Automatic Speech Translation},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7924--7928},
  year      = {2020},
}

@inproceedings{liu2020synchronous,
  author    = {Yuchen Liu and Jiajun Zhang and Hao Xiong and Long Zhou and
               Zhongjun He and Hua Wu and Haifeng Wang and Chengqing Zong},
  title     = {Synchronous Speech Recognition and Speech-to-Text Translation with Interactive Decoding},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {8417--8424},
  year      = {2020},
}

@article{fuegen2007simultaneous,
  author  = {F{\"u}gen, Christian and Waibel, Alex and Kolss, Muntsin},
  title   = {Simultaneous translation of lectures and speeches},
  journal = {Machine Translation},
  volume  = {21},
  number  = {4},
  pages   = {209--252},
  year    = {2007},
}

@inproceedings{oda2014optimizing,
  author    = {Oda, Yusuke and Neubig, Graham and Sakti, Sakriani and Toda, Tomoki and Nakamura, Satoshi},
  title     = {Optimizing Segmentation Strategies for Simultaneous Speech Translation},
  booktitle = {Proc. of ACL},
  pages     = {551--556},
  year      = {2014},
}

@inproceedings{dalvi2018incremental,
  author    = {Dalvi, Fahim and Durrani, Nadir and Sajjad, Hassan and Vogel, Stephan},
  title     = {Incremental Decoding and Training Methods for Simultaneous Translation in Neural Machine Translation},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {493--499},
  year      = {2018},
}

@inproceedings{ma2021streaming,
  author       = {Ma, Xutai and
                  Wang, Yongqiang and
                  Dousti, Mohammad Javad and
                  Koehn, Philipp and
                  Pino, Juan},
  title        = {Streaming Simultaneous Speech Translation with Augmented Memory Transformer},
  booktitle    = {ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  organization = {IEEE},
  pages        = {7523--7527},
  year         = {2021},
}

@inproceedings{wang2020fairseq,
  author    = {Wang, Changhan and Tang, Yun and Ma, Xutai and Wu, Anne and Okhonko, Dmytro and Pino, Juan},
  title     = {Fairseq {S}2{T}: Fast Speech-to-Text Modeling with Fairseq},
  booktitle = {Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: System Demonstrations},
  pages     = {33--39},
  year      = {2020},
}

@inproceedings{zheng2020fluent,
  author    = {Zheng, Renjie and Ma, Mingbo and Zheng, Baigong and Liu, Kaibo and
               Yuan, Jiahong and Church, Kenneth and Huang, Liang},
  title     = {Fluent and Low-latency Simultaneous Speech-to-Speech Translation with Self-adaptive Training},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  pages     = {3928--3937},
  year      = {2020},
}

@inproceedings{zheng2019simpler,
  author    = {Zheng, Baigong and Zheng, Renjie and Ma, Mingbo and Huang, Liang},
  title     = {Simpler and Faster Learning of Adaptive Policies for Simultaneous Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {1349--1354},
  year      = {2019},
}

@inproceedings{zhang2020learning,
  author    = {Zhang, Ruiqing and Zhang, Chuanqiang and He, Zhongjun and Wu, Hua and Wang, Haifeng},
  title     = {Learning Adaptive Segmentation Policy for Simultaneous Translation},
  booktitle = {Proc. of EMNLP},
  pages     = {2280--2289},
  year      = {2020},
}

@inproceedings{zhang2020dynamic,
  author    = {Zhang, Ruiqing and Zhang, Chuanqiang},
  title     = {Dynamic Sentence Boundary Detection for Simultaneous Translation},
  booktitle = {Proceedings of the First Workshop on Automatic Simultaneous Translation},
  pages     = {1--9},
  year      = {2020},
}

@inproceedings{elbayad2020efficient,
  author    = {Maha Elbayad and Laurent Besacier and Jakob Verbeek},
  title     = {Efficient Wait-k Models for Simultaneous Machine Translation},
  booktitle = {Proc. of INTERSPEECH},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {1461--1465},
  year      = {2020},
}

@inproceedings{zheng2020opportunistic,
  author    = {Zheng, Renjie and Ma, Mingbo and Zheng, Baigong and Liu, Kaibo and Huang, Liang},
  title     = {Opportunistic Decoding with Timely Correction for Simultaneous Translation},
  booktitle = {Proc. of ACL},
  pages     = {437--442},
  year      = {2020},
}

@inproceedings{arivazhagan2020rea,
  author    = {Arivazhagan, Naveen and
               Cherry, Colin and
               Macherey, Wolfgang and
               Foster, George},
  title     = {Re-translation versus Streaming for Simultaneous Translation},
  booktitle = {Proceedings of the 17th International Conference on Spoken Language Translation},
  pages     = {220--227},
  year      = {2020},
}

@inproceedings{arivazhagan2020re,
  author    = {Naveen Arivazhagan and Colin Cherry and I Te and Wolfgang Macherey and Pallavi Baljekar and George F. Foster},
  title     = {Re-Translation Strategies for Long Form, Simultaneous, Spoken Language Translation},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7919--7923},
  year      = {2020},
}

@inproceedings{arivazhagan2019monotonic,
  author    = {Arivazhagan, Naveen and Cherry, Colin and Macherey, Wolfgang and Chiu, Chung-Cheng and
               Yavuz, Semih and Pang, Ruoming and Li, Wei and Raffel, Colin},
  title     = {Monotonic Infinite Lookback Attention for Simultaneous Machine Translation},
  booktitle = {Proc. of ACL},
  pages     = {1313--1323},
  year      = {2019},
}

@inproceedings{ma2020monotonic,
  author    = {Xutai Ma and
               Juan Miguel Pino and
               James Cross and
               Liezl Puzon and
               Jiatao Gu},
  title     = {Monotonic Multihead Attention},
  booktitle = {Proc. of ICLR},
  year      = {2020},
}

@inproceedings{schneider2020towards,
  author    = {Schneider, Felix and Waibel, Alexander},
  title     = {Towards Stream Translation: Adaptive Computation Time for Simultaneous Machine Translation},
  booktitle = {Proceedings of the 17th International Conference on Spoken Language Translation},
  pages     = {228--236},
  year      = {2020},
}

@article{heller1999general,
  author  = {Heller, Peter Niels and
             Karp, Tanja and
             Nguyen, Truong Q},
  title   = {A general formulation of modulated filter banks},
  journal = {IEEE Transactions on Signal Processing},
  volume  = {47},
  number  = {4},
  pages   = {986--1002},
  year    = {1999},
}

@inproceedings{pardede2019generalized,
  author       = {Pardede, Hilman F and
                  Zilvan, Vicky and
                  Krisnandi, Dikdik and
                  Heryana, Ana and
                  Kusumo, R Budiarianto S},
  title        = {Generalized Filter-bank Features for Robust Speech Recognition Against Reverberation},
  booktitle    = {2019 International Conference on Computer, Control, Informatics and its Applications (IC3INA)},
  organization = {IEEE},
  pages        = {19--24},
  year         = {2019},
}

@inproceedings{zhang2021sparsifying,
  author    = {Zhang, Biao and
               Titov, Ivan and
               Sennrich, Rico},
  title     = {On Sparsifying Encoder Outputs in Sequence-to-Sequence Models},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {2888--2900},
  year      = {2021},
}

@inproceedings{chen2021direct,
  author    = {Chen, Junkun and Ma, Mingbo and Zheng, Renjie and Huang, Liang},
  title     = {Direct Simultaneous Speech-to-Text Translation Assisted by Synchronized Streaming {ASR}},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {4618--4624},
  year      = {2021},
}

@inproceedings{le2021lightweight,
  author    = {Le, Hang and
               Pino, Juan and
               Wang, Changhan and
               Gu, Jiatao and
               Schwab, Didier and
               Besacier, Laurent},
  title     = {Lightweight Adapter Tuning for Multilingual Speech Translation},
  booktitle = {Proc. of ACL},
  pages     = {817--824},
  year      = {2021},
}

@article{yi2021efficiently,
  author  = {Yi, Cheng and
             Zhou, Shiyu and
             Xu, Bo},
  title   = {Efficiently fusing pretrained acoustic and linguistic encoders for low-resource speech recognition},
  journal = {IEEE Signal Processing Letters},
  volume  = {28},
  pages   = {788--792},
  year    = {2021},
}

@article{chen2021mtg,
  author  = {Chen, Yiran and Song, Zhenqiao and Wu, Xianze and Wang, Danqing and Xu, Jingjing and Chen, Jiaze and Zhou, Hao and Li, Lei},
  title   = {{MTG}: A Benchmark Suite for Multilingual Text Generation},
  journal = {ArXiv preprint},
  volume  = {abs/2108.07140},
  year    = {2021},
}

@inproceedings{ye2022cross,
  author    = {Ye, Rong and
               Wang, Mingxuan and
               Li, Lei},
  title     = {Cross-modal Contrastive Learning for Speech Translation},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)},
  year      = {2022},
  owner     = {lilei.02},
  abstract  = {How to learn similar representations for spoken utterances and their written text? We believe a unified and aligned representation of speech and text will lead to improvement in speech translation. To this end, we propose ConST, a cross-modal contrastive learning method for end-to-end speech-to-text translation. We evaluate ConST and a variety of previous baselines on multiple language directions (En-De/Fr/Ru) of a popular benchmark MuST-C. Experiments show that the proposed ConST consistently outperforms all previous methods, and achieves the state-of-the-art average BLEU of 28.5. The analysis further verifies that ConST indeed closes the representation gap of different modalities --- its learned representation improves the accuracy of cross-modal text retrieval from 4% to 88%.},
}

@inproceedings{zhao2022provably,
  author    = {Zhao, Xuandong and
               Li, Lei and
               Wang, Yu-Xiang},
  title     = {Provably Confidential Language Modelling},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)},
  year      = {2022},
  owner     = {lilei.02},
  abstract  = {Large language models are shown to memorize privacy information such as social security numbers in training data. Given the sheer scale of the training corpus, it is challenging to screen and filter these privacy data, either manually or automatically. In this paper, we propose Confidentially Redacted Training (CRT), a method to train language generation models while protecting the confidential segments. We borrow ideas from differential privacy (which solves a related but distinct problem) and show that our method is able to provably prevent unintended memorization by randomizing parts of the training process. Moreover, we show that redaction with an approximately correct screening policy amplifies the confidentiality guarantee. We implement the method for both LSTM and GPT language models. Our experimental results show that the models trained by CRT obtain almost the same perplexity while preserving strong confidentiality.},
}

@inproceedings{li2022learning,
  author    = {Yunfei Li and Tao Kong and Lei Li and Yi Wu},
  title     = {Learning Design and Construction with Varying-Sized Materials via Prioritized Memory Resets},
  booktitle = {{IEEE} International Conference on Robotics and Automation ({ICRA})},
  year      = {2022},
  code      = {https://github.com/IrisLi17/bridge_construction},
  thumbnail = {li2022bridge_robot.jpg},
  abstract  = {Can a robot autonomously learn to design and construct a bridge from varying-sized blocks without a blueprint? It is a challenging task with long horizon and sparse reward – the robot has to figure out physically stable design schemes and feasible actions to manipulate and transport blocks. Due to diverse block sizes, the state space and action trajectories are vast to explore. In this paper, we propose a hierarchical approach for this problem. It consists of a reinforcement-learning designer to propose high-level building instructions and a motion-planning-based action generator to manipulate blocks at the low level. For high-level learning, we develop a novel technique, prioritized memory resetting (PMR) to improve exploration. PMR adaptively resets the state to those most critical configurations from a replay buffer so that the robot can resume training on partial architectures instead of from scratch. Furthermore, we augment PMR with auxiliary training objectives and fine-tune the designer with the locomotion generator. Our experiments in simulation and on a real deployed robotic system demonstrate that it is able to effectively construct bridges with blocks of varying sizes at a high success rate. Demos can be found at https://sites.google.com/view/bridge-pmr.},
}

@inproceedings{bao2022latent,
  author    = {Yu Bao and Hao Zhou and Shujian Huang and Dongqi Wang and Lihua Qian and Xinyu Dai and Jiajun Chen and Lei Li},
  title     = {latent-{GLAT}: Glancing at Latent Variables for Parallel Text Generation},
  booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2022},
  eprint    = {https://openreview.net/forum?id=y4xCe0MSoWx},
  code      = {https://github.com/baoy-nlp/Latent-GLAT},
  abstract  = {Recently, parallel text generation has received widespread attention due to its success in generation efficiency. Although many advanced techniques are proposed to improve its generation quality, they still need the help of an autoregressive model for training to overcome the one-to-many multi-modal phenomenon in the dataset, limiting their applications. In this paper, we propose latent-GLAT, which employs the discrete latent variables to capture word categorical information and invoke an advanced curriculum learning technique, alleviating the multi-modality problem. Experiment results show that our method outperforms strong baselines without the help of an autoregressive model, which further broadens the application scenarios of the parallel decoding paradigm.},
}

@inproceedings{dong2022learning,
  author    = {Qianqian Dong and Yaoming Zhu and Mingxuan Wang and Lei Li},
  title     = {Learning When to Translate for Streaming Speech},
  booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2022},
  eprint    = {https://openreview.net/forum?id=mBz73IzOI6},
  code      = {https://github.com/dqqcasia/mosst},
  abstract  = {How to find proper moments to generate partial sentence translation given a streaming speech input? Existing approaches waiting-and-translating for a fixed duration often break the acoustic units in speech, since the boundaries between acoustic units in speech are not even. In this paper, we propose MoSST, a simple yet effective method for translating streaming speech content. Given a usually long speech sequence, we develop an efficient monotonic segmentation module inside an encoder-decoder model to accumulate acoustic information incrementally and detect proper speech unit boundaries for the input in speech translation task. Experiments on multiple translation directions of the MuST-C dataset show that MoSST outperforms existing methods and achieves the best trade-off between translation quality (BLEU) and latency.},
}

@inproceedings{fang2022stemm,
 abstract = {How to learn a better speech representation for end-to-end speech-to-text translation (ST) with limited labeled data? Existing techniques often attempt to transfer powerful machine translation (MT) capabilities to ST, but neglect the representation discrepancy across modalities. In this paper, we propose the Speech-TExt Manifold Mixup (STEMM) method to calibrate such discrepancy. Specifically, we mix up the representation sequences of different modalities, and take both unimodal speech sequences and multimodal mixed sequences as input to the translation model in parallel, and regularize their output predictions with a self-learning framework. Experiments on MuST-C speech translation benchmark and further analysis show that our method effectively alleviates the cross-modal representation discrepancy, and achieves significant improvements over a strong baseline on eight translation directions.},
 author = {Qingkai Fang and Rong Ye and Lei Li and Yang Feng and Mingxuan Wang},
 booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
 code = {https://github.com/ictnlp/STEMM},
 eprint = {https://openreview.net/forum?id=kazCgft9cCH},
 title = {{STEMM}: Self-learning with Speech-text Manifold Mixup for Speech Translation},
 year = {2022}
}

@inproceedings{fu2022contextual,
 abstract = {How do masked language models (MLMs) such as BERT learn contextual representations? In this work, we analyze the learning dynamics of MLMs. We find that MLMs adopt sampled embeddings as anchors to estimate and inject contextual semantics to representations, which limits the efficiency and effectiveness of MLMs. To address these issues, we propose TACO, a simple yet effective representation learning approach to directly model global semantics. TACO extracts and aligns contextual semantics hidden in contextualized representations to encourage models to attend global semantics when generating contextualized representations. Experiments on the GLUE benchmark show that TACO achieves up to 5x speedup and up to 1.2 points average improvement over existing MLMs.},
 author = {Zhiyi Fu and Wangchunshu Zhou and Jingjing Xu and Hao Zhou and Lei Li},
 booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL)},
 code = {https://github.com/FUZHIYI/TACO},
 eprint = {https://openreview.net/forum?id=KWL_ElhUejN},
 title = {Contextual Representation Learning beyond Masked Language Modeling},
 year = {2022}
}

@inproceedings{chen2022e,
 abstract = {The ability to recognize analogies is fundamental to human cognition. Existing benchmarks to test word analogy do not reveal the underneath process of analogical reasoning of neural models. Holding the belief that models capable of reasoning should be right for the right reasons, we propose a first-of-its-kind Explainable Knowledge-intensive Analogical Reasoning benchmark (E-KAR). Our benchmark consists of 1,655 (in Chinese) and 1,251 (in English) problems sourced from the Civil Service Exams, which require intensive background knowledge to solve. More importantly, we design a free-text explanation scheme to explain whether an analogy should be drawn, and manually annotate them for each and every question and candidate answer. Empirical results suggest that this benchmark is very challenging for some state-of-the-art models for both explanation generation and analogical question answering tasks, which invites further research in this area. Project page of E-KAR can be found at https://ekar-leaderboard.github.io.},
 author = {Jiangjie Chen and Rui Xu and Ziquan Fu and Wei Shi and Zhongqiao Li and Xinbo Zhang and Changzhi Sun and Lei Li and Yanghua Xiao and Hao Zhou},
 booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL) - Findings},
 eprint = {https://openreview.net/forum?id=9kXOFRtrEj},
 title = {{E-KAR}: A Benchmark for Rationalizing Natural Language Analogical Reasoning},
 year = {2022}
}

@inproceedings{sun2022rethinking,
 abstract = {This paper does not aim at introducing a novel model for document-level neural machine translation. Instead, we head back to the original Transformer model and hope to answer the following question: Is the capacity of current models strong enough for document-level translation? Interestingly, we observe that the original Transformer with appropriate training techniques can achieve strong results for document translation, even with a length of 2000 words. We evaluate this model and several recent approaches on nine document-level datasets and two sentence-level datasets across six languages. Experiments show that document-level Transformer models outperforms sentence-level ones and many previous methods in a comprehensive set of metrics, including BLEU, four lexical indices, three newly proposed assistant linguistic indicators, and human evaluation. Our new datasets and evaluation scripts are in https://github.com/sunzewei2715/Doc2Doc_NMT.},
 author = {Zewei Sun and Mingxuan Wang and Hao Zhou and Chengqi Zhao and Shujian Huang and Jiajun Chen and Lei Li},
 booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL) - Findings},
 code = {https://github.com/sunzewei2715/Doc2Doc_NMT},
 eprint = {https://openreview.net/forum?id=sU9fYzNZ3xX},
 title = {Rethinking Document-level Neural Machine Translation},
 year = {2022}
}

@inproceedings{zhao2022compressing,
  author    = {Xuandong Zhao and Zhiguo Yu and Ming Wu and Lei Li},
  title     = {Compressing Sentence Representation via Homomorphic Projective Distillation},
  booktitle = {the 60th Annual Meeting of the Association for Computational Linguistics (ACL) - Findings},
  year      = {2022},
  eprint    = {https://openreview.net/forum?id=n3cvM4Phez9},
}

@inproceedings{song2022switch,
 abstract = {Multilingual machine translation aims to develop a single model for multiple language directions. However, existing multilingual models based on Transformer are limited in terms of both translation performance and inference speed. In this paper, we propose switch-GLAT, a non-autoregressive multilingual machine translation model with a code-switch decoder. It can generate contextual code-switched translations for a given source sentence, and perform code-switch back-translation, greatly boosting multilingual translation performance. In addition, its inference is highly efficient thanks to its parallel decoder. Experiments show that our proposed switch-GLAT outperform the multilingual Transformer with as much as 1.16 BLEU improvement and 6.6x faster decoding speed in inference.},
 author = {Zhenqiao Song and Hao Zhou and Lihua Qian and Jingjing Xu and Shanbo Cheng and Mingxuan Wang and Lei Li},
 booktitle = {International Conference on Learning Representations (ICLR)},
 eprint = {https://openreview.net/forum?id=5HvpvYd68b},
 owner = {lilei.02},
 title = {{switch-GLAT}: Multilingual Parallel Machine Translation via Code-switch Decoder},
 year = {2022}
}

@inproceedings{yang2022enhancing,
  author    = {Huiyun Yang and Huadong Chen and Hao Zhou and Lei Li},
  title     = {Enhancing Cross-lingual Transfer by Manifold Mixup},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2022},
  eprint    = {https://openreview.net/forum?id=OjPmfr9GkVv},
  owner     = {lilei.02},
}

@article{chen2020loren,
  author  = {Jiangjie Chen and Qiaoben Bao and Changzhi Sun and Xinbo Zhang and Jiaze Chen and Hao Zhou and Yanghua Xiao and Lei Li},
  title   = {{LOREN}: Logic-Regularized Reasoning for Interpretable Fact Verification},
  year    = {2020},
  volume  = {abs/2012.13577},
  journal = {ArXiv preprint},
}

@article{chen2021unsupervised,
  author  = {Jiangjie Chen and Chun Gan and Sijie Cheng and Hao Zhou and Yanghua Xiao and Lei Li},
  title   = {Unsupervised Editing for Counterfactual Stories},
  year    = {2021},
  volume  = {abs/2112.05417},
  journal = {ArXiv preprint},
}

@article{huang2021non,
  author  = {Chenyang Huang and Hao Zhou and Osmar Zaiane and Lili Mou and Lei Li},
  title   = {Non-Autoregressive Translation with Layer-Wise Prediction and Deep Supervision},
  year    = {2021},
  volume  = {abs/2110.07515},
  journal = {ArXiv preprint},
}

@article{chu2021icm,
 author = {Ruihang Chu and Yukang Chen and Tao Kong and Lu Qi and Lei Li},
 journal = {ArXiv preprint},
 title = {{ICM-3D}: Instantiated Category Modeling for {3D} Instance Segmentation},
 volume = {abs/2108.11771},
 year = {2021}
}

@article{wang2021solo,
 author = {Xinlong Wang and Rufeng Zhang and Chunhua Shen and Tao Kong and Lei Li},
 journal = {ArXiv preprint},
 title = {{SOLO}: A Simple Framework for Instance Segmentation},
 volume = {abs/2106.15947},
 year = {2021}
}

@article{zheng2021duplex,
 author = {Zaixiang Zheng and Hao Zhou and Shujian Huang and Jiajun Chen and Jingjing Xu and Lei Li},
 journal = {ArXiv preprint},
 title = {Duplex Sequence-to-Sequence Learning for Reversible Machine Translation},
 volume = {abs/2105.03458},
 year = {2021}
}

@inproceedings{qian2021volctrans,
  author    = {Qian, Lihua and Zhou, Yi and Zheng, Zaixiang and Zhu, Yaoming and Lin, Zehui and Feng, Jiangtao and Cheng, Shanbo and Li, Lei and Wang, Mingxuan and Zhou, Hao},
  title     = {The Volctrans {GLAT} System: Non-autoregressive Translation Meets {WMT}21},
  booktitle = {Proceedings of the Sixth Conference on Machine Translation},
  pages     = {187--196},
  year      = {2021},
}

@inproceedings{jiang2021learning,
  author    = {Jiang, Qingnan and Wang, Mingxuan and Cao, Jun and Cheng, Shanbo and Huang, Shujian and Li, Lei},
  title     = {Learning Kernel-Smoothed Machine Translation with Retrieved Examples},
  booktitle = {Proc. of EMNLP},
  pages     = {7280--7290},
  year      = {2021},
}

@inproceedings{ru2021learning,
  author    = {Ru, Dongyu and Sun, Changzhi and Feng, Jiangtao and Qiu, Lin and Zhou, Hao and Zhang, Weinan and Yu, Yong and Li, Lei},
  title     = {Learning Logic Rules for Document-Level Relation Extraction},
  booktitle = {Proc. of EMNLP},
  pages     = {1239--1250},
  year      = {2021},
}

@inproceedings{zeng2021gradient,
  author    = {Zeng, Zhiyuan and Chen, Jiaze and Xu, Weiran and Li, Lei},
  title     = {Gradient-Based Adversarial Factual Consistency Evaluation for Abstractive Summarization},
  booktitle = {Proc. of EMNLP},
  pages     = {4102--4108},
  year      = {2021},
}

@inproceedings{sun2021multilingual,
  author    = {Sun, Zewei and Wang, Mingxuan and Li, Lei},
  title     = {Multilingual Translation via Grafting Pre-trained Language Models},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  pages     = {2735--2747},
  year      = {2021},
}

@inproceedings{wang2021secoco,
  author    = {Wang, Tao and Zhao, Chengqi and Wang, Mingxuan and Li, Lei and Li, Hang and Xiong, Deyi},
  title     = {Secoco: Self-Correcting Encoding for Neural Machine Translation},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  pages     = {4639--4644},
  year      = {2021},
}

@article{wang2021cnewsum,
 author = {Danqing Wang and Jiaze Chen and Xianze Wu and Hao Zhou and Lei Li},
 journal = {ArXiv preprint},
 title = {{CNewSum}: A Large-scale {Chinese} News Summarization Dataset with Human-annotated Adequacy and Deducibility Level},
 volume = {abs/2110.10874},
 year = {2021}
}

@article{li2021learning,
 author = {Yunfei Li and Tao Kong and Lei Li and Yifeng Li and Yi Wu},
 journal = {ArXiv preprint},
 title = {Learning to Design and Construct Bridge without Blueprint},
 volume = {abs/2108.02439},
 year = {2021}
}

@article{li2021simultaneous,
 author = {Yiming Li and Tao Kong and Ruihang Chu and Yifeng Li and Peng Wang and Lei Li},
 journal = {ArXiv preprint},
 title = {Simultaneous Semantic and Collision Learning for {6-DoF} Grasp Pose Estimation},
 volume = {abs/2108.02425},
 year = {2021}
}

@article{shi2021follow,
 author = {Wenxian Shi and Yuxuan Song and Hao Zhou and Bohan Li and Lei Li},
 journal = {ArXiv preprint},
 title = {Follow Your Path: a Progressive Method for Knowledge Distillation},
 volume = {abs/2107.09305},
 year = {2021}
}

@inproceedings{zhao2021volctrans,
  author    = {Zhao, Chengqi and Liu, Zhicheng and Tong, Jian and Wang, Tao and Wang, Mingxuan and Ye, Rong and Dong, Qianqian and Cao, Jun and Li, Lei},
  title     = {The Volctrans Neural Speech Translation System for {IWSLT} 2021},
  booktitle = {Proceedings of the 18th International Conference on Spoken Language Translation (IWSLT 2021)},
  pages     = {64--74},
  year      = {2021},
}

@inproceedings{wang2021autocorrect,
  author    = {Wang, Tao and Zhao, Chengqi and Wang, Mingxuan and Li, Lei and Xiong, Deyi},
  title     = {Autocorrect in the Process of Translation {---} Multi-task Learning Improves Dialogue Machine Translation},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers},
  pages     = {105--112},
  year      = {2021},
}

@article{chen2021scale,
 author = {Yukang Chen and Yanwei Li and Tao Kong and Lu Qi and Ruihang Chu and Lei Li and Jiaya Jia},
 journal = {ArXiv preprint},
 title = {Scale-aware Automatic Augmentation for Object Detection},
 volume = {abs/2103.17220},
 year = {2021}
}

@article{jing2021locate,
 author = {Ya Jing and Tao Kong and Wei Wang and Liang Wang and Lei Li and Tieniu Tan},
 journal = {ArXiv preprint},
 title = {Locate then Segment: A Strong Pipeline for Referring Image Segmentation},
 volume = {abs/2103.16284},
 year = {2021}
}

@article{sun2020sparse,
  author  = {Peize Sun and Rufeng Zhang and Yi Jiang and Tao Kong and Chenfeng Xu and Wei Zhan and Masayoshi Tomizuka and Lei Li and Zehuan Yuan and Changhu Wang and Ping Luo},
  title   = {Sparse {R-CNN}: End-to-End Object Detection with Learnable Proposals},
  year    = {2020},
  volume  = {abs/2011.12450},
  journal = {ArXiv preprint},
}

@article{wang2020dense,
  author  = {Xinlong Wang and Rufeng Zhang and Chunhua Shen and Tao Kong and Lei Li},
  title   = {Dense Contrastive Learning for Self-Supervised Visual Pre-Training},
  year    = {2020},
  volume  = {abs/2011.09157},
  journal = {ArXiv preprint},
}

@inproceedings{xie2021mars,
 author = {Yutong Xie and Chence Shi and Hao Zhou and Yuwei Yang and Weinan Zhang and Yong Yu and Lei Li},
 booktitle = {Proc. of ICLR},
 title = {{MARS:} {Markov} Molecular Sampling for Multi-objective Drug Discovery},
 year = {2021}
}

@InProceedings{wang2021enparenhancing,
  author    = {Wang, Yijun and Sun, Changzhi and Wu, Yuanbin and Zhou, Hao and Li, Lei and Yan, Junchi},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  title     = {{ENPAR}: Enhancing Entity and Entity Pair Representations for Joint Entity Relation Extraction},
  year      = {2021},
  pages     = {2877--2887},
}

@patent{chen2021methoda,
  author      = {Chen, Yangyu and He, Yi and Li, Lei},
  title       = {Method and Device for determining geometric transformation relation for images},
  number      = {16/981,240},
  nationality = {US},
  year        = {2021},
  day         = {20},
  yearfiled   = {2018},
}

@patent{he2021image,
  author      = {He, Yi and Li, Gen and Li, Lei},
  title       = {Image processing method and device},
  number      = {201910498629.6},
  nationality = {CN},
  year        = {2021},
  day         = {29},
  yearfiled   = {2019},
  dayfiled    = {10},
  comment     = {图像处理方法、装置、电子设备及计算机可读存储介质(一种概率的图片哈希表达方法)},
}

@patent{huang2021target,
  author      = {Huang, Xunpeng and Liu, Zhengyang and Li, Lei},
  title       = {Target Object Classification method and device},
  number      = {202010057296.6},
  nationality = {CN},
  year        = {2021},
  day         = {20},
  yearfiled   = {2020},
  dayfiled    = {17},
  comment     = {目标对象的分类方法、装置和电子设备( 一种快速的近似二阶牛顿优化算法 )},
}

@patent{li2021method,
  author      = {Li, Lei and Chen, Jiaze and Chen, Jiamin and Ma, Weiying and Hua, Lifeng},
  title       = {Method and Device for generating information},
  number      = {201811455645.9},
  nationality = {CN},
  assignee    = {Beijing Bytedance Tech Co. Ltd},
  year        = {2021},
  day         = {13},
  yearfiled   = {2019},
  dayfiled    = {1},
  comment     = {用于处理文本的方法和装置},
  owner       = {lilei.02},
}

@article{song2021triangular,
 author = {Zhenqiao Song and Jiaze Chen and Hao Zhou and Lei Li},
 journal = {ArXiv preprint},
 title = {Triangular Bidword Generation for Sponsored Search Auction},
 volume = {abs/2101.11349},
 year = {2021}
}

@patent{she2021method,
  author      = {She, Heng and Wang, Yang and Guo, Yinuo and Zhang, Huiru and Li, Yitan and Li, Lei and Li, Hang},
  title       = {Method and Device for Push-Notifying Information},
  number      = {201811562666.0},
  nationality = {CN},
  year        = {2021},
  day         = {19},
  yearfiled   = {2018},
  dayfiled    = {18},
  comment     = {用于推送信息的方法和装置},
}

@article{huang2020acmo,
  author  = {Xunpeng Huang and Runxin Xu and Hao Zhou and Zhe Wang and Zhengyang Liu and Lei Li},
  title   = {{ACMo}: Angle-Calibrated Moment Methods for Stochastic Optimization},
  year    = {2020},
  volume  = {abs/2006.07065},
  journal = {ArXiv preprint},
}

@article{wu2020textgail,
  author  = {Qingyang Wu and Lei Li and Zhou Yu},
  title   = {{TextGAIL}: Generative Adversarial Imitation Learning for Text Generation},
  year    = {2020},
  volume  = {abs/2004.13796},
  journal = {ArXiv preprint},
}

@article{zhang2021taxonomy,
 author = {Jieyu Zhang and Xiangchen Song and Ying Zeng and Jiaze Chen and Jiaming Shen and Yuning Mao and Lei Li},
 journal = {ArXiv preprint},
 title = {Taxonomy Completion via Triplet Matching Network},
 volume = {abs/2101.01896},
 year = {2021}
}

@patent{chen2021method,
 author = {Chen, Jiaze and Li, Lei and Zeng, Ying and Ma, Weiying},
 comment = {生成物品描述信息的方法和装置},
 day = {29},
 dayfiled = {2},
 nationality = {CN},
 number = {201811457980.2},
 title = {Method and Device for generating product description information},
 year = {2021},
 yearfiled = {2018}
}

@patent{deng2021method,
  author      = {Deng, Jiangdong and Li, Lei and Ma, Weiying},
  title       = {Method and device for stock selection},
  number      = {201810910344.4},
  nationality = {CN},
  assignee    = {Beijing Bytedance Network Tech Co. Ltd},
  year        = {2021},
  day         = {1},
  yearfiled   = {2018},
  dayfiled    = {10},
  comment     = {股票的筛选方法及装置、计算机设备及可读存储介质},
  owner       = {lilei.02},
}

@patent{he2021duplicate,
 assignee = {Beijing Bytedance Network Tech Co.},
 author = {He, Yi and Yang, Cheng and Li, Gen and Li, Yitan and Li, Lei},
 comment = {一种重复视频的判断方法及装置},
 day = {19},
 dayfiled = {29},
 nationality = {CN},
 number = {201810273706.3},
 owner = {lilei.02},
 title = {Duplicate video detection method and device},
 year = {2021},
 yearfiled = {2018}
}

@inproceedings{wang2020solov2,
 author = {Xinlong Wang and Rufeng Zhang and Tao Kong and Lei Li and Chunhua Shen},
 booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
 editor = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
 title = {{SOLOv2}: Dynamic and Fast Instance Segmentation},
 year = {2020}
}

@patent{deng2020sentiment,
 author = {Jiangdong Deng and Lei Li and Weiying Ma},
 comment = {舆情指数的预测方法及装置、计算机设备和可读存储介质},
 day = {18},
 dayfiled = {10},
 nationality = {CN},
 number = {201810909879.X},
 owner = {lilei.02},
 title = {Sentiment Prediction Method and Device},
 type = {patent},
 year = {2020},
 yearfiled = {2018}
}

@article{wang2019solo,
  author  = {Xinlong Wang and Tao Kong and Chunhua Shen and Yuning Jiang and Lei Li},
  title   = {{SOLO}: {S}egmenting {O}bjects by {L}ocations},
  year    = {2019},
  volume  = {abs/1912.04488},
  journal = {ArXiv preprint},
}

@inproceedings{ru2020quachie,
 author = {Dongyu Ru and Zhenghui Wang and Lin Qiu and Hao Zhou and Lei Li and Weinan Zhang and Yong Yu},
 booktitle = {Proc. of SIGIR},
 editor = {Jimmy Huang and Yi Chang and Xueqi Cheng and Jaap Kamps and Vanessa Murdock and Ji{-}Rong Wen and Yiqun Liu},
 pages = {2177--2180},
 title = {{QuAChIE}: Question Answering based {Chinese} Information Extraction System},
 year = {2020}
}

@inproceedings{hua2020xref,
  author    = {Xinyu Hua and Lei Li and Lifeng Hua and Lu Wang},
  title     = {{XREF}: Entity Linking for {Chinese} News Comments with Supplementary Article Reference},
  booktitle = {Automated Knowledge Base Construction (AKBC)},
  year      = {2020},
  abstract  = {Automatic identification of mentioned entities in social media posts facilitates quick digestion of trending topics and popular opinions. Nonetheless, this remains a challenging task due to limited context and diverse name variations. In this paper, we study the problem of entity linking for Chinese news comments given mentions’ spans. We hypothesize that comments often refer to entities in the corresponding news article, as well as topics involving the entities. We therefore propose a novel model, XREF, that leverages attention mechanisms to (1) pinpoint relevant context within comments, and (2) detect supporting entities from the news article. To improve training, we make two contributions: (a) we propose a supervised attention loss in addition to the standard cross entropy, and (b) we develop a weakly supervised training scheme to utilize the large-scale unlabeled corpus. Two new datasets in entertainment and product domains are collected and annotated for experiments. Our proposed method outperforms previous methods on both datasets.},
}

@article{kong2019foveabox,
 author = {Tao Kong and Fuchun Sun and Huaping Liu and Yuning Jiang and Lei Li and Jianbo Shi},
 journal = {ArXiv preprint},
 title = {{FoveaBox}: Beyound Anchor-based Object Detection},
 volume = {abs/1904.03797},
 year = {2019}
}

@article{wu2020towards,
 author = {Wu, Fei and Lu, Cewu and Zhu, Mingjie and Chen, Hao and Zhu, Jun and Yu, Kai and Li, Lei and Li, Ming and Chen, Qianfeng and Li, Xi and Cao, Xudong and Wang, Zhongyuan and Zha, Zhengjun and Zhuang, Yueting and Pan, Yunhe},
 eprint = {https://rdcu.be/b5vk7},
 journal = {Nature Machine Intelligence},
 pages = {312--316},
 title = {Towards a new generation of artificial intelligence in {China}},
 volume = {2},
 year = {2020}
}

@patent{yu2020method,
  author      = {Yu, Linyun and Li, Lei and Yin, Haibin and Zhu, Wenjia and Jiang, Dong},
  title       = {Method and Apparatus for generating image},
  number      = {201810668219.7},
  nationality = {CN},
  assignee    = {Beijing Bytedance Tech Co. Ltd},
  year        = {2020},
  day         = {28},
  yearfiled   = {2018},
  dayfiled    = {26},
  comment     = {用于生成图像的方法和装置},
  owner       = {lilei.02},
}

@patent{deng2020method,
 assignee = {Beijing Bytedance Network Tech Co. Ltd},
 author = {Deng, Jiangdong and Peng, Qu and Li, Lei and Ma, Weiying},
 comment = {用于输出信息的方法和装置},
 day = {27},
 dayfiled = {4},
 nationality = {CN},
 number = {201811074033.5},
 owner = {lilei.02},
 title = {A method for outputting information},
 year = {2020},
 yearfiled = {2018}
}

@patent{li2020systems,
 assignee = {Baidu USA LLC},
 author = {Li, Lei and Dai, Zihang and Xu, Wei},
 day = {31},
 dayfiled = {23},
 nationality = {US},
 number = {US10606846B2},
 owner = {lilei.02},
 title = {Systems and methods for human inspired simple question answering ({HISQA})},
 year = {2020},
 yearfiled = {2016}
}

@inproceedings{huang2020span,
  author    = {Xunpeng Huang and Xianfeng Liang and Zhengyang Liu and Lei Li and Yue Yu and Yitan Li},
  title     = {{SPAN:} {A} Stochastic Projected Approximate Newton Method},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages     = {1520--1527},
  year      = {2020},
}

@inproceedings{wang2020task,
 author = {Xinlong Wang and Wei Yin and Tao Kong and Yuning Jiang and Lei Li and Chunhua Shen},
 booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
 pages = {12257--12264},
 title = {Task-Aware Monocular Depth Estimation for {3D} Object Detection},
 year = {2020}
}

@patent{he2020method,
 assignee = {Beijing Bytedance Network Tech Co. Ltd},
 author = {He, Yi and Li, Lei and Zong, Xianzi and Tang, Hao and Zheng, Guangguo},
 comment = {用于搜索信息的方法和装置},
 day = {4},
 dayfiled = {12},
 nationality = {CN},
 number = {201811060981.3},
 owner = {lilei.02},
 title = {Method and Device for searching information},
 year = {2020},
 yearfiled = {2018}
}

@patent{yu2020methoda,
  author      = {Yu, Linyun and Li, Lei and Yin, Haibin and Jiang, Dong},
  title       = {Method and apparatus for generating image},
  number      = {201810669838.8},
  nationality = {CN},
  assignee    = {Beijing Bytedance Tech Co. Ltd},
  year        = {2020},
  day         = {7},
  yearfiled   = {2018},
  dayfiled    = {26},
  comment     = {用于生成图像的方法和装置},
  owner       = {lilei.02},
}

@inproceedings{zhao2019what,
  author    = {Zhichen Zhao and Lei Li and Bowen Zhang and Meng Wang and Yuning Jiang and Li Xu and Fengkun Wang and Wei{-}Ying Ma},
  title     = {What You Look Matters?: Offline Evaluation of Advertising Creatives for Cold-start Problem},
  booktitle = {Proceedings of the 28th {ACM} International Conference on Information and Knowledge Management, {CIKM} 2019, Beijing, China, November 3-7, 2019},
  editor    = {Wenwu Zhu and Dacheng Tao and Xueqi Cheng and Peng Cui and Elke A. Rundensteiner and David Carmel and Qi He and Jeffrey Xu Yu},
  pages     = {2605--2613},
  year      = {2019},
}

@patent{chen2019method,
  author      = {Chen, Yangyu and He, Yi and Li, Lei},
  title       = {A method and apparatus for determining a geometric transformation relationship between images},
  number      = {201811060837.X},
  nationality = {CN},
  assignee    = {Beijing Bytedance Network Tech Co. Ltd},
  year        = {2019},
  day         = {12},
  yearfiled   = {2018},
  dayfiled    = {12},
  comment     = {用于确定图像间的几何变换关系的方法和装置},
  owner       = {lilei.02},
}

@inproceedings{jiang2019svd,
  author    = {Qing{-}Yuan Jiang and Yi He and Gen Li and Jian Lin and Lei Li and Wu{-}Jun Li},
  title     = {{SVD:} {A} Large-Scale Short Video Dataset for Near-Duplicate Video Retrieval},
  booktitle = {2019 {IEEE/CVF} International Conference on Computer Vision, {ICCV} 2019, Seoul, Korea (South), October 27 - November 2, 2019},
  pages     = {5280--5288},
  year      = {2019},
}

@inproceedings{fu2019rethinking,
  author    = {Fu, Yao and Zhou, Hao and Chen, Jiaze and Li, Lei},
  title     = {Rethinking Text Attribute Transfer: A Lexical Analysis},
  booktitle = {Proceedings of the 12th International Conference on Natural Language Generation},
  pages     = {24--33},
  year      = {2019},
}

@inproceedings{lu2019uncovering,
  author    = {Yunfei Lu and Linyun Yu and Peng Cui and Chengxi Zang and Renzhe Xu and Yihao Liu and Lei Li and Wenwu Zhu},
  title     = {Uncovering the Co-driven Mechanism of Social and Content Links in User Churn Phenomena},
  booktitle = {Proceedings of the 25th {ACM} {SIGKDD} International Conference on Knowledge Discovery {\&} Data Mining, {KDD} 2019, Anchorage, AK, USA, August 4-8, 2019},
  editor    = {Ankur Teredesai and Vipin Kumar and Ying Li and R{\'{o}}mer Rosales and Evimaria Terzi and George Karypis},
  pages     = {3093--3101},
  year      = {2019},
}

@inproceedings{sun2019graspsnooker,
 author = {Zhaoyue Sun and Jiaze Chen and Hao Zhou and Deyu Zhou and Lei Li and Mingmin Jiang},
 booktitle = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16, 2019},
 editor = {Sarit Kraus},
 pages = {6569--6571},
 title = {{GraspSnooker}: Automatic {Chinese} Commentary Generation for Snooker Videos},
 year = {2019}
}

@patent{jiang2019construction,
 assignee = {Beijing Bytedance Network Tech Co.},
 author = {Jiang, Dong and Zhao, Yanbin and Hou, Shuang and Xia, Xuhong and Li, Lei and Hong, Dingkun},
 comment = {一种语音分类模型的构建方法及装置},
 day = {5},
 dayfiled = {27},
 nationality = {CN},
 number = {201710388497.2},
 owner = {lilei.02},
 title = {Construction method and device of voice classification model},
 year = {2019},
 yearfiled = {2017}
}

@inproceedings{cao2018brits,
  author    = {Wei Cao and Dong Wang and Jian Li and Hao Zhou and Lei Li and Yitan Li},
  title     = {{BRITS:} Bidirectional Recurrent Imputation for Time Series},
  booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  editor    = {Samy Bengio and Hanna M. Wallach and Hugo Larochelle and Kristen Grauman and Nicol{\`{o}} Cesa{-}Bianchi and Roman Garnett},
  pages     = {6776--6786},
  year      = {2018},
}

@inproceedings{li2018overview,
 author = {Lei Li and Xiaojun Wan},
 booktitle = {Proc. of NLPCC},
 title = {Overview of the {NLPCC} 2018 shared task: Single document summarization},
 year = {2018}
}

@inproceedings{li2018jersey,
 abstract = {It is still a challenging task to recognize the jersey number
of players on the court in soccer match videos, as the
jersey numbers are very small in the object detection task
and annotated data are not easy to collect. Based on the
object detection results of all the players on the court, a
CNN model is first introduced to classify these numbers on
the detected players’ images. To localize the jersey number
more precisely without involving another digit detector and
extra consumption, we then improve the former network to
an end-to-end framework by fusing with the spatial transformer
network (STN). To further improve the accuracy, we
bring extra supervision to STN and upgrade the model to
a semi-supervised multi-task learning system, by labeling a
small portion of the number areas in the dataset by quadrangle.
Extensive experiments illustrate the effectiveness of
the proposed framework.},
 author = {Li, Gen and Xu, Shikun and Liu, Xiang and Li, Lei and Wang, Changhu},
 booktitle = {IEEE Conference on Computer Vision and Pattern Recognition workshops, Computer Vision in Sports},
 entrysubtype = {workshop},
 pages = {1864--1871},
 title = {Jersey Number Recognition with Semi-Supervised Spatial Transformer Network},
 year = {2018}
}

@inproceedings{wu2018reinforced,
  author    = {Wu, Jiawei and Li, Lei and Wang, William Yang},
  title     = {Reinforced Co-Training},
  booktitle = {Proc. of NAACL-HLT},
  pages     = {1252--1262},
  year      = {2018},
}

@inproceedings{erol2017nearly,
  author    = {Yusuf Bugra Erol and Yi Wu and Lei Li and Stuart J. Russell},
  title     = {A Nearly-Black-Box Online Algorithm for Joint Parameter and State Estimation in Temporal Models},
  booktitle = {Proc. of AAAI},
  editor    = {Satinder P. Singh and Shaul Markovitch},
  pages     = {1861--1869},
  year      = {2017},
}

@article{matsubara2017non,
  author   = {Matsubara, Yasuko and Sakurai, Yasushi and Prakash, B. Aditya and Li, Lei and Faloutsos, Christos},
  title    = {Non-linear Dynamics of Information Diffusion in Social Networks},
  journal  = {ACM Transactions on the Web},
  volume   = {11},
  number   = {1},
  year     = {2017},
  comment  = {The earlier version of the paper appeared in KDD'12. This version includes significant extension.},
  abstract = {The recent explosion in the adoption of search engines and new media such as blogs and Twitter have facilitated the faster propagation of news and rumors. How quickly does a piece of news spread over these media? How does its popularity diminish over time? Does the rising and falling pattern follow a simple universal law? In this paper, we propose SPIKEM, a concise yet flexible analytical model of the rise and fall patterns of information diffusion. Our model has the following advantages: (a) unification power: it explains earlier empirical observations and generalizes theoretical models including the SI and SIR models. We provide the threshold of the take-off vs. die-out conditions for SPIKEM, and discuss the generality of our model, by applying it to an arbitrary graph topology; (b) practicality: it matches the observed behavior of diverse sets of real data; (c) parsimony: it requires only a handful of parameters; and (d) usefulness: it makes it possible to perform analytic tasks such as forecasting, spotting anomalies, and interpretation by reverse engineering the system parameters of interest (e.g. quality of news, number of interested bloggers, etc.). We also introduce an efficient and effective algorithm for the real-time monitoring of information diffusion, namely, SPIKESTREAM, which identifies multiple diffusion patterns in a large collection of online event streams. Extensive experiments on real datasets demonstrate that SPIKEM accurately and succinctly describes all the patterns of the rise-and-fall spikes in social networks.},
}

@inproceedings{wu2016swift,
  author    = {Yi Wu and Lei Li and Stuart J. Russell and Rastislav Bod{\'{\i}}k},
  title     = {Swift: Compiled Inference for Probabilistic Programming Languages},
  booktitle = {Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July 2016},
  editor    = {Subbarao Kambhampati},
  pages     = {3637--3645},
  year      = {2016},
}

@inproceedings{dai2016cfo,
  author    = {Dai, Zihang and Li, Lei and Xu, Wei},
  title     = {{CFO}: Conditional Focused Neural Question Answering with Large-scale Knowledge Bases},
  booktitle = {Proc. of ACL},
  pages     = {800--810},
  year      = {2016},
}

@misc{lu2015twisted,
 author = {Lu, Zefu and Li, Lei and Xu, Wei},
 booktitle = {Bay Area Machine Learning Symposium},
 howpublished = {Bay Area Machine Learning Symposium},
 owner = {leili},
 title = {Twisted Recurrent Network for Named Entity Recognition},
 year = {2015}
}

@misc{pham2015optimization,
 author = {Pham, Hieu and Dai, Zihang and Li, Lei},
 booktitle = {Bay Area Machine Learning Symposium},
 howpublished = {Bay Area Machine Learning Symposium},
 owner = {leili},
 title = {On Optimization Algorithms for Recurrent Networks with Long Short-Term Memory},
 year = {2015}
}

@inproceedings{wu2014bfit,
 abstract = {In recent years, several probabilistic programming languages (PPLs) have emerged, such as Bayesian Logic (BLOG), Church, and Figaro. These languages can be classified into two categories: PPLs interpreted using possible-world semantics and ones using random-evaluation semantics. In this paper, we explicitly analyze the equivalence between these two semantics in the context of open-universe probability models (OUPMs). We propose a novel dynamic memoization technique to construct OUPMs using procedural instructions in random-evaluation based PPLs. We implemented a translator named BFiT, which converts code in BLOG (possible-world based) to Figaro (random-evaluation based). The translated program in Figaro exhibits a merely constant blowup factor in program size while yielding the same inference results as the original model in BLOG.},
 author = {Wu, Yi and Li, Lei and Russell, Stuart J.},
 booktitle = {Neural Information Processing Systems, Probabilistic Programming workshop},
 entrysubtype = {workshop},
 file = {:pubs/wu2014bfit - BFiT_ From Possible-World Semantics to Random-Evaluation Semantics in Open Universe.pdf:PDF},
 owner = {leili},
 slides = {pubs/wu-2014-bfit-poster.pdf},
 title = {{BFiT}: From Possible-World Semantics to Random-Evaluation Semantics in Open Universe},
 year = {2014}
}

@inproceedings{du2014maxios,
 abstract = {Nonnegative matrix factorization proved useful in many applications, including collaborative filtering – from existing ratings data one would like to predict new product ratings by users. However, factorizing a user-product score matrix is computation and memory intensive. We propose Maxios, a novel approach to fill missing values for large scale and highly sparse matrices efficiently and accurately. We formulate the matrix-completion problem as weighted nonnegative matrix factorization. In addition, we develop distributed update rules using alternating direction method of multipliers. We have implemented the Maxios system on top of Spark, a distributed in-memory computation framework. Experiments on commercial clusters show that Maxios is competitive in terms of scalability and accuracy against the existing solutions on a variety of datasets.},
 author = {Du, Simon Shaolei and Liu, Yilin and Chen, Boyi and Li, Lei},
 booktitle = {Neural Information Processing Systems, workshop on Distributed Machine Learning and Matrix Computations},
 entrysubtype = {workshop},
 file = {:pubs/du2014maxios - Maxios_ Large Scale Nonnegative Matrix Factorization for Collaborative Filtering.pdf:PDF},
 owner = {leili},
 slides = {pubs/du-2014-maxios-poster.pdf},
 title = {Maxios: Large Scale Nonnegative Matrix Factorization for Collaborative Filtering},
 year = {2014}
}

@inproceedings{juan2014poisson,
  author    = {Juan, Da-Cheng and Li, Lei and Peng, Huan-Kai and Marculescu, Diana and Faloutsos, Christos},
  title     = {Beyond {Poisson}: Modeling Inter-Arrival Times of Requests in a Datacenter},
  booktitle = {The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)},
  year      = {2014},
  file      = {:pubs/juan2014poisson - Beyond Poisson_ Modeling Inter-Arrival Times of Requests in a Datacenter.pdf:PDF},
  owner     = {leili},
}

@inproceedings{li2013dynamic,
  author    = {Lei Li and Bharath Ramsundar and Stuart J. Russell},
  title     = {Dynamic Scaled Sampling for Deterministic Constraints},
  booktitle = {Proceedings of the Sixteenth International Conference on Artificial Intelligence and Statistics, {AISTATS} 2013, Scottsdale, AZ, USA, April 29 - May 1, 2013},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {31},
  pages     = {397--405},
  year      = {2013},
}

@inproceedings{vikram2013handwriting,
  author    = {Vikram, Sharad and Li, Lei and Russell, Stuart},
  title     = {Handwriting and Gestures in the Air, Recognizing on the Fly},
  booktitle = {ACM Conference on Human Factors in Computing Systems (CHI) Extended Abstracts},
  year      = {2013},
  file      = {:pubs/vikram2013handwriting - Handwriting and Gestures in the Air, Recognizing on the Fly.pdf:PDF},
  owner     = {leili},
  abstract  = {Recent technologies in vision sensors are capable of capturing 3D finger positions and movements. We propose a novel way to control and interact with computers by moving fingers in the air. The positions of fingers are precisely captured by a computer vision device. By tracking the moving patterns of fingers, we can then recognize users’ intended control commands or input information. We demonstrate this human input approach through an example application of handwriting recognition. By treating the input as a time series of 3D positions, we propose a fast algorithm using dynamic time warping to recognize characters in online fashion. We employ various optimization techniques to recognize in real time as one writes. Experiments show promising recognition performance and speed.},
}

@inproceedings{liu2013hibernating,
  author    = {Liu, Siyuan and Li, Lei and Krishnan, Ramayya},
  title     = {Hibernating Process: Modelling Mobile Calls at Multiple Scales},
  booktitle = {IEEE International Conference on Data Mining (ICDM)},
  year      = {2013},
  file      = {:pubs/liu2013hibernating - Hibernating Process_ Modelling Mobile Calls at Multiple Scales.pdf:PDF},
  owner     = {leili},
}

@inproceedings{erol2013extended,
  author    = {Yusuf Erol and Lei Li and Bharath Ramsundar and Stuart J. Russell},
  title     = {The Extended Parameter Filter},
  booktitle = {Proc. of ICML},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {28},
  pages     = {1103--1111},
  year      = {2013},
}

@inproceedings{fu2013why,
  author    = {Bin Fu and Jialiu Lin and Lei Li and Christos Faloutsos and Jason I. Hong and Norman M. Sadeh},
  title     = {Why people hate your app: making sense of user feedback in a mobile app store},
  booktitle = {Proc. of KDD},
  editor    = {Inderjit S. Dhillon and Yehuda Koren and Rayid Ghani and Ted E. Senator and Paul Bradley and Rajesh Parekh and Jingrui He and Robert L. Grossman and Ramasamy Uthurusamy},
  pages     = {1276--1284},
  year      = {2013},
}

@inproceedings{rogers2013multilinear,
  author    = {Mark Rogers and Lei Li and Stuart J. Russell},
  title     = {Multilinear Dynamical Systems for Tensor Time Series},
  booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
  editor    = {Christopher J. C. Burges and L{\'{e}}on Bottou and Zoubin Ghahramani and Kilian Q. Weinberger},
  pages     = {2634--2642},
  year      = {2013},
}

@inproceedings{matsubara2013f,
  author    = {Matsubara, Yasuko and Li, Lei and Papalexakis, Evangelos E. and Lo, David and Sakurai, Yasushi and Faloutsos, Christos},
  title     = {{F-Trail}: Finding Patterns in Taxi Trajectories},
  booktitle = {The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)},
  pages     = {86--98},
  year      = {2013},
  file      = {:pubs/matsubara-The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)13-ftrail.pdf:PDF},
}

@TechReport{li2013blog,
  author      = {Li, Lei and Russell, Stuart J.},
  title       = {The {BLOG} Language Reference},
  year        = {2013},
  number      = {UCB/EECS-2013-51},
  abstract    = {This document introduces the syntax of BLOG, a probabilistic programming language, for describing random variables and their probabilistic dependencies. BLOG defines probabilistic generative models over first-order structures. For example, all Bayesian networks can be easily described by BLOG. BLOG has the following features: (a) it employs open-universe semantics; (b) it can describe relational uncertainty; (c) it can handle identity uncertainty; and (d) it is empowered by first-order logic. The syntax as described in this document corresponds to BLOG version 0.6. The current version represents a significant redesign and extension to previous versions of BLOG, based on the principles of usability and implementation efficiency.},
  file        = {:pubs/li2013blog - The BLOG Language Reference.pdf:PDF;Tech report:http\:/www.eecs.berkeley.edu/Pubs/TechRpts/2013/EECS-2013-51.html:URL},
  institution = {EECS Department, University of California, Berkeley},
}

@inproceedings{henderson2012rolx,
 author = {Henderson, Keith and Gallagher, Brian and Eliassi-Rad, Tina and Tong, Hanghang and Basu, Sugato and Akoglu, Leman and Koutra, Danai and Faloutsos, Christos and Li, Lei},
 booktitle = {Proceedings of the 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD)},
 file = {:pubs/henderson2012rolx - RolX_ Structural Role Extraction and Mining in Large Graphs.pdf:PDF},
 owner = {leili},
 title = {{RolX}: Structural Role Extraction and Mining in Large Graphs},
 year = {2012}
}

@inproceedings{matsubara2012rise,
  author    = {Yasuko Matsubara and Yasushi Sakurai and B. Aditya Prakash and Lei Li and Christos Faloutsos},
  title     = {Rise and fall patterns of information diffusion: model and implications},
  booktitle = {Proc. of KDD},
  editor    = {Qiang Yang and Deepak Agarwal and Jian Pei},
  pages     = {6--14},
  year      = {2012},
}

@inproceedings{liu2011mobile,
  author       = {Liu, Siyuan and Li, Lei and Faloutsos, Christos and Ni, Lionel},
  title        = {Mobile Phone Graph Evolution: Findings, Model and Interpretation},
  booktitle    = {IEEE International Conference on Data Mining, workshop on Data Mining Technologies for Computational Collective Intelligence},
  year         = {2011},
  entrysubtype = {workshop},
  file         = {:pubs/liu2011mobile - Mobile Phone Graph Evolution_ Findings, Model and Interpretation.pdf:PDF},
  owner        = {leili},
}

@inproceedings{henderson2011its,
  author    = {Keith Henderson and Brian Gallagher and Lei Li and Leman Akoglu and Tina Eliassi{-}Rad and Hanghang Tong and Christos Faloutsos},
  title     = {It's who you know: graph mining using recursive structural features},
  booktitle = {Proceedings of the 17th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining, San Diego, CA, USA, August 21-24, 2011},
  editor    = {Chid Apt{\'{e}} and Joydeep Ghosh and Padhraic Smyth},
  pages     = {663--671},
  year      = {2011},
}

@inproceedings{li2011thermocast,
 author = {Lei Li and
Chieh{-}Jan Mike Liang and
Jie Liu and
Suman Nath and
Andreas Terzis and
Christos Faloutsos},
 booktitle = {Proceedings of the 17th {ACM} {SIGKDD} International Conference on
Knowledge Discovery and Data Mining, San Diego, CA, USA, August 21-24,
2011},
 editor = {Chid Apt{\'{e}} and
Joydeep Ghosh and
Padhraic Smyth},
 pages = {1370--1378},
 title = {{ThermoCast}: a cyber-physical forecasting model for datacenters},
 year = {2011}
}

@inproceedings{li2011time,
  author    = {Lei Li and B. Aditya Prakash},
  title     = {Time Series Clustering: Complex is Simpler!},
  booktitle = {Proc. of ICML},
  editor    = {Lise Getoor and Tobias Scheffer},
  pages     = {185--192},
  year      = {2011},
}

@inproceedings{sakurai2011windmine,
 author = {Yasushi Sakurai and
Lei Li and
Yasuko Matsubara and
Christos Faloutsos},
 booktitle = {Proceedings of the Eleventh {SIAM} International Conference on Data
Mining, {SDM} 2011, April 28-30, 2011, Mesa, Arizona, {USA}},
 pages = {759--770},
 title = {{WindMine}: Fast and Effective Mining of Web-click Sequences},
 year = {2011}
}

@phdthesis{li2011fast,
 author = {Li, Lei},
 note = {Available as technical report CMU-CS-11-127},
 owner = {leili},
 school = {Carnegie Mellon University},
 slides = {pubs/leili-talk2011-defense.pdf},
 title = {Fast algorithms for mining co-evolving time series},
 year = {2011}
}

@inproceedings{li2010fast,
  author       = {Li, Lei},
  title        = {Fast Algorithms for Time Series Mining},
  booktitle    = {Proc. of ICDE},
  pages        = {341--344},
  year         = {2010},
  entrysubtype = {workshop},
  file         = {:pubs/li2010fast - Fast Algorithms for Time Series Mining.pdf:PDF},
  slides       = {pubs/li-icde10-slides.pdf},
  owner        = {leili},
}

@inproceedings{henderson2010metric,
  author    = {Keith Henderson and Tina Eliassi{-}Rad and Christos Faloutsos and Leman Akoglu and Lei Li and Koji Maruhashi and B. Aditya Prakash and Hanghang Tong},
  title     = {Metric forensics: a multi-level approach for mining volatile graphs},
  booktitle = {Proceedings of the 16th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, July 25-28, 2010},
  editor    = {Bharat Rao and Balaji Krishnapuram and Andrew Tomkins and Qiang Yang},
  pages     = {163--172},
  year      = {2010},
}

@inproceedings{li2010bolero,
 acmid = {1921454},
 address = {Madrid, Spain},
 author = {Li, Lei and McCann, James and Pollard, Nancy and Faloutsos, Christos},
 booktitle = {Proceedings of the 2010 ACM SIGGRAPH/Eurographics Symposium on Computer Animation (SCA)},
 file = {:pubs/li2010bolero - BoLeRO_ a principled technique for including bone length constraints in motion capture occlusion filling.pdf:PDF;demo:pubs/BoLeRO-final-v1_xvid.avi:URL},
 numpages = {10},
 owner = {leili},
 pages = {179--188},
 series = {SCA '10},
 software = {software/bolero-r349.zip},
 title = {{BoLeRO}: a principled technique for including bone length constraints in motion capture occlusion filling},
 year = {2010}
}

@article{li2010efficient,
 abstract = {Quad-core cpus have been a common desktop configuration for today’s office. The increasing number of processors on a single chip opens new opportunity for parallel computing. Our goal is to make use of the multi-core as well as multi-processor architectures to speed up large-scale data mining algorithms. In this paper, we present a general parallel learning framework, Cut-And-Stitch, for training hidden Markov chain models. Particularly, we propose two model-specific variants, CAS-LDS for learning linear dynamical systems (LDS) and CAS-HMM for learning hidden Markov models (HMM). Our main contribution is a novel method to handle the data dependencies due to the chain structure of hidden variables, so as to parallelize the EM-based parameter learning algorithm. We implement CAS-LDS and CAS-HMM using OpenMP on two supercomputers and a quad-core commercial desktop. The experimental results show that parallel algorithms using Cut-And-Stitch achieve comparable accuracy and almost linear speedups over the traditional serial version.},
 author = {Li, Lei and Fu, Bin and Faloutsos, Christos},
 comment = {This one is applying the idea from Cut-And-Stitch paper from linear dynamical system to hidden markov models. The extended version can be found in my thesis Chapter 6 and 7.},
 file = {:pubs/li2010efficient - Efficient Parallel Learning of Hidden Markov Chain Models on SMPs.pdf:PDF},
 journal = {IEICE Transactions on Information and Systems},
 number = {6},
 owner = {leili},
 pages = {1330--1342},
 title = {Efficient Parallel Learning of Hidden {Markov} chain Models on {SMP}s},
 volume = {E93.D},
 year = {2010}
}

@inproceedings{li2010parsimonious,
  author     = {Li, Lei and Prakash, B. Aditya and Faloutsos, Christos},
  title      = {Parsimonious linear fingerprinting for time series},
  booktitle  = {the Proceedings of the Very Large Data Bases Endowment (VLDB)},
  volume     = {3},
  issue      = {1-2},
  issue_date = {September 2010},
  pages      = {385--396},
  numpages   = {12},
  year       = {2010},
  issn       = {2150-8097},
  acmid      = {1920893},
  file       = {:pubs/li2010parsimonious - Parsimonious linear fingerprinting for time series.pdf:PDF},
  slides     = {pubs/li-vldb10-plif-slides.pdf},
  software   = {software/plif-r345.zip},
  owner      = {leili},
}

@inproceedings{guo2009tailoring,
 acmid = {1507523},
 address = {Barcelona, Spain},
 author = {Guo, Fan and Li, Lei and Faloutsos, Christos},
 booktitle = {Proceedings of the 2009 workshop on Web Search Click Data},
 entrysubtype = {workshop},
 isbn = {978-1-60558-434-8},
 keywords = {click model, user behavior, web search},
 numpages = {5},
 pages = {88--92},
 series = {WSCD '09},
 title = {Tailoring click models to user goals},
 year = {2009}
}

@patent{chen2009adaptive,
  author      = {Chen, Zheng and Li, Lei and Lin, Chenxi and Liu, Qiaoling and Wang, Jian and Zhang, Benyu},
  title       = {Adaptive grouping in a file network},
  number      = {US 7,634,471},
  nationality = {US},
  assignee    = {Microsoft},
  year        = {2009},
  day         = {14},
  yearfiled   = {2006},
  dayfiled    = {29},
  owner       = {lilei.02},
}

@patent{chen2009system,
  author      = {Chen, Zheng and Li, Lei and Lin, Chenxi and Liu, Qiaoling and Wang, Jian and Zhang, Benyu},
  title       = {System and method for exploring a semantic file network},
  number      = {US 7,624,130},
  nationality = {US},
  assignee    = {Microsoft},
  year        = {2009},
  day         = {24},
  yearfiled   = {2006},
  dayfiled    = {30},
  owner       = {lilei.02},
}

@patent{chen2009extracting,
  author      = {Chen, Zheng and Li, Lei and Lin, Chenxi and Liu, Qiaoling and Wang, Jian and Zhang, Benyu},
  title       = {Extracting Semantic Attributes},
  number      = {US 7,502,785},
  nationality = {US},
  assignee    = {Microsoft},
  year        = {2009},
  day         = {10},
  yearfiled   = {2006},
  dayfiled    = {30},
  owner       = {lilei.02},
}

@inproceedings{xu2008inferring,
  author       = {Xu, Wanhong and Zhou, Xi and Li, Lei},
  title        = {Inferring privacy information via social relations},
  booktitle    = {Proc. of ICDE},
  pages        = {525--530},
  year         = {2008},
  entrysubtype = {workshop},
}

@inproceedings{sakurai2008efficient,
  author    = {Sakurai, Yasushi and Chong, Rosalynn and Li, Lei and Faloutsos, Christos},
  title     = {Efficient Distribution Mining and Classification},
  booktitle = {SIAM International Conference on Data Mining (SDM)},
  pages     = {632--643},
  year      = {2008},
  ee        = {http://www.siam.org/proceedings/datamining/2008/dm08_58_sakurai.pdf},
  file      = {:http\:/www.siam.org/proceedings/datamining/2008/dm08_58_sakurai.pdf:URL},
}

@inproceedings{li2008laziness,
  author    = {Li, Lei and McCann, James and Faloutsos, Christos and Pollard, Nancy},
  title     = {Laziness is a virtue: Motion stitching using effort minimization},
  booktitle = {The 29th Annual Conference of the European Association for Computer Graphics (EG), Short Paper Proceedings},
  year      = {2008},
  file      = {:pubs/li2008laziness - Laziness is a virtue_ Motion stitching using effort minimization.pdf:PDF},
  owner     = {leili},
}

@inproceedings{guo2008c,
 acmid = {1454214},
 author = {Guo, Fan and Li, Lei and Faloutsos, Christos and Xing, Eric P.},
 booktitle = {the Proceedings of the Very Large Data Bases Endowment (VLDB)},
 file = {:pubs/guo2008c - C-DEM_ a multi-modal query system for Drosophila Embryo databases.pdf:PDF},
 issn = {2150-8097},
 issue = {2},
 numpages = {4},
 owner = {leili},
 pages = {1508--1511},
 title = {{C-DEM}: a Multi-modal Query System for {Drosophila} Embryo Databases},
 volume = {1},
 year = {2008}
}

@inproceedings{li2006providing,
  author    = {Li, Lei and Liu, Qiaoling and Tao, Yunfeng and Zhang, Lei and Zhou, Jian and Yu, Yong},
  title     = {Providing an Uncertainty Reasoning Service for Semantic Web Application},
  booktitle = {Asia-Pacific Web Conference},
  pages     = {628--639},
  year      = {2006},
  file      = {:pubs/li2006providing - Providing an Uncertainty Reasoning Service for Semantic Web Application.pdf:PDF},
}

@Unpublished{bao2019pnat,
  author        = {Yu Bao and Hao Zhou and Jiangtao Feng and Mingxuan Wang and Shujian Huang and Jiajun Chen and Lei Li},
  title         = {{PNAT}: Non-autoregressive Transformer by Position Learning},
  year          = {2019},
  journal       = {ArXiv preprint},
  volume        = {abs/1911.10677},
  archiveprefix = {arXiv},
  eprint        = {1911.10677},
  note          = {arXiv preprint arXiv:1911.10677},
}

@noshow{liPreprintbidirectional,
  author = {Mingwei Li and Qingyuan Jiang and Yi He and Lei Li and Wujun Li},
  title  = {Bidirectional Attentive Convolutional Neural Network for Near-Duplicate Video Retrieval},
  year   = {Preprint},
}

@noshow{tianPreprintconversational,
  author = {Youzhi Tian and Zhou Yu and Cheng Yang and Hang Li and Lei Li},
  title  = {Conversational Contextualized Multimodal Representation Learning},
  year   = {Preprint},
}

@Unpublished{wang2021lightseq2,
  author        = {Xiaohui Wang and Ying Xiong and Xian Qian and Yang Wei and Lei Li and Mingxuan Wang},
  title         = {{LightSeq2}: Accelerated Training for Transformer-based Models on GPUs},
  year          = {2021},
  journal       = {ArXiv preprint},
  volume        = {abs/2110.05722},
  archiveprefix = {arXiv},
  eprint        = {2110.05722},
  note          = {arXiv preprint arXiv:2110.05722},
}

@Unpublished{xu2020reciprocal,
  author        = {Minkai Xu and Mingxuan Wang and Zhouhan Lin and Hao Zhou and Weinan Zhang and Lei Li},
  title         = {Reciprocal Supervised Learning Improves Neural Machine Translation},
  year          = {2020},
  journal       = {ArXiv preprint},
  volume        = {abs/2012.02975},
  archiveprefix = {arXiv},
  eprint        = {2012.02975},
  note          = {arXiv preprint arXiv:2012.02975},
}

@Unpublished{xu2021survey,
  author        = {Jingjing Xu and Wangchunshu Zhou and Zhiyi Fu and Hao Zhou and Lei Li},
  title         = {A Survey on Green Deep Learning},
  year          = {2021},
  journal       = {ArXiv preprint},
  volume        = {abs/2111.05193},
  archiveprefix = {arXiv},
  eprint        = {2111.05193},
  note          = {arXiv preprint arXiv:2111.05193},
}

@Unpublished{yan2019cross,
  author        = {An Yan and Xin Wang and Jiangtao Feng and Lei Li and William Yang Wang},
  title         = {Cross-Lingual Vision-Language Navigation},
  year          = {2019},
  journal       = {ArXiv preprint},
  volume        = {abs/1910.11301},
  archiveprefix = {arXiv},
  eprint        = {1910.11301},
  note          = {arXiv preprint arXiv:1910.11301},
}

@Comment{jabref-meta: databaseType:bibtex;}