Publications

Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2837, September 2019, pp. 76-80.
BibTeX TR2019-098 PDF
- @inproceedings{Moritz2019sep,
- author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
- title     = {Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition},
- booktitle = {Interspeech},
- pages     = {76--80},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-2837},
- url       = {https://www.merl.com/publications/TR2019-098},
- }
Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-3038, September 2019, pp. 3755-3759.
BibTeX TR2019-101 PDF
- @inproceedings{Seki2019sep,
- author    = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
- title     = {End-to-End Multilingual Multi-Speaker Speech Recognition},
- booktitle = {Interspeech},
- pages     = {3755--3759},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-3038},
- url       = {https://www.merl.com/publications/TR2019-101},
- }
Seki, H., Hori, T., Watanabe, S., Moritz, N., Le Roux, J., "Vectorized Beam Search for CTC-Attention-based Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2860, September 2019, pp. 3825-3829.
BibTeX TR2019-102 PDF
- @inproceedings{Seki2019sep2,
- author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Moritz, Niko and Le Roux, Jonathan},
- title = {Vectorized Beam Search for {CTC}-Attention-based Speech Recognition},
- booktitle = {Interspeech},
- year = 2019,
- pages = {3825--3829},
- month = sep,
- doi = {10.21437/Interspeech.2019-2860},
- url = {https://www.merl.com/publications/TR2019-102}
- }
Wichern, G., McQuinn, E., Antognini, J., Flynn, M., Zhu, R., Crow, D., Manilow, E., Le Roux, J., "WHAM!: Extending Speech Separation to Noisy Environments", Interspeech, DOI: 10.21437/Interspeech.2019-2821, September 2019, pp. 1368-1372.
BibTeX TR2019-099 PDF
- @inproceedings{Wichern2019sep,
- author = {Wichern, Gordon and McQuinn, Emmett and Antognini, Joe and Flynn, Michael and Zhu, Richard and Crow, Dwight and Manilow, Ethan and Le Roux, Jonathan},
- title = {{WHAM!}: Extending Speech Separation to Noisy Environments},
- booktitle = {Interspeech},
- year = 2019,
- pages = {1368--1372},
- month = sep,
- doi = {10.21437/Interspeech.2019-2821},
- url = {https://www.merl.com/publications/TR2019-099}
- }
Yalta, N., Watanabe, S., Hori, T., Nakadai, K., Ogata, T., "CNN-based Multichannel End-to-End Speech Recognition for Everyday Home Environments", European Signal Processing Conference (EUSIPCO), DOI: 10.23919/EUSIPCO.2019.8902524, September 2019, pp. 1-5.
BibTeX TR2019-094 PDF
- @inproceedings{Yalta2019sep,
- author = {Yalta, Nelson and Watanabe, Shinji and Hori, Takaaki and Nakadai, Kazuhiro and Ogata, Tetsuya},
- title = {{CNN}-based Multichannel End-to-End Speech Recognition for Everyday Home Environments},
- booktitle = {European Signal Processing Conference (EUSIPCO)},
- year = 2019,
- pages = {1--5},
- month = sep,
- doi = {10.23919/EUSIPCO.2019.8902524},
- url = {https://www.merl.com/publications/TR2019-094}
- }
Duan, C., Chen, S., Tian, D., Moura, J., Kovacevic, J., "Deep Graph Topology Learning for 3D Point Cloud Reconstruction", Graph Signal Processing Workshop (GSP), June 2019.
BibTeX TR2019-046 PDF
- @inproceedings{Duan2019jun,
- author = {Duan, Chaojing and Chen, Siheng and Tian, Dong and Moura, Jose and Kovacevic, Jelena},
- title = {Deep Graph Topology Learning for {3D} Point Cloud Reconstruction},
- booktitle = {Graph Signal Processing Workshop (GSP)},
- year = 2019,
- month = jun,
- url = {https://www.merl.com/publications/TR2019-046}
- }
Aihara, R., Hanazawa, T., Okato, Y., Wichern, G., Le Roux, J., "Teacher-Student Deep Clustering For Low-Delay Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682695, May 2019.
BibTeX TR2019-003 PDF
- @inproceedings{Aihara2019may,
- author = {Aihara, Ryo and Hanazawa, Toshiyuki and Okato, Yohei and Wichern, Gordon and Le Roux, Jonathan},
- title = {Teacher-Student Deep Clustering For Low-Delay Channel Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8682695},
- url = {https://www.merl.com/publications/TR2019-003}
- }
Baskar, M.K., Burget, L., Watanabe, S., Karafiat, M., Hori, T., Cernocky, J.H., "Promising Accurate Prefix Boosting for Sequence-to-Sequence ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682782, May 2019, pp. 5646-5650.
BibTeX TR2019-006 PDF
- @inproceedings{Baskar2019may,
- author = {Baskar, Murali Karthick and Burget, Lukas and Watanabe, Shinji and Karafiat, Martin and Hori, Takaaki and Cernocky, Jan Honza},
- title = {Promising Accurate Prefix Boosting for Sequence-to-Sequence {ASR}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- pages = {5646--5650},
- month = may,
- doi = {10.1109/ICASSP.2019.8682782},
- issn = {2379-190X},
- isbn = {978-1-4799-8131-1},
- url = {https://www.merl.com/publications/TR2019-006}
- }
Cho, J., Watanabe, S., Hori, T., Baskar, M.K., Inaguma, H., Villalba, J., Dehak, N., "Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683380, May 2019.
BibTeX TR2019-007 PDF
- @inproceedings{Cho2019may,
- author    = {Cho, Jaejin and Watanabe, Shinji and Hori, Takaaki and Baskar, Murali Karthick and Inaguma, Hirofumi and Villalba, Jesus and Dehak, Najim},
- title     = {Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683380},
- url       = {https://www.merl.com/publications/TR2019-007},
- }
Das, P., Kao, J.-Y., Ortega, A., Mansour, H., Vetro, A., Sawada, T., Minezawa, A., "Hand Graph Representations for Unsupervised Segmentation of Complex Activities", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683643, May 2019.
BibTeX TR2019-009 PDF
- @inproceedings{Das2019may,
- author    = {Das, Pratyusha and Kao, Jiun-Yu and Ortega, Antonio and Mansour, Hassan and Vetro, Anthony and Sawada, Tomoya and Minezawa, Akira},
- title     = {Hand Graph Representations for Unsupervised Segmentation of Complex Activities},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683643},
- url       = {https://www.merl.com/publications/TR2019-009},
- }
Hori, C., Alamri, H., Wang, J., Wichern, G., Hori, T., Cherian, A., Marks, T.K., Cartillier, V., Lopes, R., Das, A., Essa, I., Batra, D., Parikh, D., "End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682583, May 2019.
BibTeX TR2019-016 PDF
- @inproceedings{Hori2019may2,
- author    = {Hori, Chiori and Alamri, Huda and Wang, Jue and Wichern, Gordon and Hori, Takaaki and Cherian, Anoop and Marks, Tim K. and Cartillier, Vincent and Lopes, Raphael and Das, Abhishek and Essa, Irfan and Batra, Dhruv and Parikh, Devi},
- title     = {End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682583},
- url       = {https://www.merl.com/publications/TR2019-016},
- }
Hori, T., Astudillo, R., Hayashi, T., Zhang, Y., Watanabe, S., Le Roux, J., "Cycle-Consistency Training for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683307, May 2019.
BibTeX TR2019-002 PDF
- @inproceedings{Hori2019may,
- author    = {Hori, Takaaki and Astudillo, Ramon and Hayashi, Tomoki and Zhang, Yu and Watanabe, Shinji and Le Roux, Jonathan},
- title     = {Cycle-Consistency Training for End-to-End Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683307},
- url       = {https://www.merl.com/publications/TR2019-002},
- }
Kadu, A., Mansour, H., Boufounos, P.T., Liu, D., "Reflection Tomographic Imaging of Highly Scattering Objects Using Incremental Frequency Inversion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682393, May 2019.
BibTeX TR2019-012 PDF Video
- @inproceedings{Kadu2019may,
- author    = {Kadu, Ajinkya and Mansour, Hassan and Boufounos, Petros T. and Liu, Dehong},
- title     = {Reflection Tomographic Imaging of Highly Scattering Objects Using Incremental Frequency Inversion},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682393},
- url       = {https://www.merl.com/publications/TR2019-012},
- }
Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "The Phasebook: Building Complex Masks via Discrete Representations for Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682587, May 2019.
BibTeX TR2019-008 PDF
- @inproceedings{LeRoux2019may2,
- author    = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title     = {The Phasebook: Building Complex Masks via Discrete Representations for Source Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682587},
- url       = {https://www.merl.com/publications/TR2019-008},
- }
Le Roux, J., Wisdom, S., Erdogan, H., Hershey, J., "SDR -- Half-Baked or Well Done?", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683855, May 2019.
BibTeX TR2019-013 PDF
- @inproceedings{LeRoux2019may,
- author = {Le Roux, Jonathan and Wisdom, Scott and Erdogan, Hakan and Hershey, John},
- title = {{SDR} -- Half-Baked or Well Done?},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8683855},
- url = {https://www.merl.com/publications/TR2019-013}
- }
Lodhi, M.A., Mansour, H., Boufounos, P.T., "Coherent Radar Imaging Using Unsynchronized Distributed Antennas", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683320, May 2019.
BibTeX TR2019-001 PDF Video
- @inproceedings{Lodhi2019may,
- author    = {Lodhi, Muhammad Asad and Mansour, Hassan and Boufounos, Petros T.},
- title     = {Coherent Radar Imaging Using Unsynchronized Distributed Antennas},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683320},
- url       = {https://www.merl.com/publications/TR2019-001},
- }
Lohit, S., Liu, D., Mansour, H., Boufounos, P.T., "Unrolled Projected Gradient Descent for Multi-Spectral Image Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683124, May 2019.
BibTeX TR2019-010 PDF
- @inproceedings{Lohit2019may,
- author    = {Lohit, Suhas and Liu, Dehong and Mansour, Hassan and Boufounos, Petros T.},
- title     = {Unrolled Projected Gradient Descent for Multi-Spectral Image Fusion},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683124},
- url       = {https://www.merl.com/publications/TR2019-010},
- }
Moritz, N., Hori, T., Le Roux, J., "Triggered Attention for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683510, May 2019.
BibTeX TR2019-015 PDF
- @inproceedings{Moritz2019may,
- author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
- title     = {Triggered Attention for End-to-End Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683510},
- url       = {https://www.merl.com/publications/TR2019-015},
- }
Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Single-Channel Source Separation via Unsupervised Spatial Clustering on Stereo Mixtures", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683198, May 2019.
BibTeX TR2019-014 PDF
- @inproceedings{Seetharaman2019may2,
- author    = {Seetharaman, Prem and Wichern, Gordon and Le Roux, Jonathan and Pardo, Bryan},
- title     = {Bootstrapping Single-Channel Source Separation via Unsupervised Spatial Clustering on Stereo Mixtures},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683198},
- url       = {https://www.merl.com/publications/TR2019-014},
- }
Seetharaman, P., Wichern, G., Venkataramani, S., Le Roux, J., "Class-Conditional Embeddings for Music Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683007, May 2019.
BibTeX TR2019-004 PDF
- @inproceedings{Seetharaman2019may,
- author    = {Seetharaman, Prem and Wichern, Gordon and Venkataramani, Shrikant and Le Roux, Jonathan},
- title     = {Class-Conditional Embeddings for Music Source Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683007},
- url       = {https://www.merl.com/publications/TR2019-004},
- }
Wang, P., Koike-Akino, T., Pajovic, M., Orlik, P.V., Tsujita, W., Gini, F., "Misspecified CRB Parameter Estimation for a Coupled Mixture of Polynomial Phase and Sinusoidal FM Signals", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682703, May 2019.
BibTeX TR2019-011 PDF
- @inproceedings{Wang2019may2,
- author = {Wang, Pu and Koike-Akino, Toshiaki and Pajovic, Milutin and Orlik, Philip V. and Tsujita, Wataru and Gini, Fulvio},
- title = {Misspecified {CRB} Parameter Estimation for a Coupled Mixture of Polynomial Phase and Sinusoidal {FM} Signals},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8682703},
- url = {https://www.merl.com/publications/TR2019-011}
- }
Wang, X., Li, R., Mallidi, S.H., Hori, T., Watanabe, S., Hermansky, H., "Stream Attention-Based Multi-Array End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682650, May 2019.
BibTeX TR2019-005 PDF
- @inproceedings{Wang2019may,
- author    = {Wang, Xiaofei and Li, Ruizhi and Mallidi, Sri Harish and Hori, Takaaki and Watanabe, Shinji and Hermansky, Hynek},
- title     = {Stream Attention-Based Multi-Array End-to-End Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682650},
- url       = {https://www.merl.com/publications/TR2019-005},
- }
Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "Phasebook and Friends: Leveraging discrete representations for source separation", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2019.2904183, Vol. 13, No. 2, pp. 370-382, March 2019.
BibTeX TR2018-199 PDF
- @article{LeRoux2019mar,
- author  = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title   = {Phasebook and Friends: Leveraging discrete representations for source separation},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- volume  = {13},
- number  = {2},
- pages   = {370--382},
- month   = mar,
- year    = {2019},
- doi     = {10.1109/JSTSP.2019.2904183},
- url     = {https://www.merl.com/publications/TR2018-199},
- }
d’Haro, L.F., Banchs, R., Hori, C., Li, H., "Automatic Evaluation of End-to-End Dialog Systems with Adequacy-Fluency Metrics", Special issue on DSTC6 in Computer Speech and Language, DOI: 10.1016/j.csl.2018.12.004, Vol. 55, pp. 200-215, March 2019.
BibTeX TR2018-195 PDF
- @article{dHaro2019mar,
- author = {d'Haro, Luis Fernando and Banchs, Rafael and Hori, Chiori and Li, Haizhou},
- title = {Automatic Evaluation of End-to-End Dialog Systems with Adequacy-Fluency Metrics},
- journal = {Special issue on {DSTC6} in Computer Speech and Language},
- year = 2019,
- volume = 55,
- pages = {200--215},
- month = mar,
- publisher = {Elsevier},
- doi = {10.1016/j.csl.2018.12.004},
- url = {https://www.merl.com/publications/TR2018-195}
- }
Cho, J., Baskar, M.K., Li, R., Wiesner, M., Mallidi, S.H., Yalta, N., Karafiat, M., Watanabe, S., Hori, T., "Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2018.8639655, December 2018.
BibTeX TR2018-175 PDF
- @inproceedings{Cho2018dec,
- author    = {Cho, Jaejin and Baskar, Murali Karthick and Li, Ruizhi and Wiesner, Matthew and Mallidi, Sri Harish and Yalta, Nelson and Karafiat, Martin and Watanabe, Shinji and Hori, Takaaki},
- title     = {Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling},
- booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
- month     = dec,
- year      = {2018},
- doi       = {10.1109/SLT.2018.8639655},
- url       = {https://www.merl.com/publications/TR2018-175},
- }