Publications

Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
- @inproceedings{Shah2022apr,
-   author    = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and Le Roux, Jonathan and Hori, Chiori},
-   title     = {Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2022},
-   month     = apr,
-   pages     = {7732--7736},
-   publisher = {IEEE},
-   issn      = {1520-6149},
-   isbn      = {978-1-6654-0540-9},
-   url       = {https://www.merl.com/publications/TR2022-019}
- }
Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747604, April 2022, pp. 711-715.
BibTeX TR2022-023 PDF
- @inproceedings{Slizovskaia2022mar,
- author = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
- title = {Locate This, Not That: Class-Conditioned Sound Event {DOA} Estimation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {711--715},
- month = apr,
- doi = {10.1109/ICASSP43922.2022.9747604},
- url = {https://www.merl.com/publications/TR2022-023}
- }
Yu, J., Wang, P., Koike-Akino, T., Orlik, P.V., "Multi-Modal Recurrent Fusion for Indoor Localization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746071, April 2022.
BibTeX TR2022-018 PDF
- @inproceedings{Yu2022apr,
-   author    = {Yu, Jianyuan and Wang, Pu and Koike-Akino, Toshiaki and Orlik, Philip V.},
-   title     = {Multi-Modal Recurrent Fusion for Indoor Localization},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2022},
-   month     = apr,
-   publisher = {IEEE},
-   doi       = {10.1109/ICASSP43922.2022.9746071},
-   issn      = {2379-190X},
-   isbn      = {978-1-6654-0540-9},
-   url       = {https://www.merl.com/publications/TR2022-018}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3129363, Vol. 29, pp. 3476-3490, December 2021.
BibTeX TR2021-144 PDF
- @article{Wang2021dec,
-   author  = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
-   title   = {Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation},
-   journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
-   year    = {2021},
-   month   = dec,
-   volume  = {29},
-   pages   = {3476--3490},
-   doi     = {10.1109/TASLP.2021.3129363},
-   url     = {https://www.merl.com/publications/TR2021-144}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "On The Compensation Between Magnitude and Phase in Speech Separation", IEEE Signal Processing Letters, DOI: 10.1109/LSP.2021.3116502, Vol. 28, pp. 2018-2022, November 2021.
BibTeX TR2021-137 PDF
- @article{Wang2021nov2,
-   author  = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
-   title   = {On The Compensation Between Magnitude and Phase in Speech Separation},
-   journal = {IEEE Signal Processing Letters},
-   year    = {2021},
-   month   = nov,
-   volume  = {28},
-   pages   = {2018--2022},
-   doi     = {10.1109/LSP.2021.3116502},
-   url     = {https://www.merl.com/publications/TR2021-137}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632667, October 2021, pp. 56-60.
BibTeX TR2021-127 PDF
- @inproceedings{Wang2021oct4,
-   author    = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
-   title     = {Convolutive Prediction for Reverberant Speech Separation},
-   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
-   year      = {2021},
-   month     = oct,
-   pages     = {56--60},
-   publisher = {IEEE},
-   doi       = {10.1109/WASPAA52581.2021.9632667},
-   url       = {https://www.merl.com/publications/TR2021-127}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement", arXiv, October 2021.
BibTeX arXiv
- @article{Wang2021oct,
- author = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
- title = {Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement},
- journal = {arXiv},
- year = 2021,
- month = oct,
- eprint = {2110.00570},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2110.00570}
- }
Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2021-571, September 2021, pp. 726-730.
BibTeX TR2021-103 PDF
- @inproceedings{Higuchi2021sep,
-   author    = {Higuchi, Yosuke and Moritz, Niko and Le Roux, Jonathan and Hori, Takaaki},
-   title     = {Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition},
-   booktitle = {Interspeech},
-   year      = {2021},
-   month     = sep,
-   pages     = {726--730},
-   doi       = {10.21437/Interspeech.2021-571},
-   url       = {https://www.merl.com/publications/TR2021-103}
- }
Hori, T., Moritz, N., Hori, C., Le Roux, J., "Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers", Interspeech, DOI: 10.21437/Interspeech.2021-1643, August 2021, pp. 2097-2101.
BibTeX TR2021-100 PDF
- @inproceedings{Hori2021aug3,
-   author    = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
-   title     = {Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers},
-   booktitle = {Interspeech},
-   year      = {2021},
-   month     = aug,
-   pages     = {2097--2101},
-   doi       = {10.21437/Interspeech.2021-1643},
-   url       = {https://www.merl.com/publications/TR2021-100}
- }
Moritz, N., Hori, T., Le Roux, J., "Dual Causal/Non-Causal Self-Attention for Streaming End-to-End Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2021-1693, August 2021, pp. 1822-1826.
BibTeX TR2021-094 PDF
- @inproceedings{Moritz2021aug,
-   author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
-   title     = {Dual Causal/Non-Causal Self-Attention for Streaming End-to-End Speech Recognition},
-   booktitle = {Interspeech},
-   year      = {2021},
-   month     = aug,
-   pages     = {1822--1826},
-   doi       = {10.21437/Interspeech.2021-1693},
-   url       = {https://www.merl.com/publications/TR2021-094}
- }
Moritz, N., Hori, T., Le Roux, J., "Capturing Multi-Resolution Context by Dilated Self-Attention", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415001, June 2021, pp. 5869-5873.
BibTeX TR2021-036 PDF
- @inproceedings{Moritz2021jun,
-   author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
-   title     = {Capturing Multi-Resolution Context by Dilated Self-Attention},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   pages     = {5869--5873},
-   doi       = {10.1109/ICASSP39728.2021.9415001},
-   url       = {https://www.merl.com/publications/TR2021-036}
- }
Ma, Y., Boufounos, P.T., Mansour, H., Aeron, S., "Multiview Sensing with Unknown Permutations: An Optimal Transport Approach", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415075, June 2021, pp. 1440-1444.
BibTeX TR2021-047 PDF Video
- @inproceedings{Ma2021jun,
-   author    = {Ma, Yanting and Boufounos, Petros T. and Mansour, Hassan and Aeron, Shuchin},
-   title     = {Multiview Sensing with Unknown Permutations: An Optimal Transport Approach},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   pages     = {1440--1444},
-   doi       = {10.1109/ICASSP39728.2021.9415075},
-   issn      = {1520-6149},
-   isbn      = {978-1-7281-7606-2},
-   url       = {https://www.merl.com/publications/TR2021-047}
- }
Chen, S., Eldar, Y., "Time-Varying Graph Signal Inpainting via Unrolling Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413406, June 2021.
BibTeX TR2021-070 PDF
- @inproceedings{Chen2021jun,
-   author    = {Chen, Siheng and Eldar, Yonina},
-   title     = {Time-Varying Graph Signal Inpainting via Unrolling Networks},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9413406},
-   url       = {https://www.merl.com/publications/TR2021-070}
- }
Chen, S., Eldar, Y., "Graph Signal Denoising via Unrolling Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415073, June 2021.
BibTeX TR2021-071 PDF
- @inproceedings{Chen2021jun3,
- author = {Chen, Siheng and Eldar, Yonina},
- title = {Graph Signal Denoising via Unrolling Networks},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2021,
- month = jun,
- doi = {10.1109/ICASSP39728.2021.9415073},
- url = {https://www.merl.com/publications/TR2021-071}
- }
Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413358, June 2021, pp. 46-50.
BibTeX TR2021-069 PDF
- @inproceedings{Hung2021jun,
-   author    = {Hung, Yun-Ning and Wichern, Gordon and Le Roux, Jonathan},
-   title     = {Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   pages     = {46--50},
-   doi       = {10.1109/ICASSP39728.2021.9413358},
-   issn      = {2379-190X},
-   isbn      = {978-1-7281-7605-5},
-   url       = {https://www.merl.com/publications/TR2021-069}
- }
Hyder, R., Mansour, H., Ma, Y., Boufounos, P.T., Wang, P., "A Consensus Equilibrium Solution for Deep Image Prior Powered by Red", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414290, June 2021, pp. 1380-1384.
BibTeX TR2021-046 PDF
- @inproceedings{Hyder2021jun,
- author = {Hyder, Rakib and Mansour, Hassan and Ma, Yanting and Boufounos, Petros T. and Wang, Pu},
- title = {A Consensus Equilibrium Solution for Deep Image Prior Powered by Red},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2021,
- pages = {1380--1384},
- month = jun,
- doi = {10.1109/ICASSP39728.2021.9414290},
- issn = {2379-190X},
- isbn = {978-1-7281-7605-5},
- url = {https://www.merl.com/publications/TR2021-046}
- }
Khurana, S., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414299, June 2021, pp. 6553-6557.
BibTeX TR2021-039 PDF
- @inproceedings{Khurana2021jun,
-   author    = {Khurana, Sameer and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
-   title     = {Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   pages     = {6553--6557},
-   doi       = {10.1109/ICASSP39728.2021.9414299},
-   url       = {https://www.merl.com/publications/TR2021-039}
- }
Moritz, N., Hori, T., Le Roux, J., "Semi-Supervised Speech Recognition via Graph-Based Temporal Classification", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414058, June 2021, pp. 6548-6552.
BibTeX TR2021-037 PDF
- @inproceedings{Moritz2021jun2,
-   author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
-   title     = {Semi-Supervised Speech Recognition via Graph-Based Temporal Classification},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   pages     = {6548--6552},
-   doi       = {10.1109/ICASSP39728.2021.9414058},
-   url       = {https://www.merl.com/publications/TR2021-037}
- }
Shi, L., Liu, D., Umeda, M., Hana, N., "Fusion-Based Image Correlations Framework For Strain Measurement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414987, June 2021.
BibTeX TR2021-012 PDF Video
- @inproceedings{Shi2021feb,
-   author    = {Shi, Laixi and Liu, Dehong and Umeda, Masaki and Hana, Norihiko},
-   title     = {Fusion-Based Image Correlations Framework For Strain Measurement},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9414987},
-   issn      = {2379-190X},
-   isbn      = {978-1-7281-7606-2},
-   url       = {https://www.merl.com/publications/TR2021-012}
- }
Yao, G., Wang, P., Berntorp, K., Mansour, H., Boufounos, P.T., Orlik, P.V., "Extended Object Tracking with Automotive Radar Using B-Spline Chained Ellipses Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415080, June 2021, pp. 8408-8412.
BibTeX TR2021-048 PDF Video
- @inproceedings{Yao2021jun,
- author = {Yao, Gang and Wang, Pu and Berntorp, Karl and Mansour, Hassan and Boufounos, Petros T. and Orlik, Philip V.},
- title = {Extended Object Tracking with Automotive Radar Using {B-Spline} Chained Ellipses Model},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2021,
- pages = {8408--8412},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/ICASSP39728.2021.9415080},
- issn = {2379-190X},
- isbn = {978-1-7281-7605-5},
- url = {https://www.merl.com/publications/TR2021-048}
- }
Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/DSLW51110, June 2021, pp. 1-6.
BibTeX TR2021-073 PDF
- @inproceedings{Watanabe2021jun,
- author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S. and Zhang, Wangyou},
- title = {The 2020 {ESPNET} Update: New Features, Broadened Applications, Performance Improvements, and Future Plans},
- booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
- year = 2021,
- pages = {1--6},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/DSLW51110},
- isbn = {978-1-6654-2826-2},
- url = {https://www.merl.com/publications/TR2021-073}
- }
Kim, S., Galley, M., Gunasekara, C., Lee, S., Atkinson, A., Peng, B., Schulz, H., Gao, J., Li, J., Adada, M., Huang, M., Lastras, L., Kummerfeld, J.K., Lasecki, W.S., Hori, C., Cherian, A., Marks, T.K., Rastogi, A., Zang, X., Sunkara, S., Gupta, R., "Overview of the Eighth Dialog System Technology Challenge: DSTC8", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3078368, May 2021.
BibTeX TR2021-064 PDF
- @article{Kim2021may,
- author = {Kim, Seokhwan and Galley, Michel and Gunasekara, Chulaka and Lee, Sungjin and Atkinson, Adam and Peng, Baolin and Schulz, Hannes and Gao, Jianfeng and Li, Jinchao and Adada, Mahmoud and Huang, Minlie and Lastras, Luis and Kummerfeld, Jonathan K. and Lasecki, Walter S. and Hori, Chiori and Cherian, Anoop and Marks, Tim K. and Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav},
- title = {Overview of the Eighth Dialog System Technology Challenge: {DSTC8}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2021,
- month = may,
- doi = {10.1109/TASLP.2021.3078368},
- issn = {2329-9290},
- url = {https://www.merl.com/publications/TR2021-064}
- }
Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
BibTeX TR2021-042 PDF Video
- @article{Hori2021may,
- author = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and Le Roux, Jonathan and Sullivan, Alan and Vetro, Anthony},
- title = {マルチモーダルセンシング情報に基づく{Scene-aware Interaction} 技術},
- journal = {Society of Automotive Engineers of Japan},
- year = 2021,
- volume = 75,
- number = 5,
- pages = {66--71},
- month = may,
- url = {https://www.merl.com/publications/TR2021-042}
- }
Hori, T., Moritz, N., Hori, C., Le Roux, J., "Transformer-based Long-context End-to-end Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2020-2928, October 2020, pp. 5011-5015.
BibTeX TR2020-139 PDF Presentation
- @inproceedings{Hori2020oct,
-   author    = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
-   title     = {Transformer-based Long-context End-to-end Speech Recognition},
-   booktitle = {Interspeech},
-   year      = {2020},
-   month     = oct,
-   pages     = {5011--5015},
-   doi       = {10.21437/Interspeech.2020-2928},
-   issn      = {1990-9772},
-   url       = {https://www.merl.com/publications/TR2020-139}
- }
Jayashankar, T., Le Roux, J., Moulin, P., "Detecting Audio Attacks on ASR Systems with Dropout Uncertainty", Interspeech, DOI: 10.21437/Interspeech.2020-1846, October 2020, pp. 4671-4675.
BibTeX TR2020-137 PDF Presentation
- @inproceedings{Jayashankar2020oct,
- author = {Jayashankar, Tejas and Le Roux, Jonathan and Moulin, Pierre},
- title = {Detecting Audio Attacks on {ASR} Systems with Dropout Uncertainty},
- booktitle = {Interspeech},
- year = 2020,
- pages = {4671--4675},
- month = oct,
- doi = {10.21437/Interspeech.2020-1846},
- issn = {1990-9772},
- url = {https://www.merl.com/publications/TR2020-137}
- }