Publications

Wichern, G., Chakrabarty, A., Wang, Z.-Q., Le Roux, J., "Anomalous sound detection using attentive neural processes", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632762, October 2021, pp. 186-190.
BibTeX TR2021-129 PDF
- @inproceedings{Wichern2021oct,
-   author    = {Wichern, Gordon and Chakrabarty, Ankush and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
-   title     = {{Anomalous sound detection using attentive neural processes}},
-   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
-   year      = {2021},
-   pages     = {186--190},
-   month     = oct,
-   publisher = {IEEE},
-   doi       = {10.1109/WASPAA52581.2021.9632762},
-   url       = {https://www.merl.com/publications/TR2021-129},
- }
Chatterjee, M., Ahuja, N., Cherian, A., "A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 9751-9761.
BibTeX TR2021-096 PDF Video
- @inproceedings{Chatterjee2021oct2,
-   author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
-   title     = {{A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction}},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   year      = {2021},
-   pages     = {9751--9761},
-   month     = oct,
-   url       = {https://www.merl.com/publications/TR2021-096},
- }
Chatterjee, M., Le Roux, J., Ahuja, N., Cherian, A., "Visual Scene Graphs for Audio Source Separation", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 1204-1213.
BibTeX TR2021-095 PDF Video Software
- @inproceedings{Chatterjee2021oct,
-   author    = {Chatterjee, Moitreya and {Le Roux}, Jonathan and Ahuja, Narendra and Cherian, Anoop},
-   title     = {{Visual Scene Graphs for Audio Source Separation}},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   year      = {2021},
-   pages     = {1204--1213},
-   month     = oct,
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2021-095},
- }
Cherian, A., Pais, G., Jain, S., Marks, T.K., Sullivan, A., "InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 10023-10032.
BibTeX TR2021-097 PDF Video Data Software Presentation
- @inproceedings{Cherian2021oct,
-   author    = {Cherian, Anoop and Pais, Goncalo and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
-   title     = {{InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images}},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   year      = {2021},
-   pages     = {10023--10032},
-   month     = oct,
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2021-097},
- }
Comas, A., Marks, T.K., Mansour, H., Lohit, S., Ma, Y., Liu, X., "TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506663, September 2021, pp. 309-313.
BibTeX TR2021-099 PDF
- @inproceedings{Comas2021sep,
-   author    = {Comas, Armand and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas and Ma, Yechi and Liu, Xiaoming},
-   title     = {{TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG}},
-   booktitle = {IEEE International Conference on Image Processing (ICIP)},
-   year      = {2021},
-   pages     = {309--313},
-   month     = sep,
-   publisher = {IEEE},
-   doi       = {10.1109/ICIP42928.2021.9506663},
-   url       = {https://www.merl.com/publications/TR2021-099},
- }
Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2021-571, September 2021, pp. 726-730.
BibTeX TR2021-103 PDF
- @inproceedings{Higuchi2021sep,
-   author    = {Higuchi, Yosuke and Moritz, Niko and {Le Roux}, Jonathan and Hori, Takaaki},
-   title     = {{Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition}},
-   booktitle = {Interspeech},
-   year      = {2021},
-   pages     = {726--730},
-   month     = sep,
-   doi       = {10.21437/Interspeech.2021-571},
-   url       = {https://www.merl.com/publications/TR2021-103},
- }
Liu, B., Guo, J., Koike-Akino, T., Wang, Y., Kim, K.J., Parsons, K., Orlik, P.V., Hashimoto, S., Yuan, J., "Anomaly Detection and Diagnosis Using Pre-Processing and Time-Delay Autoencoder", IEEE International Conference on Emerging Technologies and Factory Automation, September 2021.
BibTeX TR2021-107 PDF
- @inproceedings{Liu2021sep,
- author = {Liu, Bryan and Guo, Jianlin and Koike-Akino, Toshiaki and Wang, Ye and Kim, Kyeong Jin and Parsons, Kieran and Orlik, Philip V. and Hashimoto, Shigeru and Yuan, Jinhong},
- title = {{Anomaly Detection and Diagnosis Using Pre-Processing and Time-Delay Autoencoder}},
- booktitle = {IEEE International Conference on Emerging Technologies and Factory Automation},
- year = 2021,
- month = sep,
- url = {https://www.merl.com/publications/TR2021-107}
- }
Hori, T., Moritz, N., Hori, C., Le Roux, J., "Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers", Interspeech, DOI: 10.21437/Interspeech.2021-1643, August 2021, pp. 2097-2101.
BibTeX TR2021-100 PDF
- @inproceedings{Hori2021aug3,
-   author    = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and {Le Roux}, Jonathan},
-   title     = {{Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers}},
-   booktitle = {Interspeech},
-   year      = {2021},
-   pages     = {2097--2101},
-   month     = aug,
-   doi       = {10.21437/Interspeech.2021-1643},
-   url       = {https://www.merl.com/publications/TR2021-100},
- }
Hori, C., Hori, T., Le Roux, J., "Optimizing Latency for Online Video Captioning Using Audio-Visual Transformers", Interspeech, DOI: 10.21437/Interspeech.2021-1975, August 2021, pp. 586-590.
BibTeX TR2021-093 PDF
- @inproceedings{Hori2021aug2,
- author = {Hori, Chiori and Hori, Takaaki and {Le Roux}, Jonathan},
- title = {{Optimizing Latency for Online Video Captioning Using Audio-Visual Transformers}},
- booktitle = {Interspeech},
- year = 2021,
- pages = {586--590},
- month = aug,
- publisher = {ISCA},
- doi = {10.21437/Interspeech.2021-1975},
- url = {https://www.merl.com/publications/TR2021-093}
- }
Moritz, N., Hori, T., Le Roux, J., "Dual Causal/Non-Causal Self-Attention for Streaming End-to-End Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2021-1693, August 2021, pp. 1822-1826.
BibTeX TR2021-094 PDF
- @inproceedings{Moritz2021aug,
-   author    = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
-   title     = {{Dual Causal/Non-Causal Self-Attention for Streaming End-to-End Speech Recognition}},
-   booktitle = {Interspeech},
-   year      = {2021},
-   pages     = {1822--1826},
-   month     = aug,
-   doi       = {10.21437/Interspeech.2021-1693},
-   url       = {https://www.merl.com/publications/TR2021-094},
- }
Kojima, K., Koike-Akino, T., Tang, Y., Wang, Y., "Inverse design for integrated photonics using deep neural network", Integrated Photonics Research, Silicon and Nanophotonics (IPR), DOI: 10.1364/IPRSN.2021.IF3A.6, July 2021.
BibTeX TR2021-061 PDF
- @inproceedings{Kojima2021jul,
-   author    = {Kojima, Keisuke and Koike-Akino, Toshiaki and Tang, Yingheng and Wang, Ye},
-   title     = {{Inverse design for integrated photonics using deep neural network}},
-   booktitle = {Integrated Photonics Research, Silicon and Nanophotonics (IPR)},
-   year      = {2021},
-   month     = jul,
-   doi       = {10.1364/IPRSN.2021.IF3A.6},
-   url       = {https://www.merl.com/publications/TR2021-061},
- }
Wang, Y., Aeron, S., Rakin, A.S., Koike-Akino, T., Moulin, P., "Robust Machine Learning via Privacy/Rate-Distortion Theory", IEEE International Symposium on Information Theory (ISIT), DOI: 10.1109/ISIT45174.2021.9517751, July 2021.
BibTeX TR2021-082 PDF Video Presentation
- @inproceedings{Wang2021jul,
- author = {Wang, Ye and Aeron, Shuchin and Rakin, Adnan S. and Koike-Akino, Toshiaki and Moulin, Pierre},
- title = {{Robust Machine Learning via Privacy/Rate-Distortion Theory}},
- booktitle = {IEEE International Symposium on Information Theory (ISIT)},
- year = 2021,
- month = jul,
- publisher = {IEEE},
- doi = {10.1109/ISIT45174.2021.9517751},
- isbn = {978-1-5386-8210-4},
- url = {https://www.merl.com/publications/TR2021-082}
- }
Cherian, A., Wang, J., "Generalized One-Class Learning Using Pairs of Complementary Classifiers", IEEE Transactions on Pattern Analysis and Machine Intelligence, DOI: 10.1109/TPAMI.2021.3092999, June 2021.
BibTeX TR2021-076 PDF Software
- @article{Cherian2021jun,
-   author  = {Cherian, Anoop and Wang, Jue},
-   title   = {{Generalized One-Class Learning Using Pairs of Complementary Classifiers}},
-   journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
-   year    = {2021},
-   month   = jun,
-   doi     = {10.1109/TPAMI.2021.3092999},
-   url     = {https://www.merl.com/publications/TR2021-076},
- }
Hori, C., "Human Perspective Scene Understanding via Multimodal Sensing", Tech. Rep. TR2022-151, Audio-Visual Scene Understanding Tutorial at CVPR 2021, June 2021.
BibTeX TR2022-151 PDF Video
- @techreport{Hori2021jun,
- author = {Hori, Chiori},
- title = {{Human Perspective Scene Understanding via Multimodal Sensing}},
- institution = {Mitsubishi Electric Research Laboratories},
- number = {TR2022-151},
- year = 2021,
- month = jun,
- url = {https://www.merl.com/publications/TR2021-151}
- }
Raychaudhuri, D.S., van Baar, J., Paul, S., Roy-Chowdhury, A.K., "Cross-domain Imitation from Observations", International Conference on Machine Learning (ICML), June 2021.
BibTeX TR2021-074 PDF
- @inproceedings{Raychaudhuri2021jun,
-   author    = {Raychaudhuri, Dripta S. and {van Baar}, Jeroen and Paul, Sujoy and Roy-Chowdhury, Amit K.},
-   title     = {{Cross-domain Imitation from Observations}},
-   booktitle = {International Conference on Machine Learning (ICML)},
-   year      = {2021},
-   month     = jun,
-   url       = {https://www.merl.com/publications/TR2021-074},
- }
Moritz, N., Hori, T., Le Roux, J., "Capturing Multi-Resolution Context by Dilated Self-Attention", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415001, June 2021, pp. 5869-5873.
BibTeX TR2021-036 PDF
- @inproceedings{Moritz2021jun,
-   author    = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
-   title     = {{Capturing Multi-Resolution Context by Dilated Self-Attention}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   pages     = {5869--5873},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9415001},
-   url       = {https://www.merl.com/publications/TR2021-036},
- }
Chen, S., Eldar, Y., "Time-Varying Graph Signal Inpainting via Unrolling Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413406, June 2021.
BibTeX TR2021-070 PDF
- @inproceedings{Chen2021jun,
-   author    = {Chen, Siheng and Eldar, Yonina},
-   title     = {{Time-Varying Graph Signal Inpainting via Unrolling Networks}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9413406},
-   url       = {https://www.merl.com/publications/TR2021-070},
- }
Chen, S., Eldar, Y., "Graph Signal Denoising via Unrolling Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415073, June 2021.
BibTeX TR2021-071 PDF
- @inproceedings{Chen2021jun3,
- author = {Chen, Siheng and Eldar, Yonina},
- title = {{Graph Signal Denoising via Unrolling Networks}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2021,
- month = jun,
- doi = {10.1109/ICASSP39728.2021.9415073},
- url = {https://www.merl.com/publications/TR2021-071}
- }
Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413358, June 2021, pp. 46-50.
BibTeX TR2021-069 PDF
- @inproceedings{Hung2021jun,
-   author    = {Hung, Yun-Ning and Wichern, Gordon and {Le Roux}, Jonathan},
-   title     = {{Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   pages     = {46--50},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9413358},
-   issn      = {2379-190X},
-   isbn      = {978-1-7281-7605-5},
-   url       = {https://www.merl.com/publications/TR2021-069},
- }
Khurana, S., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414299, June 2021, pp. 6553-6557.
BibTeX TR2021-039 PDF
- @inproceedings{Khurana2021jun,
-   author    = {Khurana, Sameer and Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
-   title     = {{Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   pages     = {6553--6557},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9414299},
-   url       = {https://www.merl.com/publications/TR2021-039},
- }
Moritz, N., Hori, T., Le Roux, J., "Semi-Supervised Speech Recognition via Graph-Based Temporal Classification", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414058, June 2021, pp. 6548-6552.
BibTeX TR2021-037 PDF
- @inproceedings{Moritz2021jun2,
-   author    = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
-   title     = {{Semi-Supervised Speech Recognition via Graph-Based Temporal Classification}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2021},
-   pages     = {6548--6552},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9414058},
-   url       = {https://www.merl.com/publications/TR2021-037},
- }
Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/DSLW51110, June 2021, pp. 1-6.
BibTeX TR2021-073 PDF
- @inproceedings{Watanabe2021jun,
- author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S. and Zhang, Wangyou},
- title = {{The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans}},
- booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
- year = 2021,
- pages = {1--6},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/DSLW51110},
- isbn = {978-1-6654-2826-2},
- url = {https://www.merl.com/publications/TR2021-073}
- }
Kim, S., Galley, M., Gunasekara, C., Lee, S., Atkinson, A., Peng, B., Schulz, H., Gao, J., Li, J., Adada, M., Huang, M., Lastras, L., Kummerfeld, J.K., Lasecki, W.S., Hori, C., Cherian, A., Marks, T.K., Rastogi, A., Zang, X., Sunkara, S., Gupta, R., "Overview of the Eighth Dialog System Technology Challenge: DSTC8", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3078368, May 2021.
BibTeX TR2021-064 PDF
- @article{Kim2021may,
-   author  = {Kim, Seokhwan and Galley, Michel and Gunasekara, Chulaka and Lee, Sungjin and Atkinson, Adam and Peng, Baolin and Schulz, Hannes and Gao, Jianfeng and Li, Jinchao and Adada, Mahmoud and Huang, Minlie and Lastras, Luis and Kummerfeld, Jonathan K. and Lasecki, Walter S. and Hori, Chiori and Cherian, Anoop and Marks, Tim K. and Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav},
-   title   = {{Overview of the Eighth Dialog System Technology Challenge: DSTC8}},
-   journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
-   year    = {2021},
-   month   = may,
-   doi     = {10.1109/TASLP.2021.3078368},
-   issn    = {2329-9290},
-   url     = {https://www.merl.com/publications/TR2021-064},
- }
Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
BibTeX TR2021-042 PDF Video
- @article{Hori2021may,
-   author  = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and {Le Roux}, Jonathan and Sullivan, Alan and Vetro, Anthony},
-   title   = {{マルチモーダルセンシング情報に基づくScene-aware Interaction 技術}},
-   journal = {Society of Automotive Engineers of Japan},
-   year    = {2021},
-   volume  = {75},
-   number  = {5},
-   pages   = {66--71},
-   month   = may,
-   url     = {https://www.merl.com/publications/TR2021-042},
- }
Pan, C., Chen, S., Ortega, A., "Spatio-Temporal Graph Scattering Transform", International Conference on Learning Representations (ICLR), May 2021.
BibTeX TR2021-044 PDF
- @inproceedings{Pan2021may,
-   author    = {Pan, Chao and Chen, Siheng and Ortega, Antonio},
-   title     = {{Spatio-Temporal Graph Scattering Transform}},
-   booktitle = {International Conference on Learning Representations (ICLR)},
-   year      = {2021},
-   month     = may,
-   url       = {https://www.merl.com/publications/TR2021-044},
- }