Publications

629 / 3,739 publications found.


  •  Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Sequence Transduction with Graph-based Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747788, April 2022, pp. 7212-7216.
    BibTeX TR2022-024 PDF
    • @inproceedings{Moritz2022apr,
    • author = {Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Sequence Transduction with Graph-based Supervision},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {7212--7216},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9747788},
    • url = {https://www.merl.com/publications/TR2022-024}
    • }
  •  Peng, K.-C., "Iterative Self Knowledge Distillation -- From Pothole Classification To Fine-Grained And COVID Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Gan, W.-S. and Ma, K. K., Eds., DOI: 10.1109/ICASSP43922.2022.9746470, April 2022, pp. 3139-3143.
    BibTeX TR2022-020 PDF Video Presentation
    • @inproceedings{Peng2022apr,
    • author = {Peng, Kuan-Chuan},
    • title = {Iterative Self Knowledge Distillation -- From Pothole Classification To Fine-Grained And COVID Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • editor = {Gan, W.-S. and Ma, K. K.},
    • pages = {3139--3143},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9746470},
    • issn = {1520-6149},
    • isbn = {978-1-6654-0541-6},
    • url = {https://www.merl.com/publications/TR2022-020}
    • }
  •  Petermann, D., Wichern, G., Wang, Z.-Q., Le Roux, J., "The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746005, April 2022, pp. 526-530.
    BibTeX TR2022-022 PDF Video Software
    • @inproceedings{Petermann2022apr,
    • author = {Petermann, Darius and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
    • title = {The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {526--530},
    • month = apr,
    • doi = {10.1109/ICASSP43922.2022.9746005},
    • url = {https://www.merl.com/publications/TR2022-022}
    • }
  •  Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
    BibTeX TR2022-019 PDF
    • @inproceedings{Shah2022apr,
    • author = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and Le Roux, Jonathan and Hori, Chiori},
    • title = {Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {7732--7736},
    • month = apr,
    • publisher = {IEEE},
    • issn = {1520-6149},
    • isbn = {978-1-6654-0540-9},
    • url = {https://www.merl.com/publications/TR2022-019}
    • }
  •  Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747604, April 2022, pp. 711-715.
    BibTeX TR2022-023 PDF
    • @inproceedings{Slizovskaia2022mar,
    • author = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
    • title = {Locate This, Not That: Class-Conditioned Sound Event DOA Estimation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {711--715},
    • month = apr,
    • doi = {10.1109/ICASSP43922.2022.9747604},
    • url = {https://www.merl.com/publications/TR2022-023}
    • }
  •  Cao, W., Benosman, M., Zhang, X., Ma, R., "Domain Knowledge-Based Automated Analog Circuit Design with Deep Reinforcement Learning", AAAI Conference on Artificial Intelligence, February 2022.
    BibTeX TR2022-017 PDF
    • @inproceedings{Cao2022feb,
    • author = {Cao, Weidong and Benosman, Mouhacine and Zhang, Xuan and Ma, Rui},
    • title = {Domain Knowledge-Based Automated Analog Circuit Design with Deep Reinforcement Learning},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2022,
    • month = feb,
    • url = {https://www.merl.com/publications/TR2022-017}
    • }
  •  Hori, C., Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Le Roux, J., Marks, T.K., "Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10", The 10th Dialog System Technology Challenge Workshop at AAAI, February 2022.
    BibTeX TR2022-016 PDF
    • @inproceedings{Hori2022feb,
    • author = {Hori, Chiori and Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Le Roux, Jonathan and Marks, Tim K.},
    • title = {Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10},
    • booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI},
    • year = 2022,
    • month = feb,
    • url = {https://www.merl.com/publications/TR2022-016}
    • }
  •  Shah, A.P., Hori, T., Le Roux, J., Hori, C., "DSTC10-AVSD Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning", The 10th Dialog System Technology Challenge Workshop at AAAI 2022, February 2022.
    BibTeX TR2022-025 PDF
    • @inproceedings{Shah2022feb,
    • author = {Shah, Ankit Parag and Hori, Takaaki and Le Roux, Jonathan and Hori, Chiori},
    • title = {DSTC10-AVSD Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning},
    • booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI 2022},
    • year = 2022,
    • month = feb,
    • url = {https://www.merl.com/publications/TR2022-025}
    • }
  •  Cherian, A., Hori, C., Marks, T.K., Le Roux, J., "(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i1.19922, February 2022, pp. 444-453.
    BibTeX TR2022-014 PDF Video Presentation
    • @inproceedings{Cherian2022feb,
    • author = {Cherian, Anoop and Hori, Chiori and Marks, Tim K. and Le Roux, Jonathan},
    • title = {(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering},
    • booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
    • year = 2022,
    • pages = {444--453},
    • month = feb,
    • doi = {10.1609/aaai.v36i1.19922},
    • url = {https://www.merl.com/publications/TR2022-014}
    • }
  •  Ke, L., Peng, K.-C., Lyu, S., "Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition", AAAI Conference on Artificial Intelligence, February 2022.
    BibTeX TR2022-015 PDF Presentation
    • @inproceedings{Ke2022feb,
    • author = {Ke, Lipeng and Peng, Kuan-Chuan and Lyu, Siwei},
    • title = {Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2022,
    • month = feb,
    • url = {https://www.merl.com/publications/TR2022-015}
    • }
  •  Shah, A., Sra, S., Chellappa, R., Cherian, A., "Max-Margin Contrastive Learning", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i8.20796, February 2022, pp. 8220-8230.
    BibTeX TR2022-013 PDF
    • @inproceedings{Shah2022feb2,
    • author = {Shah, Anshul and Sra, Suvrit and Chellappa, Rama and Cherian, Anoop},
    • title = {Max-Margin Contrastive Learning},
    • booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
    • year = 2022,
    • pages = {8220--8230},
    • month = feb,
    • doi = {10.1609/aaai.v36i8.20796},
    • url = {https://www.merl.com/publications/TR2022-013}
    • }
  •  Medin, S.C., Egger, B., Cherian, A., Wang, Y., Tenenbaum, J.B., Liu, X., Marks, T.K., "MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i2.20091, February 2022, pp. 1962-1971.
    BibTeX TR2022-011 PDF Video Data Presentation
    • @inproceedings{Medin2022feb,
    • author = {Medin, Safa C. and Egger, Bernhard and Cherian, Anoop and Wang, Ye and Tenenbaum, Joshua B. and Liu, Xiaoming and Marks, Tim K.},
    • title = {MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2022,
    • pages = {1962--1971},
    • month = feb,
    • doi = {10.1609/aaai.v36i2.20091},
    • url = {https://www.merl.com/publications/TR2022-011}
    • }
  •  Lohit, S., Jones, M.J., "Model Compression Using Optimal Transport", IEEE Winter Conference on Applications of Computer Vision (WACV), January 2022.
    BibTeX TR2022-006 PDF Presentation
    • @inproceedings{Lohit2022jan,
    • author = {Lohit, Suhas and Jones, Michael J.},
    • title = {Model Compression Using Optimal Transport},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2022,
    • month = jan,
    • publisher = {CVF OpenAccess},
    • url = {https://www.merl.com/publications/TR2022-006}
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3129363, Vol. 29, pp. 3476-3490, December 2021.
    BibTeX TR2021-144 PDF
    • @article{Wang2021dec,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2021,
    • volume = 29,
    • pages = {3476--3490},
    • month = dec,
    • doi = {10.1109/TASLP.2021.3129363},
    • url = {https://www.merl.com/publications/TR2021-144}
    • }
  •  Yu, X., van Baar, J., Chen, S., Sullivan, A., "Keypoint-aligned 3D Human Shape Recovery from A Single Image with Bilayer-Graph", International Conference on 3D Vision (3DV), DOI: 10.1109/3DV53792.2021.00060, December 2021, pp. 505-514.
    BibTeX TR2021-143 PDF
    • @inproceedings{Yu2021dec,
    • author = {Yu, Xin and van Baar, Jeroen and Chen, Siheng and Sullivan, Alan},
    • title = {Keypoint-aligned 3D Human Shape Recovery from A Single Image with Bilayer-Graph},
    • booktitle = {International Conference on 3D Vision (3DV)},
    • year = 2021,
    • pages = {505--514},
    • month = dec,
    • doi = {10.1109/3DV53792.2021.00060},
    • url = {https://www.merl.com/publications/TR2021-143}
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "On The Compensation Between Magnitude and Phase in Speech Separation", IEEE Signal Processing Letters, DOI: 10.1109/LSP.2021.3116502, Vol. 28, pp. 2018-2022, November 2021.
    BibTeX TR2021-137 PDF
    • @article{Wang2021nov2,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    • title = {On The Compensation Between Magnitude and Phase in Speech Separation},
    • journal = {IEEE Signal Processing Letters},
    • year = 2021,
    • volume = 28,
    • pages = {2018--2022},
    • month = nov,
    • doi = {10.1109/LSP.2021.3116502},
    • url = {https://www.merl.com/publications/TR2021-137}
    • }
  •  Demir, A., Koike-Akino, T., Wang, Y., Erdogmus, D., Haruna, M., "EEG-GNN: Graph Neural Networks for Classification of Electroencephalogram (EEG) Signals", International IEEE EMBS Conference on Neural Engineering, DOI: 10.1109/EMBC46164.2021.9630194, October 2021.
    BibTeX TR2021-136 PDF Video Presentation
    • @inproceedings{Demir2021oct,
    • author = {Demir, Andac and Koike-Akino, Toshiaki and Wang, Ye and Erdogmus, Deniz and Haruna, Masaki},
    • title = {EEG-GNN: Graph Neural Networks for Classification of Electroencephalogram (EEG) Signals},
    • booktitle = {International IEEE EMBS Conference on Neural Engineering},
    • year = 2021,
    • month = oct,
    • publisher = {IEEE},
    • doi = {10.1109/EMBC46164.2021.9630194},
    • issn = {2694-0604},
    • isbn = {978-1-7281-1179-7},
    • url = {https://www.merl.com/publications/TR2021-136}
    • }
  •  Rakin, A.S., Wang, Y., Aeron, S., Koike-Akino, T., Moulin, P., Parsons, K., "Towards Universal Adversarial Examples and Defenses", IEEE Information Theory Workshop, DOI: 10.1109/ITW48936.2021.9611439, October 2021.
    BibTeX TR2021-125 PDF Video
    • @inproceedings{Rakin2021oct,
    • author = {Rakin, Adnan S. and Wang, Ye and Aeron, Shuchin and Koike-Akino, Toshiaki and Moulin, Pierre and Parsons, Kieran},
    • title = {Towards Universal Adversarial Examples and Defenses},
    • booktitle = {IEEE Information Theory Workshop},
    • year = 2021,
    • month = oct,
    • publisher = {IEEE},
    • doi = {10.1109/ITW48936.2021.9611439},
    • isbn = {978-1-6654-0312-2},
    • url = {https://www.merl.com/publications/TR2021-125}
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632667, October 2021, pp. 56-60.
    BibTeX TR2021-127 PDF
    • @inproceedings{Wang2021oct4,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Convolutive Prediction for Reverberant Speech Separation},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2021,
    • pages = {56--60},
    • month = oct,
    • publisher = {IEEE},
    • doi = {10.1109/WASPAA52581.2021.9632667},
    • url = {https://www.merl.com/publications/TR2021-127}
    • }
  •  Wichern, G., Chakrabarty, A., Wang, Z.-Q., Le Roux, J., "Anomalous sound detection using attentive neural processes", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632762, October 2021, pp. 186-190.
    BibTeX TR2021-129 PDF
    • @inproceedings{Wichern2021oct,
    • author = {Wichern, Gordon and Chakrabarty, Ankush and Wang, Zhong-Qiu and Le Roux, Jonathan},
    • title = {Anomalous sound detection using attentive neural processes},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2021,
    • pages = {186--190},
    • month = oct,
    • publisher = {IEEE},
    • doi = {10.1109/WASPAA52581.2021.9632762},
    • url = {https://www.merl.com/publications/TR2021-129}
    • }
  •  Chatterjee, M., Ahuja, N., Cherian, A., "A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 9751-9761.
    BibTeX TR2021-096 PDF Video
    • @inproceedings{Chatterjee2021oct2,
    • author = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
    • title = {A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2021,
    • pages = {9751--9761},
    • month = oct,
    • url = {https://www.merl.com/publications/TR2021-096}
    • }
  •  Chatterjee, M., Le Roux, J., Ahuja, N., Cherian, A., "Visual Scene Graphs for Audio Source Separation", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 1204-1213.
    BibTeX TR2021-095 PDF Video Software
    • @inproceedings{Chatterjee2021oct,
    • author = {Chatterjee, Moitreya and Le Roux, Jonathan and Ahuja, Narendra and Cherian, Anoop},
    • title = {Visual Scene Graphs for Audio Source Separation},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2021,
    • pages = {1204--1213},
    • month = oct,
    • publisher = {CVF},
    • url = {https://www.merl.com/publications/TR2021-095}
    • }
  •  Cherian, A., Pais, G., Jain, S., Marks, T.K., Sullivan, A., "InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 10023-10032.
    BibTeX TR2021-097 PDF Video Data Software Presentation
    • @inproceedings{Cherian2021oct,
    • author = {Cherian, Anoop and Pais, Goncalo and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
    • title = {InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2021,
    • pages = {10023--10032},
    • month = oct,
    • publisher = {CVF},
    • url = {https://www.merl.com/publications/TR2021-097}
    • }
  •  Comas, A., Marks, T.K., Mansour, H., Lohit, S., Ma, Y., Liu, X., "TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506663, September 2021, pp. 309-313.
    BibTeX TR2021-099 PDF
    • @inproceedings{Comas2021sep,
    • author = {Comas, Armand and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas and Ma, Yechi and Liu, Xiaoming},
    • title = {TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG},
    • booktitle = {IEEE International Conference on Image Processing (ICIP)},
    • year = 2021,
    • pages = {309--313},
    • month = sep,
    • publisher = {IEEE},
    • doi = {10.1109/ICIP42928.2021.9506663},
    • url = {https://www.merl.com/publications/TR2021-099}
    • }
  •  Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2021-571, September 2021, pp. 726-730.
    BibTeX TR2021-103 PDF
    • @inproceedings{Higuchi2021sep,
    • author = {Higuchi, Yosuke and Moritz, Niko and Le Roux, Jonathan and Hori, Takaaki},
    • title = {Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2021,
    • pages = {726--730},
    • month = sep,
    • doi = {10.21437/Interspeech.2021-571},
    • url = {https://www.merl.com/publications/TR2021-103}
    • }