- Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F., Le Roux, J., Watanabe, S., "BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up," Tech. Rep. TR2023-068, DCASE2023 Challenge, May 2023.
BibTeX TR2023-068 PDF
% DCASE2023 Challenge technical report. Braced title words keep their capitalization under sentence-casing styles.
@techreport{Wu2023may,
  author      = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, Francois and Le Roux, Jonathan and Watanabe, Shinji},
  title       = {{BEATs}-based Audio Captioning Model with Instructor Embedding Supervision and {ChatGPT} Mix-up},
  institution = {DCASE2023 Challenge},
  number      = {TR2023-068},
  year        = 2023,
  month       = may,
  url         = {https://www.merl.com/publications/TR2023-068}
}
- Chen, K., Wichern, G., Germain, F., Le Roux, J., "Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT", IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB), DOI: 10.1109/ICASSPW59220.2023.10193575, May 2023.
BibTeX TR2023-030 PDF
% ICASSP 2023 satellite workshop (SASB) paper. Braced title words keep their capitalization under sentence-casing styles.
@inproceedings{Chen2023may,
  author    = {Chen, Ke and Wichern, Gordon and Germain, Francois and Le Roux, Jonathan},
  title     = {{Pac-HuBERT}: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit {BERT}},
  booktitle = {IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB)},
  year      = 2023,
  month     = may,
  doi       = {10.1109/ICASSPW59220.2023.10193575},
  isbn      = {979-8-3503-0261-5},
  url       = {https://www.merl.com/publications/TR2023-030}
}
- Aralikatti, R., Boeddeker, C., Wichern, G., Subramanian, A.S., Le Roux, J., "Reverberation as Supervision for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095022, May 2023, pp. 1-5.
BibTeX TR2023-016 PDF
% ICASSP 2023 conference paper.
@inproceedings{Aralikatti2023may,
  author    = {Aralikatti, Rohith and Boeddeker, Christoph and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
  title     = {Reverberation as Supervision for Speech Separation},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10095022},
  url       = {https://www.merl.com/publications/TR2023-016}
}
- Berk, A., Ma, Y., Boufounos, P.T., Wang, P., Mansour, H., "Deep Proximal Gradient Method for Learned Convex Regularizers", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10094632, May 2023, pp. 1-5.
BibTeX TR2023-032 PDF Video
% ICASSP 2023 conference paper.
@inproceedings{Berk2023may,
  author    = {Berk, Aaron and Ma, Yanting and Boufounos, Petros T. and Wang, Pu and Mansour, Hassan},
  title     = {Deep Proximal Gradient Method for Learned Convex Regularizers},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10094632},
  isbn      = {978-1-7281-6327-7},
  url       = {https://www.merl.com/publications/TR2023-032}
}
- Bralios, D., Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Latent Iterative Refinement for Modular Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096897, May 2023, pp. 1-5.
BibTeX TR2023-019 PDF
% ICASSP 2023 conference paper.
@inproceedings{Bralios2023may,
  author    = {Bralios, Dimitrios and Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and Le Roux, Jonathan},
  title     = {Latent Iterative Refinement for Modular Source Separation},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10096897},
  url       = {https://www.merl.com/publications/TR2023-019}
}
- Fujihashi, T., Koike-Akino, T., Watanabe, T., "Soft 2D-to-3D Delivery Using Deep Graph Neural Networks for Holographic-Type Communication", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095882, May 2023.
BibTeX TR2023-031 PDF
% ICASSP 2023 conference paper. "2D-to-3D" is braced so sentence-casing styles do not lowercase the D's.
@inproceedings{Fujihashi2023may,
  author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
  title     = {Soft {2D-to-3D} Delivery Using Deep Graph Neural Networks for Holographic-Type Communication},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10095882},
  isbn      = {978-1-7281-6327-7},
  url       = {https://www.merl.com/publications/TR2023-031}
}
- Jeon, E.S., Lohit, S., Anirudh, R., Turaga, P., "Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096888, May 2023.
BibTeX TR2023-021 PDF Presentation
% ICASSP 2023 conference paper.
@inproceedings{Jeon2023may,
  author    = {Jeon, Eun Som and Lohit, Suhas and Anirudh, Rushil and Turaga, Pavan},
  title     = {Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10096888},
  url       = {https://www.merl.com/publications/TR2023-021}
}
- Jin, S., Wang, P., Boufounos, P.T., Takahashi, R., Roy, S., "Spatial-Domain Object Detection Under MIMO-FMCW Automotive Radar Interference", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095409, May 2023, pp. 1-5.
BibTeX TR2023-027 PDF
% ICASSP 2023 conference paper. The acronym in the title is braced so sentence-casing styles keep it uppercase.
@inproceedings{Jin2023may,
  author    = {Jin, Sian and Wang, Pu and Boufounos, Petros T. and Takahashi, Ryuhei and Roy, Sumit},
  title     = {Spatial-Domain Object Detection Under {MIMO-FMCW} Automotive Radar Interference},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10095409},
  isbn      = {978-1-7281-6327-7},
  url       = {https://www.merl.com/publications/TR2023-027}
}
- Kelkar, V.A., Liu, D., Inoue, H., Kanemaru, M., "Sparsity-driven Joint Blind Deconvolution-demodulation with Application to Motor Fault Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10094590, May 2023.
BibTeX TR2023-026 PDF
% ICASSP 2023 conference paper; publisher given as IEEE, matching the other ICASSP entries in this list.
% NOTE(review): isbn differs from the other ICASSP 2023 entries (978-1-7281-6327-7) -- possibly a different edition; confirm.
@inproceedings{Kelkar2023may,
  author    = {Kelkar, Varun A. and Liu, Dehong and Inoue, Hiroshi and Kanemaru, Makoto},
  title     = {Sparsity-driven Joint Blind Deconvolution-demodulation with Application to Motor Fault Detection},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10094590},
  isbn      = {978-1-7281-6328-4},
  url       = {https://www.merl.com/publications/TR2023-026}
}
- Petermann, D., Wichern, G., Subramanian, A.S., Le Roux, J., "Hyperbolic Audio Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10094943, May 2023, pp. 1-5.
BibTeX TR2023-017 PDF Video Software
% ICASSP 2023 conference paper.
@inproceedings{Petermann2023may,
  author    = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
  title     = {Hyperbolic Audio Source Separation},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10094943},
  url       = {https://www.merl.com/publications/TR2023-017}
}
- Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Optimal Condition Training for Target Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095128, May 2023, pp. 1-5.
BibTeX TR2023-018 PDF
% ICASSP 2023 conference paper.
@inproceedings{Tzinis2023may,
  author    = {Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and Le Roux, Jonathan},
  title     = {Optimal Condition Training for Target Source Separation},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10095128},
  url       = {https://www.merl.com/publications/TR2023-018}
}
- Ulvog, A., Rapp, J., Koike-Akino, T., Mansour, H., Boufounos, P.T., Parsons, K., "Phase Unwrapping in Correlated Noise for FMCW LIDAR Depth Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095456, May 2023, pp. 1-5.
BibTeX TR2023-028 PDF Video Presentation
% ICASSP 2023 conference paper; publisher given as IEEE, matching the other ICASSP entries in this list.
% Acronyms in the title are braced so sentence-casing styles keep them uppercase.
@inproceedings{Ulvog2023may,
  author    = {Ulvog, Alfred and Rapp, Joshua and Koike-Akino, Toshiaki and Mansour, Hassan and Boufounos, Petros T. and Parsons, Kieran},
  title     = {Phase Unwrapping in Correlated Noise for {FMCW} {LIDAR} Depth Estimation},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10095456},
  isbn      = {978-1-7281-6327-7},
  url       = {https://www.merl.com/publications/TR2023-028}
}
- Vaca-Rubio, C., Wang, P., Koike-Akino, T., Wang, Y., Boufounos, P.T., Popovski, P., "mmWave Wi-Fi Trajectory Estimation with Continuous-Time Neural Dynamic Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096474, May 2023, pp. 1-5.
BibTeX TR2023-033 PDF Video
% ICASSP 2023 conference paper. "mmWave" and "Wi-Fi" are braced so sentence-casing styles keep their internal capitals.
@inproceedings{Vaca-Rubio2023may,
  author    = {Vaca-Rubio, Cristian and Wang, Pu and Koike-Akino, Toshiaki and Wang, Ye and Boufounos, Petros T. and Popovski, Petar},
  title     = {{mmWave} {Wi-Fi} Trajectory Estimation with Continuous-Time Neural Dynamic Learning},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10096474},
  isbn      = {978-1-7281-6327-7},
  url       = {https://www.merl.com/publications/TR2023-033}
}
- Yen, H., Germain, F., Wichern, G., Le Roux, J., "Cold Diffusion for Speech Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096064, May 2023, pp. 1-5.
BibTeX TR2023-020 PDF
% ICASSP 2023 conference paper.
@inproceedings{Yen2023may,
  author    = {Yen, Hao and Germain, Francois and Wichern, Gordon and Le Roux, Jonathan},
  title     = {Cold Diffusion for Speech Enhancement},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10096064},
  url       = {https://www.merl.com/publications/TR2023-020}
}
- Zhao, Q., Ma, Y., Boufounos, P.T., Nabi, S., Mansour, H., "Deep Born Operator Learning for Reflection Tomographic Imaging", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095494, May 2023, pp. 1-5.
BibTeX TR2023-029 PDF Video
% ICASSP 2023 conference paper.
% NOTE(review): "Born" is braced on the assumption that it is a proper noun (a person's name); confirm.
@inproceedings{Zhao2023may,
  author    = {Zhao, Qingqing and Ma, Yanting and Boufounos, Petros T. and Nabi, Saleh and Mansour, Hassan},
  title     = {Deep {Born} Operator Learning for Reflection Tomographic Imaging},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2023,
  pages     = {1--5},
  month     = may,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP49357.2023.10095494},
  url       = {https://www.merl.com/publications/TR2023-029}
}
- Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
BibTeX TR2022-166 PDF
% Journal article. The acronym in the title is braced so sentence-casing styles keep it uppercase.
% NOTE(review): volume 31 is paired with year 2022 / month dec -- confirm against the publisher record.
@article{Wang2022dec2,
  author  = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
  title   = {{STFT}-Domain Neural Speech Enhancement with Very Low Algorithmic Latency},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  year    = 2022,
  volume  = 31,
  pages   = {397--410},
  month   = dec,
  doi     = {10.1109/TASLP.2022.3224285},
  issn    = {2329-9304},
  url     = {https://www.merl.com/publications/TR2022-166}
}
- Tzinis, E., Wichern, G., Subramanian, A.S., Smaragdis, P., Le Roux, J., "Heterogeneous Target Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2022-10717, September 2022, pp. 1796-1800.
BibTeX TR2022-115 PDF Video Presentation
% Interspeech 2022 conference paper.
@inproceedings{Tzinis2022sep,
  author    = {Tzinis, Efthymios and Wichern, Gordon and Subramanian, Aswin Shanmugam and Smaragdis, Paris and Le Roux, Jonathan},
  title     = {Heterogeneous Target Speech Separation},
  booktitle = {Interspeech},
  year      = 2022,
  pages     = {1796--1800},
  month     = sep,
  doi       = {10.21437/Interspeech.2022-10717},
  url       = {https://www.merl.com/publications/TR2022-115}
}
- Chang, X., Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747375, April 2022, pp. 7322-7326.
BibTeX TR2022-021 PDF
% ICASSP 2022 conference paper. The acronym in the title is braced so sentence-casing styles keep it uppercase.
@inproceedings{Chang2022apr,
  author    = {Chang, Xuankai and Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
  title     = {Extended Graph Temporal Classification for Multi-Speaker End-to-End {ASR}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  pages     = {7322--7326},
  month     = apr,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP43922.2022.9747375},
  url       = {https://www.merl.com/publications/TR2022-021}
}
- Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746275, April 2022, pp. 7672-7676.
BibTeX TR2022-026 PDF
% ICASSP 2022 conference paper.
@inproceedings{Higuchi2022apr,
  author    = {Higuchi, Yosuke and Moritz, Niko and Le Roux, Jonathan and Hori, Takaaki},
  title     = {Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  pages     = {7672--7676},
  month     = apr,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP43922.2022.9746275},
  url       = {https://www.merl.com/publications/TR2022-026}
}
- Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Sequence Transduction with Graph-based Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747788, April 2022, pp. 7212-7216.
BibTeX TR2022-024 PDF
% ICASSP 2022 conference paper.
@inproceedings{Moritz2022apr,
  author    = {Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
  title     = {Sequence Transduction with Graph-based Supervision},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  pages     = {7212--7216},
  month     = apr,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP43922.2022.9746470},
  url       = {https://www.merl.com/publications/TR2022-024}
}
- Peng, K.-C., "Iterative Self Knowledge Distillation -- From Pothole Classification To Fine-Grained And COVID Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Gan, W.-S. and Ma, K. K., Eds., DOI: 10.1109/ICASSP43922.2022.9746470, April 2022, pp. 3139-3143.
BibTeX TR2022-020 PDF Video Presentation
% ICASSP 2022 conference paper. The acronym in the title is braced so sentence-casing styles keep it uppercase.
@inproceedings{Peng2022apr,
  author    = {Peng, Kuan-Chuan},
  title     = {Iterative Self Knowledge Distillation -- From Pothole Classification To Fine-Grained And {COVID} Recognition},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  editor    = {Gan, W.-S. and Ma, K. K.},
  pages     = {3139--3143},
  month     = apr,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP43922.2022.9746470},
  issn      = {1520-6149},
  isbn      = {978-1-6654-0541-6},
  url       = {https://www.merl.com/publications/TR2022-020}
}
- Petermann, D., Wichern, G., Wang, Z.-Q., Le Roux, J., "The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746005, April 2022, pp. 526-530.
BibTeX TR2022-022 PDF Software
% ICASSP 2022 conference paper; publisher given as IEEE, matching the other ICASSP entries in this list.
@inproceedings{Petermann2022apr,
  author    = {Petermann, Darius and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
  title     = {The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  pages     = {526--530},
  month     = apr,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP43922.2022.9746005},
  url       = {https://www.merl.com/publications/TR2022-022}
}
- Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
% ICASSP 2022 conference paper.
% NOTE(review): no doi is recorded for this entry, unlike the other ICASSP entries in this list -- add once confirmed.
@inproceedings{Shah2022apr,
  author    = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and Le Roux, Jonathan and Hori, Chiori},
  title     = {Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  pages     = {7732--7736},
  month     = apr,
  publisher = {IEEE},
  issn      = {1520-6149},
  isbn      = {978-1-6654-0540-9},
  url       = {https://www.merl.com/publications/TR2022-019}
}
- Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747604, April 2022, pp. 711-715.
BibTeX TR2022-023 PDF
% ICASSP 2022 conference paper; publisher given as IEEE, matching the other ICASSP entries in this list.
% The citation key ends in "mar" although month is apr; the key is kept as-is so existing citations still resolve.
% The acronym in the title is braced so sentence-casing styles keep it uppercase.
@inproceedings{Slizovskaia2022mar,
  author    = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
  title     = {Locate This, Not That: Class-Conditioned Sound Event {DOA} Estimation},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  pages     = {711--715},
  month     = apr,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP43922.2022.9747604},
  url       = {https://www.merl.com/publications/TR2022-023}
}
- Yu, J., Wang, P., Koike-Akino, T., Orlik, P.V., "Multi-Modal Recurrent Fusion for Indoor Localization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746071, April 2022.
BibTeX TR2022-018 PDF
% ICASSP 2022 conference paper.
@inproceedings{Yu2022apr,
  author    = {Yu, Jianyuan and Wang, Pu and Koike-Akino, Toshiaki and Orlik, Philip V.},
  title     = {Multi-Modal Recurrent Fusion for Indoor Localization},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2022,
  month     = apr,
  publisher = {IEEE},
  doi       = {10.1109/ICASSP43922.2022.9746071},
  issn      = {2379-190X},
  isbn      = {978-1-6654-0540-9},
  url       = {https://www.merl.com/publications/TR2022-018}
}