Publications

Baoueb, T., Liu, H., Fontaine, M., Le Roux, J., Richard, G., "SpecDiff-GAN: A Spectrally-Shaped Noise Diffusion GAN for Speech and Music Synthesis", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446830, March 2024, pp. 986-990.
BibTeX TR2024-013 PDF
- @inproceedings{Baoueb2024mar,
- author = {Baoueb, Teysir and Liu, Haocheng and Fontaine, Mathieu and {Le Roux}, Jonathan and Richard, Ga{\"e}l},
- title = {{SpecDiff-GAN}: A Spectrally-Shaped Noise Diffusion {GAN} for Speech and Music Synthesis},
- booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
- year = 2024,
- pages = {986--990},
- month = mar,
- doi = {10.1109/ICASSP48485.2024.10446830},
- issn = {2379-190X},
- isbn = {979-8-3503-4485-1},
- url = {https://www.merl.com/publications/TR2024-013}
- }
Liu, H., Baoueb, T., Fontaine, M., Le Roux, J., Richard, G., "GLA-Grad: A Griffin-Lim Extended Waveform Generation Diffusion Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446058, March 2024, pp. 11611-11615.
BibTeX TR2024-014 PDF
- @inproceedings{Liu2024mar,
- author = {Liu, Haocheng and Baoueb, Teysir and Fontaine, Mathieu and {Le Roux}, Jonathan and Richard, Ga{\"e}l},
- title = {{GLA-Grad}: A {Griffin-Lim} Extended Waveform Generation Diffusion Model},
- booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
- year = 2024,
- pages = {11611--11615},
- month = mar,
- doi = {10.1109/ICASSP48485.2024.10446058},
- issn = {2379-190X},
- isbn = {979-8-3503-4485-1},
- url = {https://www.merl.com/publications/TR2024-014}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), June 2022.
BibTeX TR2022-082 PDF
- @inproceedings{Chatterjee2022jun,
- author = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title = {Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio},
- booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition Workshops ({CVPRW})},
- year = 2022,
- month = jun,
- url = {https://www.merl.com/publications/TR2022-082}
- }
Cherian, A., Chatterjee, M., Ahuja, N., "Sound2Sight: Generating Visual Dynamics from Sound and Context", European Conference on Computer Vision (ECCV), Vedaldi, A., Bischof, H., Brox, Th., Frahm, J.-M., Eds., August 2020.
BibTeX TR2020-121 PDF Software
- @inproceedings{Cherian2020aug,
- author = {Cherian, Anoop and Chatterjee, Moitreya and Ahuja, Narendra},
- title = {{Sound2Sight}: Generating Visual Dynamics from Sound and Context},
- booktitle = {European Conference on Computer Vision ({ECCV})},
- year = 2020,
- editor = {Vedaldi, A. and Bischof, H. and Brox, Th. and Frahm, J.-M.},
- month = aug,
- publisher = {Springer},
- url = {https://www.merl.com/publications/TR2020-121}
- }
Kavalerov, I., Wisdom, S., Erdogan, H., Patton, B., Wilson, K., Le Roux, J., Hershey, J., "Universal Sound Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937253, October 2019, pp. 170-174.
BibTeX TR2019-123 PDF
- @inproceedings{Kavalerov2019oct,
- author = {Kavalerov, Ilya and Wisdom, Scott and Erdogan, Hakan and Patton, Brian and Wilson, Kevin and {Le Roux}, Jonathan and Hershey, John},
- title = {Universal Sound Separation},
- booktitle = {{IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics ({WASPAA})},
- year = 2019,
- pages = {170--174},
- month = oct,
- doi = {10.1109/WASPAA.2019.8937253},
- issn = {1947-1629},
- isbn = {978-1-7281-1123-0},
- url = {https://www.merl.com/publications/TR2019-123}
- }