Publications

Kavalerov, I., Wisdom, S., Erdogan, H., Patton, B., Wilson, K., Le Roux, J., Hershey, J., "Universal Sound Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937253, October 2019, pp. 170-174.
BibTeX TR2019-123 PDF
@inproceedings{Kavalerov2019oct,
  author    = {Kavalerov, Ilya and Wisdom, Scott and Erdogan, Hakan and Patton, Brian and Wilson, Kevin and Le Roux, Jonathan and Hershey, John},
  title     = {Universal Sound Separation},
  booktitle = {{IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics ({WASPAA})},
  year      = 2019,
  month     = oct,
  pages     = {170--174},
  doi       = {10.1109/WASPAA.2019.8937253},
  issn      = {1947-1629},
  isbn      = {978-1-7281-1123-0},
  url       = {https://www.merl.com/publications/TR2019-123}
}
Le Roux, J., Wisdom, S., Erdogan, H., Hershey, J., "SDR -- Half-Baked or Well Done?", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683855, May 2019.
BibTeX TR2019-013 PDF
@inproceedings{LeRoux2019may,
  author    = {Le Roux, Jonathan and Wisdom, Scott and Erdogan, Hakan and Hershey, John},
  title     = {{SDR} -- Half-Baked or Well Done?},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  year      = 2019,
  month     = may,
  doi       = {10.1109/ICASSP.2019.8683855},
  url       = {https://www.merl.com/publications/TR2019-013}
}
Erdogan, H., Hershey, J., Watanabe, S., Le Roux, J., "Deep recurrent networks for separation and recognition of single-channel speech in non-stationary background audio" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R., Eds., chapter 7, Springer, July 2018.
BibTeX
@incollection{Erdogan2018jul,
  author    = {Erdogan, Hakan and Hershey, John and Watanabe, Shinji and Le Roux, Jonathan},
  title     = {Deep recurrent networks for separation and recognition of single-channel speech in non-stationary background audio},
  booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
  editor    = {Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J. R.},
  chapter   = 7,
  publisher = {Springer},
  year      = 2018,
  month     = jul,
  isbn      = {978-3-319-64680-0}
}
Xiao, X., Watanabe, S., Erdogan, H., Mandel, M., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Yu, D., "Discriminative beamforming with phase aware neural networks for speech enhancement and recognition" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R., Eds., chapter 4, Springer, July 9, 2018.
BibTeX
@incollection{Xiao2018jul2,
  author    = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Mandel, Michael and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Yu, Dong},
  title     = {Discriminative beamforming with phase aware neural networks for speech enhancement and recognition},
  booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
  editor    = {Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J. R.},
  chapter   = 4,
  publisher = {Springer},
  year      = 2018,
  month     = jul,
  isbn      = {978-3-319-64680-0}
}
Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-012 PDF
@inproceedings{Meng2017mar,
  author    = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
  title     = {Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  year      = 2017,
  month     = mar,
  url       = {https://www.merl.com/publications/TR2017-012}
}
Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation", Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
BibTeX TR2016-113 PDF
@techreport{LeRoux2016sep,
  author      = {Le Roux, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
  title       = {Learning-Based Approaches to Speech Enhancement and Separation},
  institution = {Mitsubishi Electric Research Laboratories},
  number      = {TR2016-113},
  year        = 2016,
  month       = sep,
  note        = {Interspeech Tutorials},
  url         = {https://www.merl.com/publications/TR2016-113}
}
Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/Interspeech.2016-552, September 2016, pp. 1981-1985.
BibTeX TR2016-072 PDF
@inproceedings{Erdogan2016sep,
  author    = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and Le Roux, Jonathan},
  title     = {Improved {MVDR} beamforming using single-channel mask prediction networks},
  booktitle = {Interspeech},
  year      = 2016,
  month     = sep,
  pages     = {1981--1985},
  doi       = {10.21437/Interspeech.2016-552},
  url       = {https://www.merl.com/publications/TR2016-072}
}
Xiao, X., Watanabe, S., Erdogan, H., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Mandel, M., Yu, D., "Deep Beamforming Networks for Multi-Channel Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472778, March 2016, pp. 5745-5749.
BibTeX TR2016-002 PDF
@inproceedings{Xiao2016mar,
  author    = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Mandel, Michael and Yu, Dong},
  title     = {Deep Beamforming Networks for Multi-Channel Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  year      = 2016,
  month     = mar,
  pages     = {5745--5749},
  doi       = {10.1109/ICASSP.2016.7472778},
  url       = {https://www.merl.com/publications/TR2016-002}
}
Hori, T., Chen, Z., Erdogan, H., Hershey, J.R., Le Roux, J., Mitra, V., Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
BibTeX TR2015-135 PDF
@inproceedings{Hori2015dec2,
  author    = {Hori, T. and Chen, Z. and Erdogan, H. and Hershey, J. R. and {Le Roux}, J. and Mitra, V. and Watanabe, S.},
  title     = {The {MERL/SRI} System for the 3rd {CHiME} Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition},
  booktitle = {{IEEE} Workshop on Automatic Speech Recognition and Understanding ({ASRU})},
  publisher = {IEEE},
  year      = 2015,
  month     = dec,
  pages     = {475--481},
  doi       = {10.1109/ASRU.2015.7404833},
  url       = {https://www.merl.com/publications/TR2015-135}
}
Chen, Z., Watanabe, S., Erdogan, H., Hershey, J.R., "Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks", Interspeech, September 2015, vol. 1 of 5, p. 1278.
BibTeX TR2015-100 PDF
@inproceedings{Chen2015sep,
  author    = {Chen, Z. and Watanabe, S. and Erdogan, H. and Hershey, J. R.},
  title     = {Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks},
  booktitle = {Interspeech},
  year      = 2015,
  month     = sep,
  volume    = {1},
  pages     = {1278},
  isbn      = {978-1-5108-1790-6},
  url       = {https://www.merl.com/publications/TR2015-100}
}
Weninger, F.J., Erdogan, H., Watanabe, S., Vincent, E., Le Roux, J., Hershey, J.R., Schuller, B.W., "Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR", Latent Variable Analysis and Signal Separation Conference (LVA), DOI: 10.1007/978-3-319-22482-4_11, August 2015, vol. 9237, pp. 91-99.
BibTeX TR2015-094 PDF
@inproceedings{Weninger2015aug,
  author    = {Weninger, F. J. and Erdogan, H. and Watanabe, S. and Vincent, E. and {Le Roux}, J. and Hershey, J. R. and Schuller, B. W.},
  title     = {Speech Enhancement with {LSTM} Recurrent Neural Networks and Its Application to Noise-Robust {ASR}},
  booktitle = {Latent Variable Analysis and Signal Separation Conference ({LVA})},
  year      = 2015,
  month     = aug,
  volume    = 9237,
  pages     = {91--99},
  doi       = {10.1007/978-3-319-22482-4_11},
  isbn      = {978-3-319-22482-4},
  url       = {https://www.merl.com/publications/TR2015-094}
}
Erdogan, H., Hershey, J.R., Watanabe, S., Le Roux, J., "Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178061, April 2015, pp. 708-712.
BibTeX TR2015-031 PDF
@inproceedings{Erdogan2015apr,
  author    = {Erdogan, H. and Hershey, J. R. and Watanabe, S. and {Le Roux}, J.},
  title     = {Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  publisher = {IEEE},
  year      = 2015,
  month     = apr,
  pages     = {708--712},
  doi       = {10.1109/ICASSP.2015.7178061},
  url       = {https://www.merl.com/publications/TR2015-031}
}
Topkaya, I.S., Erdogan, H., Porikli, F., "Detecting and Tracking Unknown Number of Objects with Dirichlet Process Mixture Models and Markov Random Fields", International Symposium on Visual Computing (ISVC), July 2013.
BibTeX TR2013-078 PDF
@inproceedings{Topkaya2013jul,
  author    = {Topkaya, I. S. and Erdogan, H. and Porikli, F.},
  title     = {Detecting and Tracking Unknown Number of Objects with {Dirichlet} Process Mixture Models and {Markov} Random Fields},
  booktitle = {International Symposium on Visual Computing ({ISVC})},
  year      = 2013,
  month     = jul,
  url       = {https://www.merl.com/publications/TR2013-078}
}