Publications

Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
BibTeX TR2016-115 PDF
- @techreport{Delcroix2016sep,
- author = {Delcroix, Marc and Watanabe, Shinji},
- title = {{Recent Advances in Distant Speech Recognition}},
- booktitle = {Interspeech Tutorials},
- institution = {Interspeech},
- number = {TR2016-115},
- year = 2016,
- month = sep,
- url = {https://www.merl.com/publications/TR2016-115}
- }
Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation," Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
BibTeX TR2016-113 PDF
- @techreport{LeRoux2016sep,
- author = {{Le Roux}, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
- title = {{Learning-Based Approaches to Speech Enhancement and Separation}},
- booktitle = {Interspeech Tutorials},
- institution = {Interspeech},
- number = {TR2016-113},
- year = 2016,
- month = sep,
- url = {https://www.merl.com/publications/TR2016-113}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/Interspeech.2016-552, September 2016, pp. 1981-1985.
BibTeX TR2016-072 PDF
- @inproceedings{Erdogan2016sep,
-   author    = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and {Le Roux}, Jonathan},
-   title     = {{Improved MVDR beamforming using single-channel mask prediction networks}},
-   booktitle = {Interspeech},
-   month     = sep,
-   year      = {2016},
-   pages     = {1981--1985},
-   doi       = {10.21437/Interspeech.2016-552},
-   url       = {https://www.merl.com/publications/TR2016-072}
- }
Hori, C., Hori, T., Watanabe, S., Hershey, J.R., "Context-Sensitive and Role-Dependent Spoken Language Understanding using Bidirectional and Attention LSTMs", Interspeech, DOI: 10.21437/Interspeech.2016-1171, September 2016, pp. 3236-3240.
BibTeX TR2016-074 PDF
- @inproceedings{Hori2016sep,
-   author    = {Hori, Chiori and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
-   title     = {{Context-Sensitive and Role-Dependent Spoken Language Understanding using Bidirectional and Attention LSTMs}},
-   booktitle = {Interspeech},
-   month     = sep,
-   year      = {2016},
-   pages     = {3236--3240},
-   doi       = {10.21437/Interspeech.2016-1171},
-   url       = {https://www.merl.com/publications/TR2016-074}
- }
Isik, Y., Le Roux, J., Chen, Z., Watanabe, S., Hershey, J.R., "Single-Channel Multi-Speaker Separation using Deep Clustering", Interspeech, DOI: 10.21437/Interspeech.2016-1176, September 2016, pp. 545-549.
BibTeX TR2016-073 PDF
- @inproceedings{Isik2016sep,
-   author    = {Isik, Yusuf and {Le Roux}, Jonathan and Chen, Zhuo and Watanabe, Shinji and Hershey, John R.},
-   title     = {{Single-Channel Multi-Speaker Separation using Deep Clustering}},
-   booktitle = {Interspeech},
-   month     = sep,
-   year      = {2016},
-   pages     = {545--549},
-   doi       = {10.21437/Interspeech.2016-1176},
-   url       = {https://www.merl.com/publications/TR2016-073}
- }
Zmolikova, K., Karafiat, M., Vesely, K., Delcroix, M., Watanabe, S., Burget, L., Cernocky, J.H., "Data selection by sequence summarizing neural network in mismatch condition training", Interspeech, DOI: 10.21437/Interspeech.2016-741, September 2016, pp. 2354-2358.
BibTeX TR2016-075 PDF
- @inproceedings{Zmolikova2016sep,
- author = {Zmolikova, Katerina and Karafiat, Martin and Vesely, Karel and Delcroix, Marc and Watanabe, Shinji and Burget, Lukas and Cernocky, Jan Honza},
- title = {{Data selection by sequence summarizing neural network in mismatch condition training}},
- booktitle = {Interspeech},
- year = 2016,
- pages = {2354--2358},
- month = sep,
- doi = {10.21437/Interspeech.2016-741},
- url = {https://www.merl.com/publications/TR2016-075}
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September 2016, pp. 35-39.
BibTeX TR2016-114 PDF
- @inproceedings{Hayashi2016sep,
-   author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
-   title     = {{Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection}},
-   booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
-   month     = sep,
-   year      = {2016},
-   pages     = {35--39},
-   url       = {https://www.merl.com/publications/TR2016-114}
- }
Hori, C., Watanabe, S., Hori, T., Harsham, B.A., Hershey, J.R., Koji, Y., Fujii, Y., Furumoto, Y., "Driver Confusion Status Detection Using Recurrent Neural Networks", IEEE International Conference on Multimedia and Expo (ICME), DOI: 10.1109/ICME.2016.7552966, July 2016.
BibTeX TR2016-088 PDF
- @inproceedings{Hori2016jul,
-   author    = {Hori, Chiori and Watanabe, Shinji and Hori, Takaaki and Harsham, Bret A. and Hershey, John R. and Koji, Yusuke and Fujii, Youichi and Furumoto, Yuki},
-   title     = {{Driver Confusion Status Detection Using Recurrent Neural Networks}},
-   booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
-   month     = jul,
-   year      = {2016},
-   doi       = {10.1109/ICME.2016.7552966},
-   url       = {https://www.merl.com/publications/TR2016-088}
- }
Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
BibTeX TR2016-003 PDF
- @inproceedings{Hershey2016mar,
-   author    = {Hershey, John R. and Chen, Zhuo and {Le Roux}, Jonathan and Watanabe, Shinji},
-   title     = {{Deep Clustering: Discriminative Embeddings for Segmentation and Separation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = mar,
-   year      = {2016},
-   pages     = {31--35},
-   doi       = {10.1109/ICASSP.2016.7471631},
-   url       = {https://www.merl.com/publications/TR2016-003}
- }
Hori, T., Hori, C., Watanabe, S., Hershey, J.R., "Minimum Word Error Training of Long Short-Term Memory Recurrent Neural Network Language Models for Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472827, March 2016, pp. 5990-5994.
BibTeX TR2016-011 PDF
- @inproceedings{Hori2016mar,
-   author    = {Hori, Takaaki and Hori, Chiori and Watanabe, Shinji and Hershey, John R.},
-   title     = {{Minimum Word Error Training of Long Short-Term Memory Recurrent Neural Network Language Models for Speech Recognition}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = mar,
-   year      = {2016},
-   pages     = {5990--5994},
-   doi       = {10.1109/ICASSP.2016.7472827},
-   url       = {https://www.merl.com/publications/TR2016-011}
- }
Vesely, K., Watanabe, S., Zmolikova, K., Karafiat, M., Burget, L., Cernocky, J.H., "Sequence Summarizing Neural Network for Speaker Adaptation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472692, March 2016, pp. 5315-5319.
BibTeX TR2016-001 PDF
- @inproceedings{Vesely2016mar,
- author = {Vesely, Karel and Watanabe, Shinji and Zmolikova, Katerina and Karafiat, Martin and Burget, Lukas and Cernocky, Jan Honza},
- title = {{Sequence Summarizing Neural Network for Speaker Adaptation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2016,
- pages = {5315--5319},
- month = mar,
- doi = {10.1109/ICASSP.2016.7472692},
- url = {https://www.merl.com/publications/TR2016-001}
- }
Wisdom, S., Hershey, J.R., Le Roux, J., Watanabe, S., "Deep Unfolding for Multichannel Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471649, March 2016, pp. 121-125.
BibTeX TR2016-008 PDF
- @inproceedings{Wisdom2016mar,
-   author    = {Wisdom, Scott and Hershey, John R. and {Le Roux}, Jonathan and Watanabe, Shinji},
-   title     = {{Deep Unfolding for Multichannel Source Separation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = mar,
-   year      = {2016},
-   pages     = {121--125},
-   doi       = {10.1109/ICASSP.2016.7471649},
-   url       = {https://www.merl.com/publications/TR2016-008}
- }
Xiao, X., Watanabe, S., Erdogan, H., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Mandel, M., Yu, D., "Deep Beamforming Networks for Multi-Channel Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472778, March 2016, pp. 5745-5749.
BibTeX TR2016-002 PDF
- @inproceedings{Xiao2016mar,
-   author    = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Mandel, Michael and Yu, Dong},
-   title     = {{Deep Beamforming Networks for Multi-Channel Speech Recognition}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = mar,
-   year      = {2016},
-   pages     = {5745--5749},
-   doi       = {10.1109/ICASSP.2016.7472778},
-   url       = {https://www.merl.com/publications/TR2016-002}
- }
Kanagawa, H., Tachioka, Y., Watanabe, S., Ishii, J., "Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/APSIPA.2015.7415425, December 2015, pp. 86-92.
BibTeX TR2015-150 PDF
- @inproceedings{Kanagawa2015dec,
-   author    = {Kanagawa, H. and Tachioka, Y. and Watanabe, S. and Ishii, J.},
-   title     = {{Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN}},
-   booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
-   month     = dec,
-   year      = {2015},
-   pages     = {86--92},
-   doi       = {10.1109/APSIPA.2015.7415425},
-   url       = {https://www.merl.com/publications/TR2015-150}
- }
Barker, J., Marxer, R., Vincent, E., Watanabe, S., "The Third 'CHiME' Speech Separation and Recognition Challenge: Dataset, Task and Baselines", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404837, December 2015, pp. 504-511.
BibTeX TR2015-136 PDF
- @inproceedings{Barker2015dec,
- author = {Barker, J. and Marxer, R. and Vincent, E. and Watanabe, S.},
- title = {{The Third 'CHiME' Speech Separation and Recognition Challenge: Dataset, Task and Baselines}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {504--511},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/ASRU.2015.7404837},
- url = {https://www.merl.com/publications/TR2015-136}
- }
Hsiao, R., Ma, J., Hartmann, W., Karafiat, M., Grezl, F., Burget, L., Szoke, I., Cernocky, J., Watanabe, S., Chen, Z., Mallidi, S.H., Hermansky, H., Tsakalidis, S., Schwartz, R., "Robust Speech Recognition in Unknown Reverberant and Noisy Conditions", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404841, December 2015, pp. 533-538.
BibTeX TR2015-138 PDF
- @inproceedings{Hsiao2015dec,
- author = {Hsiao, R. and Ma, J. and Hartmann, W. and Karafiat, M. and Grezl, F. and Burget, L. and Szoke, I. and Cernocky, J. and Watanabe, S. and Chen, Z. and Mallidi, S.H. and Hermansky, H. and Tsakalidis, S. and Schwartz, R.},
- title = {{Robust Speech Recognition in Unknown Reverberant and Noisy Conditions}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {533--538},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/ASRU.2015.7404841},
- url = {https://www.merl.com/publications/TR2015-138}
- }
Moriya, T., Shinozaki, T., Watanabe, S., Duh, K., "Automation of System Building for State-of-the-Art Large Vocabulary Speech Recognition Using Evolution Strategy", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404852, December 2015, pp. 610-616.
BibTeX TR2015-137 PDF
- @inproceedings{Moriya2015dec,
-   author    = {Moriya, T. and Shinozaki, T. and Watanabe, S. and Duh, K.},
-   title     = {{Automation of System Building for State-of-the-Art Large Vocabulary Speech Recognition Using Evolution Strategy}},
-   booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
-   month     = dec,
-   year      = {2015},
-   pages     = {610--616},
-   doi       = {10.1109/ASRU.2015.7404852},
-   url       = {https://www.merl.com/publications/TR2015-137}
- }
Hori, T., Chen, Z., Erdogan, H., Hershey, J.R., Le Roux, J., Mitra, V., Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
BibTeX TR2015-135 PDF
- @inproceedings{Hori2015dec2,
-   author    = {Hori, T. and Chen, Z. and Erdogan, H. and Hershey, J.R. and {Le Roux}, J. and Mitra, V. and Watanabe, S.},
-   title     = {{The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition}},
-   booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
-   publisher = {IEEE},
-   month     = dec,
-   year      = {2015},
-   pages     = {475--481},
-   doi       = {10.1109/ASRU.2015.7404833},
-   url       = {https://www.merl.com/publications/TR2015-135}
- }
Hori, C., Hori, T., Watanabe, S., Hershey, J.R., "Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers", NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction, December 2015.
BibTeX TR2015-134 PDF
- @inproceedings{Hori2015dec1,
-   author    = {Hori, C. and Hori, T. and Watanabe, S. and Hershey, J.R.},
-   title     = {{Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers}},
-   booktitle = {NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction},
-   month     = dec,
-   year      = {2015},
-   url       = {https://www.merl.com/publications/TR2015-134}
- }
Tawara, N., Ogawa, T., Watanabe, S., Nakamura, A., Kobayashi, T., "A Sampling-Based Speaker Clustering Using Utterance-Oriented Dirichlet Process Mixture Model and Its Evaluation on Large Scale Data", APSIPA Transactions on Signal and Information Processing, DOI: 10.1017/ATSIP.2015.19, Vol. 4, October 2015.
BibTeX TR2015-153 PDF
- @article{Tawara2015oct,
-   author  = {Tawara, N. and Ogawa, T. and Watanabe, S. and Nakamura, A. and Kobayashi, T.},
-   title   = {{A Sampling-Based Speaker Clustering Using Utterance-Oriented Dirichlet Process Mixture Model and Its Evaluation on Large Scale Data}},
-   journal = {APSIPA Transactions on Signal and Information Processing},
-   volume  = {4},
-   month   = oct,
-   year    = {2015},
-   doi     = {10.1017/ATSIP.2015.19},
-   issn    = {2048-7703},
-   url     = {https://www.merl.com/publications/TR2015-153}
- }
Harsham, B.A., Watanabe, S., Esenther, A., Hershey, J.R., Le Roux, J., Luan, Y., Nikovski, D.N., Potluru, V.K., "Driver Prediction to Improve Interaction with In-Vehicle HMI", Workshop on DSP for In-Vehicle Systems and Safety (DSP), October 2015.
BibTeX TR2015-120 PDF
- @inproceedings{Harsham2015oct,
-   author    = {Harsham, B.A. and Watanabe, S. and Esenther, A. and Hershey, J.R. and {Le Roux}, J. and Luan, Y. and Nikovski, D.N. and Potluru, V.K.},
-   title     = {{Driver Prediction to Improve Interaction with In-Vehicle HMI}},
-   booktitle = {Workshop on DSP for In-Vehicle Systems and Safety (DSP)},
-   month     = oct,
-   year      = {2015},
-   url       = {https://www.merl.com/publications/TR2015-120}
- }
Abdelaziz, A.H., Watanabe, S., Hershey, J.R., Vincent, E., Kolossa, D., "Uncertainty Propagation Through Deep Neural Networks", Interspeech, September 2015, vol. 1 of 5, pp. 3561.
BibTeX TR2015-098 PDF
- @inproceedings{Abdelaziz2015sep,
- author = {Abdelaziz, A.H. and Watanabe, S. and Hershey, J.R. and Vincent, E. and Kolossa, D.},
- title = {{Uncertainty Propagation Through Deep Neural Networks}},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = 3561,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-098}
- }
Chen, Z., Watanabe, S., Erdogan, H., Hershey, J.R., "Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks", Interspeech, September 2015, vol. 1 of 5, pp. 1278.
BibTeX TR2015-100 PDF
- @inproceedings{Chen2015sep,
-   author    = {Chen, Z. and Watanabe, S. and Erdogan, H. and Hershey, J.R.},
-   title     = {{Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks}},
-   booktitle = {Interspeech},
-   volume    = {1 of 5},
-   month     = sep,
-   year      = {2015},
-   pages     = {1278},
-   isbn      = {978-1-5108-1790-6},
-   url       = {https://www.merl.com/publications/TR2015-100}
- }
Luan, Y., Watanabe, S., Harsham, B.A., "Efficient Learning for Spoken Language Understanding Tasks with Word Embedding Based Pre-Training", Interspeech, September 2015, vol. 1 of 5, pp. 1398-1402.
BibTeX TR2015-097 PDF
- @inproceedings{Luan2015sep,
- author = {Luan, Y. and Watanabe, S. and Harsham, B.A.},
- title = {{Efficient Learning for Spoken Language Understanding Tasks with Word Embedding Based Pre-Training}},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = {1398--1402},
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-097}
- }
Tachioka, Y., Watanabe, S., "Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features", Interspeech, September 2015, vol. 1 of 5, pp. 3541.
BibTeX TR2015-099 PDF
- @inproceedings{Tachioka2015sep,
- author = {Tachioka, Y. and Watanabe, S.},
- title = {{Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features}},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = 3541,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-099}
- }