Publications

Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
BibTeX TR2016-115 PDF
- @techreport{Delcroix2016sep,
- author = {Delcroix, Marc and Watanabe, Shinji},
- title = {{Recent Advances in Distant Speech Recognition}},
- institution = {Interspeech},
- number = {TR2016-115},
- year = 2016,
- month = sep,
- note = {Interspeech Tutorials},
- url = {https://www.merl.com/publications/TR2016-115}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/Interspeech.2016-552, September 2016, pp. 1981-1985.
BibTeX TR2016-072 PDF
- @inproceedings{Erdogan2016sep,
-   author    = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and {Le Roux}, Jonathan},
-   title     = {{Improved MVDR beamforming using single-channel mask prediction networks}},
-   booktitle = {Interspeech},
-   pages     = {1981--1985},
-   month     = sep,
-   year      = {2016},
-   doi       = {10.21437/Interspeech.2016-552},
-   url       = {https://www.merl.com/publications/TR2016-072},
- }
Kamilov, U., Mansour, H., "Learning MMSE Optimal Thresholds for FISTA", International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST), August 2016.
BibTeX TR2016-111 PDF
- @inproceedings{Kamilov2016aug,
-   author    = {Kamilov, Ulugbek and Mansour, Hassan},
-   title     = {{Learning MMSE Optimal Thresholds for FISTA}},
-   booktitle = {International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST)},
-   month     = aug,
-   year      = {2016},
-   url       = {https://www.merl.com/publications/TR2016-111},
- }
Vemulapalli, R., Tuzel, C.O., Liu, M.-Y., "Deep Gaussian Conditional Random Field Network: A Model-based Deep Network for Discriminative Denoising", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2016.351, June 2016, pp. 4801-4809.
BibTeX TR2016-079 PDF
- @inproceedings{Vemulapalli2016jun2,
-   author    = {Vemulapalli, Raviteja and Tuzel, C. Oncel and Liu, Ming-Yu},
-   title     = {{Deep Gaussian Conditional Random Field Network: A Model-based Deep Network for Discriminative Denoising}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   pages     = {4801--4809},
-   month     = jun,
-   year      = {2016},
-   doi       = {10.1109/CVPR.2016.351},
-   url       = {https://www.merl.com/publications/TR2016-079},
- }
Vemulapalli, R., Tuzel, C.O., Liu, M.-Y., Chellappa, R., "Gaussian Conditional Random Field Network for Semantic Segmentation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016, pp. 3224-3233.
BibTeX TR2016-078 PDF
- @inproceedings{Vemulapalli2016jun,
-   author    = {Vemulapalli, Raviteja and Tuzel, C. Oncel and Liu, Ming-Yu and Chellappa, Rama},
-   title     = {{Gaussian Conditional Random Field Network for Semantic Segmentation}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   pages     = {3224--3233},
-   month     = jun,
-   year      = {2016},
-   url       = {https://www.merl.com/publications/TR2016-078},
- }
Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
BibTeX TR2016-003 PDF
- @inproceedings{Hershey2016mar,
-   author    = {Hershey, John R. and Chen, Zhuo and {Le Roux}, Jonathan and Watanabe, Shinji},
-   title     = {{Deep Clustering: Discriminative Embeddings for Segmentation and Separation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {31--35},
-   month     = mar,
-   year      = {2016},
-   doi       = {10.1109/ICASSP.2016.7471631},
-   url       = {https://www.merl.com/publications/TR2016-003},
- }
Wisdom, S., Hershey, J.R., Le Roux, J., Watanabe, S., "Deep Unfolding for Multichannel Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471649, March 2016, pp. 121-125.
BibTeX TR2016-008 PDF
- @inproceedings{Wisdom2016mar,
-   author    = {Wisdom, Scott and Hershey, John R. and {Le Roux}, Jonathan and Watanabe, Shinji},
-   title     = {{Deep Unfolding for Multichannel Source Separation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {121--125},
-   month     = mar,
-   year      = {2016},
-   doi       = {10.1109/ICASSP.2016.7471649},
-   url       = {https://www.merl.com/publications/TR2016-008},
- }
Xiao, X., Watanabe, S., Erdogan, H., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Mandel, M., Yu, D., "Deep Beamforming Networks for Multi-Channel Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472778, March 2016, pp. 5745-5749.
BibTeX TR2016-002 PDF
- @inproceedings{Xiao2016mar,
-   author    = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Mandel, Michael and Yu, Dong},
-   title     = {{Deep Beamforming Networks for Multi-Channel Speech Recognition}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {5745--5749},
-   month     = mar,
-   year      = {2016},
-   doi       = {10.1109/ICASSP.2016.7472778},
-   url       = {https://www.merl.com/publications/TR2016-002},
- }
Liu, M.-Y., Mallya, A., Tuzel, C.O., Chen, X., "Unsupervised Network Pretraining via Encoding Human Design", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV.2016.7477698, March 2016, pp. 1-9.
BibTeX TR2016-022 PDF
- @inproceedings{Liu2016mar,
-   author    = {Liu, Ming-Yu and Mallya, Arun and Tuzel, C. Oncel and Chen, Xi},
-   title     = {{Unsupervised Network Pretraining via Encoding Human Design}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   pages     = {1--9},
-   month     = mar,
-   year      = {2016},
-   doi       = {10.1109/WACV.2016.7477698},
-   url       = {https://www.merl.com/publications/TR2016-022},
- }
Kanagawa, H., Tachioka, Y., Watanabe, S., Ishii, J., "Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/APSIPA.2015.7415425, December 2015, pp. 86-92.
BibTeX TR2015-150 PDF
- @inproceedings{Kanagawa2015dec,
-   author    = {Kanagawa, H. and Tachioka, Y. and Watanabe, S. and Ishii, J.},
-   title     = {{Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN}},
-   booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
-   pages     = {86--92},
-   month     = dec,
-   year      = {2015},
-   doi       = {10.1109/APSIPA.2015.7415425},
-   url       = {https://www.merl.com/publications/TR2015-150},
- }
Hori, T., Chen, Z., Erdogan, H., Hershey, J.R., Le Roux, J., Mitra, V., Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
BibTeX TR2015-135 PDF
- @inproceedings{Hori2015dec2,
- author = {Hori, T. and Chen, Z. and Erdogan, H. and Hershey, J. R. and {Le Roux}, J. and Mitra, V. and Watanabe, S.},
- title = {{The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {475--481},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/ASRU.2015.7404833},
- url = {https://www.merl.com/publications/TR2015-135}
- }
Abdelaziz, A.H., Watanabe, S., Hershey, J.R., Vincent, E., Kolossa, D., "Uncertainty Propagation Through Deep Neural Networks", Interspeech, September 2015, vol. 1 or 5, pp. 3561.
BibTeX TR2015-098 PDF
- @inproceedings{Abdelaziz2015sep,
- author = {Abdelaziz, A. H. and Watanabe, S. and Hershey, J. R. and Vincent, E. and Kolossa, D.},
- title = {{Uncertainty Propagation Through Deep Neural Networks}},
- booktitle = {Interspeech},
- year = 2015,
- pages = 3561,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-098},
- annote = {The source listing gave the volume as 1 or 5, which is not a valid volume number; the volume field is omitted until the correct value is verified.}
- }
Tachioka, Y., Watanabe, S., "Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features", Interspeech, September 2015, vol. 1 or 5, pp. 3541.
BibTeX TR2015-099 PDF
- @inproceedings{Tachioka2015sep,
- author = {Tachioka, Y. and Watanabe, S.},
- title = {{Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features}},
- booktitle = {Interspeech},
- year = 2015,
- pages = 3541,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-099},
- annote = {The source listing gave the volume as 1 or 5, which is not a valid volume number; the volume field is omitted until the correct value is verified.}
- }
Liu, M.-Y., Lin, A., Ramalingam, S., Tuzel, C.O., "Layered Interpretation of Street View Images", Robotics: Science and Systems Conference (RSS), DOI: 10.15607/RSS.2015.XI.025, July 2015.
BibTeX TR2015-073 PDF
- @inproceedings{Liu2015jul,
- author = {Liu, M.-Y. and Lin, A. and Ramalingam, S. and Tuzel, C. O.},
- title = {{Layered Interpretation of Street View Images}},
- booktitle = {Robotics: Science and Systems Conference (RSS)},
- year = 2015,
- month = jul,
- doi = {10.15607/RSS.2015.XI.025},
- url = {https://www.merl.com/publications/TR2015-073}
- }
Tachioka, Y., Narita, T., Watanabe, S., "Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments", EURASIP Journal on Advances in Signal Processing, DOI: 10.1186/s13634-015-0241-y, June 2015.
BibTeX TR2015-152 PDF
- @article{Tachioka2015jun,
-   author  = {Tachioka, Y. and Narita, T. and Watanabe, S.},
-   title   = {{Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments}},
-   journal = {EURASIP Journal on Advances in Signal Processing},
-   month   = jun,
-   year    = {2015},
-   doi     = {10.1186/s13634-015-0241-y},
-   url     = {https://www.merl.com/publications/TR2015-152},
- }
Ahmed, E., Jones, M.J., Marks, T.K., "An Improved Deep Learning Architecture for Person Re-Identification", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2015.7299016, June 2015, pp. 3908-3916.
BibTeX TR2015-076 PDF
- @inproceedings{Jones2015jun,
- author = {Ahmed, E. and Jones, M. J. and Marks, T. K.},
- title = {{An Improved Deep Learning Architecture for Person Re-Identification}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2015,
- pages = {3908--3916},
- month = jun,
- doi = {10.1109/CVPR.2015.7299016},
- url = {https://www.merl.com/publications/TR2015-076}
- }
Sharma, A., Tuzel, C.O., Jacobs, D., "Deep Hierarchical Parsing for Semantic Segmentation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2015.7298651, June 2015, pp. 530-538.
BibTeX TR2015-057 PDF Video
- @inproceedings{Sharma2015jun,
- author = {Sharma, A. and Tuzel, C. O. and Jacobs, D.},
- title = {{Deep Hierarchical Parsing for Semantic Segmentation}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2015,
- pages = {530--538},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPR.2015.7298651},
- issn = {1063-6919},
- url = {https://www.merl.com/publications/TR2015-057}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Le Roux, J., "Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178061, April 2015, pp. 708-712.
BibTeX TR2015-031 PDF
- @inproceedings{Erdogan2015apr,
- author = {Erdogan, H. and Hershey, J. R. and Watanabe, S. and {Le Roux}, J.},
- title = {{Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {708--712},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7178061},
- url = {https://www.merl.com/publications/TR2015-031}
- }
Le Roux, J., Hershey, J.R., Weninger, F.J., "Deep NMF for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7177933, April 2015, pp. 66-70.
BibTeX TR2015-029 PDF
- @inproceedings{LeRoux2015apr1,
- author = {{Le Roux}, J. and Hershey, J. R. and Weninger, F. J.},
- title = {{Deep NMF for Speech Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {66--70},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7177933},
- url = {https://www.merl.com/publications/TR2015-029}
- }
Shinozaki, T., Watanabe, S., "Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178918, April 2015, pp. 4979-4983.
BibTeX TR2015-032 PDF
- @inproceedings{Shinozaki2015apr,
-   author    = {Shinozaki, T. and Watanabe, S.},
-   title     = {{Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {4979--4983},
-   month     = apr,
-   year      = {2015},
-   publisher = {IEEE},
-   doi       = {10.1109/ICASSP.2015.7178918},
-   url       = {https://www.merl.com/publications/TR2015-032},
- }
Sharma, A., Tuzel, O., Liu, M.-Y., "Recursive Context Propagation Network for Semantic Scene Labeling", Advances in Neural Information Processing Systems (NIPS), December 2014.
BibTeX TR2014-093 PDF
- @inproceedings{Sharma2014dec,
-   author    = {Sharma, A. and Tuzel, O. and Liu, M.-Y.},
-   title     = {{Recursive Context Propagation Network for Semantic Scene Labeling}},
-   booktitle = {Advances in Neural Information Processing Systems (NIPS)},
-   month     = dec,
-   year      = {2014},
-   url       = {https://www.merl.com/publications/TR2014-093},
- }
Weninger, F., Le Roux, J., Hershey, J.R., Schuller, B., "Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation", IEEE Global Conference on Signal and Information Processing (GlobalSIP), DOI: 10.1109/GlobalSIP.2014.7032183, December 2014, pp. 577-581.
BibTeX TR2014-104 PDF
- @inproceedings{Weninger2014dec,
- author = {Weninger, F. and {Le Roux}, J. and Hershey, J. R. and Schuller, B.},
- title = {{Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation}},
- booktitle = {IEEE Global Conference on Signal and Information Processing (GlobalSIP)},
- year = 2014,
- pages = {577--581},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/GlobalSIP.2014.7032183},
- url = {https://www.merl.com/publications/TR2014-104}
- }
Hershey, J.R., Le Roux, J., Weninger, F., "Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures", arXiv, August 2014.
BibTeX arXiv
- @article{Hershey2014aug,
- author = {Hershey, J. R. and {Le Roux}, J. and Weninger, F.},
- title = {{Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures}},
- journal = {arXiv},
- year = 2014,
- month = aug,
- eprint = {1409.2574},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/1409.2574}
- }
Tachioka, Y., Narita, T., Weninger, F., Watanabe, S., "Dual system combination approach for various reverberant environments with dereverberation techniques", IEEE REVERB Workshop, May 2014.
BibTeX TR2014-032 PDF
- @inproceedings{Tachioka2014may,
-   author    = {Tachioka, Y. and Narita, T. and Weninger, F. and Watanabe, S.},
-   title     = {{Dual system combination approach for various reverberant environments with dereverberation techniques}},
-   booktitle = {IEEE REVERB Workshop},
-   month     = may,
-   year      = {2014},
-   url       = {https://www.merl.com/publications/TR2014-032},
- }
Weninger, F., Watanabe, S., Le Roux, J., Hershey, J.R., Tachioka, Y., Geiger, J., Schuller, B., Rigoll, G., "The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement", IEEE REVERB Workshop, May 2014.
BibTeX TR2014-033 PDF
- @inproceedings{Weninger2014may2,
- author = {Weninger, F. and Watanabe, S. and {Le Roux}, J. and Hershey, J. R. and Tachioka, Y. and Geiger, J. and Schuller, B. and Rigoll, G.},
- title = {{The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement}},
- booktitle = {IEEE REVERB Workshop},
- year = 2014,
- month = may,
- url = {https://www.merl.com/publications/TR2014-033}
- }