Publications

200 / 3,808 publications found.


  •  Zhang, Z., Xu, W., Sullivan, A., "Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning", arXiv, March 2018.
    BibTeX arXiv
    • @article{Zhang2018mar,
    • author = {Zhang, Ziming and Xu, Wenju and Sullivan, Alan},
    • title = {{Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning}},
    • journal = {arXiv},
    • year = 2018,
    • month = mar,
    • url = {https://arxiv.org/abs/1903.00760}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {{Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming}},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {{Hybrid CTC/Attention Architecture for End-to-End Speech Recognition}},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
    BibTeX TR2017-132 PDF Video
    • @inproceedings{Hori2017aug,
    • author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
    • title = {{Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM}},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-132}
    • }
  •  Abbeloos, W., Caccamo, S., Ataer-Cansizoglu, E., Taguchi, Y., Feng, C., Lee, T.-Y., "Detecting and Grouping Identical Objects for Region Proposal and Classification", CVPR Workshop on Deep Learning for Robotic Vision, DOI: 10.1109/​CVPRW.2017.76, July 2017.
    BibTeX TR2017-099 PDF
    • @inproceedings{Abbeloos2017jul,
    • author = {Abbeloos, Wim and Caccamo, Sergio and Ataer-Cansizoglu, Esra and Taguchi, Yuichi and Feng, Chen and Lee, Teng-Yok},
    • title = {{Detecting and Grouping Identical Objects for Region Proposal and Classification}},
    • booktitle = {CVPR Workshop on Deep Learning for Robotic Vision},
    • year = 2017,
    • month = jul,
    • doi = {10.1109/CVPRW.2017.76},
    • url = {https://www.merl.com/publications/TR2017-099}
    • }
  •  Yu, Z., Feng, C., Liu, M.-Y., Ramalingam, S., "CASENet: Deep Category-Aware Semantic Edge Detection", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/​CVPR.2017.191, July 2017.
    BibTeX TR2017-100 PDF Video Data Software
    • @inproceedings{Yu2017jul,
    • author = {Yu, Zhiding and Feng, Chen and Liu, Ming-Yu and Ramalingam, Srikumar},
    • title = {{CASENet: Deep Category-Aware Semantic Edge Detection}},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2017,
    • month = jul,
    • doi = {10.1109/CVPR.2017.191},
    • url = {https://www.merl.com/publications/TR2017-100}
    • }
  •  Feng, C., Liu, M.-Y., Kao, C.-C., Lee, T.-Y., "Deep Active Learning for Civil Infrastructure Defect Detection and Classification", International Workshop on Computing in Civil Engineering (IWCCE), June 2017.
    BibTeX TR2017-034 PDF
    • @inproceedings{Feng2017jun,
    • author = {Feng, Chen and Liu, Ming-Yu and Kao, Chieh-Chi and Lee, Teng-Yok},
    • title = {{Deep Active Learning for Civil Infrastructure Defect Detection and Classification}},
    • booktitle = {International Workshop on Computing in Civil Engineering (IWCCE)},
    • year = 2017,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2017-034}
    • }
  •  Farahmand, A.-M., Nabi, S., Nikovski, D.N., "Deep Reinforcement Learning for Partial Differential Equation Control", American Control Conference (ACC), DOI: 10.23919/​ACC.2017.7963427, May 2017.
    BibTeX TR2017-063 PDF
    • @inproceedings{Farahmand2017may,
    • author = {Farahmand, Amir-massoud and Nabi, Saleh and Nikovski, Daniel N.},
    • title = {{Deep Reinforcement Learning for Partial Differential Equation Control}},
    • booktitle = {American Control Conference (ACC)},
    • year = 2017,
    • month = may,
    • doi = {10.23919/ACC.2017.7963427},
    • url = {https://www.merl.com/publications/TR2017-063}
    • }
  •  Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-010 PDF
    • @inproceedings{Luo2017mar,
    • author = {Luo, Yi and Chen, Zhuo and Hershey, John R. and {Le Roux}, Jonathan and Mesgarani, Nima},
    • title = {{Deep Clustering and Conventional Networks for Music Separation: Strong Together}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-010}
    • }
  •  Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-012 PDF
    • @inproceedings{Meng2017mar,
    • author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
    • title = {{Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-012}
    • }
  •  Hara, K., Liu, M.-Y., Tuzel, C.O., Farahmand, A.-M., "Attentional Network for Visual Object Detection", arXiv, January 2017.
    BibTeX arXiv
    • @article{Hara2017jan,
    • author = {Hara, Kota and Liu, Ming-Yu and Tuzel, C. Oncel and Farahmand, Amir-massoud},
    • title = {{Attentional Network for Visual Object Detection}},
    • journal = {arXiv},
    • year = 2017,
    • month = jan,
    • url = {https://arxiv.org/abs/1702.01478}
    • }
  •  Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/​APSIPA.2016.7820724, December 2016.
    BibTeX TR2016-162 PDF
    • @inproceedings{Xiao2016dec,
    • author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
    • title = {{Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition}},
    • booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
    • year = 2016,
    • month = dec,
    • doi = {10.1109/APSIPA.2016.7820724},
    • url = {https://www.merl.com/publications/TR2016-162}
    • }
  •  Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation," Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
    BibTeX TR2016-113 PDF
    • @techreport{LeRoux2016sep,
    • author = {{Le Roux}, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
    • title = {{Learning-Based Approaches to Speech Enhancement and Separation}},
    • booktitle = {Interspeech Tutorials},
    • year = 2016,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-113}
    • }
  •  Isik, Y., Le Roux, J., Chen, Z., Watanabe, S., Hershey, J.R., "Single-Channel Multi-Speaker Separation using Deep Clustering", Interspeech, DOI: 10.21437/​Interspeech.2016-1176, September 2016, pp. 545-549.
    BibTeX TR2016-073 PDF
    • @inproceedings{Isik2016sep,
    • author = {Isik, Yusuf and {Le Roux}, Jonathan and Chen, Zhuo and Watanabe, Shinji and Hershey, John R.},
    • title = {{Single-Channel Multi-Speaker Separation using Deep Clustering}},
    • booktitle = {Interspeech},
    • year = 2016,
    • pages = {545--549},
    • month = sep,
    • doi = {10.21437/Interspeech.2016-1176},
    • url = {https://www.merl.com/publications/TR2016-073}
    • }
  •  Kamilov, U., Mansour, H., "Learning MMSE Optimal Thresholds for FISTA", International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST), August 2016.
    BibTeX TR2016-111 PDF
    • @inproceedings{Kamilov2016aug,
    • author = {Kamilov, Ulugbek and Mansour, Hassan},
    • title = {{Learning MMSE Optimal Thresholds for FISTA}},
    • booktitle = {International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST)},
    • year = 2016,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2016-111}
    • }
  •  Son, K., Liu, M.-Y., Taguchi, Y., "Learning to Remove Multipath Distortions in Time-of-Flight Range Images for a Robotic Arm Setup", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/​ICRA.2016.7487515, May 2016, pp. 3390-3397.
    BibTeX TR2016-036 PDF
    • @inproceedings{Son2016may,
    • author = {Son, Kilho and Liu, Ming-Yu and Taguchi, Yuichi},
    • title = {{Learning to Remove Multipath Distortions in Time-of-Flight Range Images for a Robotic Arm Setup}},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
    • year = 2016,
    • pages = {3390--3397},
    • month = may,
    • doi = {10.1109/ICRA.2016.7487515},
    • url = {https://www.merl.com/publications/TR2016-036}
    • }
  •  Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2016.7471631, March 2016, pp. 31-35.
    BibTeX TR2016-003 PDF
    • @inproceedings{Hershey2016mar,
    • author = {Hershey, John R. and Chen, Zhuo and {Le Roux}, Jonathan and Watanabe, Shinji},
    • title = {{Deep Clustering: Discriminative Embeddings for Segmentation and Separation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2016,
    • pages = {31--35},
    • month = mar,
    • doi = {10.1109/ICASSP.2016.7471631},
    • url = {https://www.merl.com/publications/TR2016-003}
    • }
  •  Liu, M.-Y., Mallya, A., Tuzel, C.O., Chen, X., "Unsupervised Network Pretraining via Encoding Human Design", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/​WACV.2016.7477698, March 2016, pp. 1-9.
    BibTeX TR2016-022 PDF
    • @inproceedings{Liu2016mar,
    • author = {Liu, Ming-Yu and Mallya, Arun and Tuzel, C. Oncel and Chen, Xi},
    • title = {{Unsupervised Network Pretraining via Encoding Human Design}},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2016,
    • pages = {1--9},
    • month = mar,
    • doi = {10.1109/WACV.2016.7477698},
    • url = {https://www.merl.com/publications/TR2016-022}
    • }
  •  Tachioka, Y., Watanabe, S., "Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features", Interspeech, September 2015, vol. 1 or 5, pp. 3541.
    BibTeX TR2015-099 PDF
    • @inproceedings{Tachioka2015sep,
    • author = {Tachioka, Y. and Watanabe, S.},
    • title = {{Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features}},
    • booktitle = {Interspeech},
    • year = 2015,
    • volume = {1 or 5},
    • pages = 3541,
    • month = sep,
    • isbn = {978-1-5108-1790-6},
    • url = {https://www.merl.com/publications/TR2015-099}
    • }
  •  Ahmed, E., Jones, M.J., Marks, T.K., "An Improved Deep Learning Architecture for Person Re-Identification", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/​CVPR.2015.7299016, June 2015, pp. 3908-3916.
    BibTeX TR2015-076 PDF
    • @inproceedings{Jones2015jun,
    • author = {Ahmed, E. and Jones, M.J. and Marks, T.K.},
    • title = {{An Improved Deep Learning Architecture for Person Re-Identification}},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2015,
    • pages = {3908--3916},
    • month = jun,
    • doi = {10.1109/CVPR.2015.7299016},
    • url = {https://www.merl.com/publications/TR2015-076}
    • }
  •  Sharma, A., Tuzel, C.O., Jacobs, D., "Deep Hierarchical Parsing for Semantic Segmentation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/​CVPR.2015.7298651, June 2015, pp. 530-538.
    BibTeX TR2015-057 PDF Video
    • @inproceedings{Sharma2015jun,
    • author = {Sharma, A. and Tuzel, C.O. and Jacobs, D.},
    • title = {{Deep Hierarchical Parsing for Semantic Segmentation}},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2015,
    • pages = {530--538},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/CVPR.2015.7298651},
    • issn = {1063-6919},
    • url = {https://www.merl.com/publications/TR2015-057}
    • }
  •  Shinozaki, T., Watanabe, S., "Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2015.7178918, April 2015, pp. 4979-4983.
    BibTeX TR2015-032 PDF
    • @inproceedings{Shinozaki2015apr,
    • author = {Shinozaki, T. and Watanabe, S.},
    • title = {{Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2015,
    • pages = {4979--4983},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2015.7178918},
    • url = {https://www.merl.com/publications/TR2015-032}
    • }
  •  Hershey, J.R., Le Roux, J., Weninger, F., "Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures", arXiv, August 2014.
    BibTeX arXiv
    • @article{Hershey2014aug,
    • author = {Hershey, J.R. and {Le Roux}, J. and Weninger, F.},
    • title = {{Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures}},
    • journal = {arXiv},
    • year = 2014,
    • month = aug,
    • url = {https://arxiv.org/abs/1409.2574}
    • }
  •  Koike-Akino, T., "Perspective of Statistical Learning for Nonlinear Equalization in Coherent Optical Communications", Signal Processing in Photonic Communications (SPPCom), DOI: 10.1364/​SPPCOM.2014.ST2D.2, July 2014.
    BibTeX TR2014-113 PDF
    • @inproceedings{Koike-Akino2014jul,
    • author = {Koike-Akino, T.},
    • title = {{Perspective of Statistical Learning for Nonlinear Equalization in Coherent Optical Communications}},
    • booktitle = {Signal Processing in Photonic Communications (SPPCom)},
    • year = 2014,
    • month = jul,
    • doi = {10.1364/SPPCOM.2014.ST2D.2},
    • isbn = {978-1-55752-737-0},
    • url = {https://www.merl.com/publications/TR2014-113}
    • }
  •  Watanabe, S., Le Roux, J., "Black Box Optimization for Automatic Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2014.6854202, May 2014, pp. 3256-3260.
    BibTeX TR2014-021 PDF
    • @inproceedings{Watanabe2014may,
    • author = {Watanabe, S. and {Le Roux}, J.},
    • title = {{Black Box Optimization for Automatic Speech Recognition}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2014,
    • pages = {3256--3260},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2014.6854202},
    • url = {https://www.merl.com/publications/TR2014-021}
    • }