Publications

Radhakrishnan, R., Xiong, Z., Divakaran, A., Ishikawa, Y., "Generation of Sports Highlights Using a Combination of Supervised & Unsupervised Learning in Audio Domain", IEEE Pacific-Rim Conference on Multimedia (PCM), December 2003, vol. 2, pp. 935-939.
BibTeX TR2003-144 PDF
- @inproceedings{Radhakrishan2003dec,
- author = {Radhakrishnan, R. and Xiong, Z. and Divakaran, A. and Ishikawa, Y.},
- title = {Generation of Sports Highlights Using a Combination of Supervised \& Unsupervised Learning in Audio Domain},
- booktitle = {IEEE Pacific-Rim Conference on Multimedia (PCM)},
- year = 2003,
- volume = 2,
- pages = {935--939},
- month = dec,
- url = {https://www.merl.com/publications/TR2003-144}
- }
Raj, B., Seltzer, M.L., Reyes-Gomez, M.J., "Speech Recognizer Based Maximum Likelihood Beamforming", NSF Workshop on Perspectives on Speech Separation, October 2003.
BibTeX TR2003-87 PDF
- @inproceedings{Raj2003oct,
- author = {Raj, B. and Seltzer, M.L. and Reyes-Gomez, M.J.},
- title = {Speech Recognizer Based Maximum Likelihood Beamforming},
- booktitle = {NSF Workshop on Perspectives on Speech Separation},
- year = 2003,
- month = oct,
- url = {https://www.merl.com/publications/TR2003-87}
- }
Reyes-Gomez, M.J., Raj, B., Ellis, D.P.W., "Multi-Channel Source Separation by Beamforming Trained with Factorial HMMs", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2003, pp. 13-16.
BibTeX TR2003-88 PDF
- @inproceedings{Reyes-Gomez2003oct,
- author = {Reyes-Gomez, M.J. and Raj, B. and Ellis, D.P.W.},
- title = {Multi-Channel Source Separation by Beamforming Trained with Factorial HMMs},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2003,
- pages = {13--16},
- month = oct,
- url = {https://www.merl.com/publications/TR2003-88}
- }
Smaragdis, P., Brown, J.C., "Non-negative Matrix Factorization for Polyphonic Music Transcription", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2003, pp. 177-180.
BibTeX TR2003-139 PDF
- @inproceedings{Smaragdis2003oct,
- author = {Smaragdis, P. and Brown, J.C.},
- title = {Non-negative Matrix Factorization for Polyphonic Music Transcription},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2003,
- pages = {177--180},
- month = oct,
- url = {https://www.merl.com/publications/TR2003-139}
- }
Lamere, P., Kwok, P., Walker, W., Gouvea, E., Singh, R., Raj, B., Wolf, P.P., "Design of the CMU Sphinx-4 Decoder", Eurospeech, September 2003.
BibTeX TR2003-110 PDF
- @inproceedings{Lamere2003sep,
- author = {Lamere, P. and Kwok, P. and Walker, W. and Gouvea, E. and Singh, R. and Raj, B. and Wolf, P.P.},
- title = {Design of the CMU Sphinx-4 Decoder},
- booktitle = {Eurospeech},
- year = 2003,
- month = sep,
- url = {https://www.merl.com/publications/TR2003-110}
- }
Singh, R., Warmuth, M., Raj, B., Lamere, P., "Classification with Free Energy at Raised Temperatures", Eurospeech, September 2003.
BibTeX TR2003-22 PDF
- @inproceedings{Singh2003sep,
- author = {Singh, R. and Warmuth, M. and Raj, B. and Lamere, P.},
- title = {Classification with Free Energy at Raised Temperatures},
- booktitle = {Eurospeech},
- year = 2003,
- month = sep,
- url = {https://www.merl.com/publications/TR2003-22}
- }
Xiong, Z., Radhakrishnan, R., Divakaran, A., Huang, T.S., "Comparing MFCC and MPEG-7 Audio Features for Feature Extraction, Maximum Likelihood HMM and Entropic Prior HMM for Sports Audio Classification", IEEE International Conference on Multimedia and Expo (ICME), DOI: 10.1109/ICME.2003.1221332, July 2003, vol. 3, pp. 397-400.
BibTeX TR2004-082 PDF
- @inproceedings{Xiong2003jul2,
- author = {Xiong, Z. and Radhakrishnan, R. and Divakaran, A. and Huang, T.S.},
- title = {Comparing MFCC and MPEG-7 Audio Features for Feature Extraction, Maximum Likelihood HMM and Entropic Prior HMM for Sports Audio Classification},
- booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
- year = 2003,
- volume = 3,
- pages = {397--400},
- month = jul,
- doi = {10.1109/ICME.2003.1221332},
- url = {https://www.merl.com/publications/TR2004-082}
- }
Raj, B., Whittaker, E.W.D., "Lossless Compression of Language Model Structure and Word Identifiers", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2003, vol. 1, pp. 388-391.
BibTeX IEEE Xplore
- @inproceedings{Raj2003apr,
- author = {Raj, B. and Whittaker, E.W.D.},
- title = {Lossless Compression of Language Model Structure and Word Identifiers},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2003,
- volume = 1,
- pages = {388--391},
- month = apr,
- issn = {1520-6149},
- url = {https://ieeexplore.ieee.org/document/1198799}
- }
Reyes-Gomez, M.J., Raj, B., Ellis, D.P.W., "Multi-Channel Source Separation by Factorial HMMs", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2003.
BibTeX
- @inproceedings{Reyes-Gomez2003apr,
- author = {Reyes-Gomez, M.J. and Raj, B. and Ellis, D.P.W.},
- title = {Multi-Channel Source Separation by Factorial HMMs},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2003,
- month = apr
- }
Singh, R., Raj, B., "Tracking Noise via Dynamical Systems with a Continuum of States", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2003.
BibTeX
- @inproceedings{Singh2003apr,
- author = {Singh, R. and Raj, B.},
- title = {Tracking Noise via Dynamical Systems with a Continuum of States},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2003,
- month = apr
- }
Xiong, Z., Radhakrishnan, R., Divakaran, A., Huang, T.S., "Comparing MFCC and MPEG-7 Audio Features for Feature Extraction, Maximum Likelihood HMM and Entropic Prior HMM for Sports Audio Classification", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2003.1200048, April 2003, vol. 5, pp. 628-631.
BibTeX IEEE Xplore
- @inproceedings{Xiong2003apr1,
- author = {Xiong, Z. and Radhakrishnan, R. and Divakaran, A. and Huang, T.S.},
- title = {Comparing MFCC and MPEG-7 Audio Features for Feature Extraction, Maximum Likelihood HMM and Entropic Prior HMM for Sports Audio Classification},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2003,
- volume = 5,
- pages = {628--631},
- month = apr,
- doi = {10.1109/ICASSP.2003.1200048},
- issn = {1520-6149},
- url = {https://ieeexplore.ieee.org/document/1200048}
- }
Xiong, Z., Radhakrishnan, R., Divakaran, A., Huang, T.S., "Audio Events Detection Based Highlights Extraction from Baseball, Golf and Soccer Games in a Unified Framework", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2003, vol. 5, pp. 632-635.
BibTeX IEEE Xplore
- @inproceedings{Xiong2003apr2,
- author = {Xiong, Z. and Radhakrishnan, R. and Divakaran, A. and Huang, T.S.},
- title = {Audio Events Detection Based Highlights Extraction from Baseball, Golf and Soccer Games in a Unified Framework},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2003,
- volume = 5,
- pages = {632--635},
- month = apr,
- issn = {1520-6149},
- url = {https://ieeexplore.ieee.org/document/1200049}
- }
Smaragdis, P., Casey, M., "Audio/Visual Independent Components", International Symposium on Independent Component Analysis and Blind Source Separation (ICA), April 2003, pp. 709-714.
BibTeX TR2003-138 PDF
- @inproceedings{Smaragdis2003apr,
- author = {Smaragdis, P. and Casey, M.},
- title = {Audio/Visual Independent Components},
- booktitle = {International Symposium on Independent Component Analysis and Blind Source Separation (ICA)},
- year = 2003,
- pages = {709--714},
- month = apr,
- url = {https://www.merl.com/publications/TR2003-138}
- }
Seltzer, M.L., Raj, B., "Speech Recognizer Based Filter Optimization for Microphone Array Processing", IEEE Signal Processing Letters, Vol. 10, No. 3, pp. 69-71, March 2003.
BibTeX IEEE Xplore
- @article{Seltzer2003mar,
- author = {Seltzer, M.L. and Raj, B.},
- title = {Speech Recognizer Based Filter Optimization for Microphone Array Processing},
- journal = {IEEE Signal Processing Letters},
- year = 2003,
- volume = 10,
- number = 3,
- pages = {69--71},
- month = mar,
- issn = {1070-9908},
- url = {https://ieeexplore.ieee.org/document/1182087}
- }
Raj, B., Singh, R., "Classifier-Based Non-Linear Projection for Adaptive Endpointing of Continuous Speech", Computer Speech and Language, Vol. 17, No. 1, pp. 5-26, January 2003.
BibTeX
- @article{Raj2003jan,
- author = {Raj, B. and Singh, R.},
- title = {Classifier-Based Non-Linear Projection for Adaptive Endpointing of Continuous Speech},
- journal = {Computer Speech and Language},
- year = 2003,
- volume = 17,
- number = 1,
- pages = {5--26},
- month = jan
- }
Divakaran, A., Radhakrishnan, R., Xiong, Z., Casey, M., "Procedure for Audio-Assisted Browsing of News Video Using Generalized Sound Recognition", SPIE Conference on Storage and Retrieval for Multimedia Databases, DOI: 10.1117/12.476294, January 2003, vol. 5021, pp. 160-166.
BibTeX SPIE Digital Library
- @inproceedings{Divakaran2003jan,
- author = {Divakaran, A. and Radhakrishnan, R. and Xiong, Z. and Casey, M.},
- title = {Procedure for Audio-Assisted Browsing of News Video Using Generalized Sound Recognition},
- booktitle = {SPIE Conference on Storage and Retrieval for Multimedia Databases},
- year = 2003,
- volume = 5021,
- pages = {160--166},
- month = jan,
- doi = {10.1117/12.476294},
- url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/5021/0000/Procedure-for-audio-assisted-browsing-of-news-video-using-generalized/10.1117/12.476294.short?SSO=1}
- }
Wolf, P., Raj, B., "The MERL SpokenQuery Information Retrieval System: A System for Retrieving Pertinent Documents from a Spoken Query", IEEE International Conference on Multimedia and Expo (ICME), August 2002, vol. 2, pp. 317-320.
BibTeX TR2002-57 PDF
- @inproceedings{Wolf2002aug,
- author = {Wolf, P. and Raj, B.},
- title = {The MERL SpokenQuery Information Retrieval System: A System for Retrieving Pertinent Documents from a Spoken Query},
- booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
- year = 2002,
- volume = 2,
- pages = {317--320},
- month = aug,
- url = {https://www.merl.com/publications/TR2002-57}
- }
Singh, R., Raj, B., Stern, R.M., "Model Compensation and Matched Condition Methods for Robust Speech Recognition" in Noise Reduction in Speech Applications, Davis, G.M., Ed., Electrical Engineering & Applied Signal Processing Series, chapter 10, pp. 245-275, CRC Press, May 2002.
BibTeX CRC Press
- @incollection{Singh2002may2,
- author = {Singh, R. and Raj, B. and Stern, R.M.},
- title = {Model Compensation and Matched Condition Methods for Robust Speech Recognition},
- booktitle = {Noise Reduction in Speech Applications},
- year = 2002,
- editor = {Davis, G.M.},
- series = {Electrical Engineering \& Applied Signal Processing Series},
- chapter = 10,
- pages = {245--275},
- month = may,
- publisher = {CRC Press},
- url = {https://www.crcpress.com/product/isbn/9780849309496}
- }
Singh, R., Stern, R.M., Raj, B., "Signal and Feature Compensation Methods for Robust Speech Recognition" in Noise Reduction in Speech Applications, Davis, G.M., Ed., Electrical Engineering & Applied Signal Processing Series, chapter 9, pp. 219-244, CRC Press, May 2002.
BibTeX CRC Press
- @incollection{Singh2002may1,
- author = {Singh, R. and Stern, R.M. and Raj, B.},
- title = {Signal and Feature Compensation Methods for Robust Speech Recognition},
- booktitle = {Noise Reduction in Speech Applications},
- year = 2002,
- editor = {Davis, G.M.},
- series = {Electrical Engineering \& Applied Signal Processing Series},
- chapter = 9,
- pages = {219--244},
- month = may,
- publisher = {CRC Press},
- url = {https://www.crcpress.com/product/isbn/9780849309496}
- }
Seltzer, M., Raj, B., Stern, R., "Speech Recognizer Based Microphone Array Processing for Robust Hands-Free Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2002, vol. 1, pp. 897-900.
BibTeX IEEE Xplore
- @inproceedings{Seltzer2002may,
- author = {Seltzer, M. and Raj, B. and Stern, R.},
- title = {Speech Recognizer Based Microphone Array Processing for Robust Hands-Free Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2002,
- volume = 1,
- pages = {897--900},
- month = may,
- issn = {1520-6149},
- url = {https://ieeexplore.ieee.org/document/1005885}
- }
Edward Whittaker, "Comparison of Width-wise and Length-wise Language Model Compression", Tech. Rep. TR2001-42, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 2001.
BibTeX TR2001-42 PDF
- @techreport{MERL_TR2001-42,
- author = {Edward Whittaker},
- title = {Comparison of Width-wise and Length-wise Language Model Compression},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2001-42},
- month = dec,
- year = 2001,
- url = {https://www.merl.com/publications/TR2001-42/}
- }
Edward Whittaker, "Quantization-based language model compression", Tech. Rep. TR2001-41, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 2001.
BibTeX TR2001-41 PDF
- @techreport{MERL_TR2001-41,
- author = {Edward Whittaker},
- title = {Quantization-based language model compression},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2001-41},
- month = dec,
- year = 2001,
- url = {https://www.merl.com/publications/TR2001-41/}
- }
Michael Seltzer, "Calibration of Microphone Arrays for Improved Speech Recognition", Tech. Rep. TR2001-43, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 2001.
BibTeX TR2001-43 PDF
- @techreport{MERL_TR2001-43,
- author = {Michael Seltzer},
- title = {Calibration of Microphone Arrays for Improved Speech Recognition},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2001-43},
- month = dec,
- year = 2001,
- url = {https://www.merl.com/publications/TR2001-43/}
- }
Pedro Moreno, Beth Logan, "A Boosting Approach for Confidence Scoring", Tech. Rep. TR2001-44, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 2001.
BibTeX TR2001-44 PDF
- @techreport{MERL_TR2001-44,
- author = {Pedro Moreno and Beth Logan},
- title = {A Boosting Approach for Confidence Scoring},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2001-44},
- month = dec,
- year = 2001,
- url = {https://www.merl.com/publications/TR2001-44/}
- }
Raj, B., Migdal, J., Singh, R., "Distributed Speech Recognition with Codec Parameters", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2001, pp. 127-130.
BibTeX TR2001-45 PDF
- @inproceedings{Raj2001dec,
- author = {Raj, B. and Migdal, J. and Singh, R.},
- title = {Distributed Speech Recognition with Codec Parameters},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2001,
- pages = {127--130},
- month = dec,
- url = {https://www.merl.com/publications/TR2001-45}
- }