Publications

Shashanka, M.V.S., Raj, B., Smaragdis, P., "Sparse Overcomplete Decomposition for Single Channel Speaker Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2007, vol. 2, pp. 641-644.
BibTeX TR2007-031 PDF
- @inproceedings{Shashanka2007apr,
-   author    = {Shashanka, M. V. S. and Raj, B. and Smaragdis, P.},
-   title     = {{Sparse Overcomplete Decomposition for Single Channel Speaker Separation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   volume    = 2,
-   pages     = {641--644},
-   month     = apr,
-   year      = 2007,
-   issn      = {1520-6149},
-   url       = {https://www.merl.com/publications/TR2007-031},
- }
Smaragdis, P., Shashanka, M., "A Framework for Secure Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2007, vol. 4, pp. 969-972.
BibTeX IEEE Xplore
- @inproceedings{Smaragdis2007apr,
-   author    = {Smaragdis, P. and Shashanka, M.},
-   title     = {{A Framework for Secure Speech Recognition}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   volume    = 4,
-   pages     = {969--972},
-   month     = apr,
-   year      = 2007,
-   issn      = {1520-6149},
-   url       = {https://ieeexplore.ieee.org/document/4218264},
- }
Smaragdis, P., "Convolutive Speech Bases and their Application to Supervised Speech Separation", IEEE Transactions on Audio, Speech and Language Processing, Vol. 15, No. 1, pp. 1-12, January 2007.
BibTeX TR2007-002 PDF
- @article{Smaragdis2007jan2,
-   author  = {Smaragdis, P.},
-   title   = {{Convolutive Speech Bases and their Application to Supervised Speech Separation}},
-   journal = {IEEE Transactions on Audio, Speech and Language Processing},
-   volume  = 15,
-   number  = 1,
-   pages   = {1--12},
-   month   = jan,
-   year    = 2007,
-   issn    = {1558-7916},
-   url     = {https://www.merl.com/publications/TR2007-002},
- }
Smaragdis, P., Boufounos, P., "Position and Trajectory Learning for Microphone Arrays", IEEE Transactions on Audio, Speech and Language Processing, Vol. 15, No. 1, pp. 358-368, January 2007.
BibTeX TR2007-001 PDF
- @article{Smaragdis2007jan1,
-   author  = {Smaragdis, P. and Boufounos, P.},
-   title   = {{Position and Trajectory Learning for Microphone Arrays}},
-   journal = {IEEE Transactions on Audio, Speech and Language Processing},
-   volume  = 15,
-   number  = 1,
-   pages   = {358--368},
-   month   = jan,
-   year    = 2007,
-   issn    = {1558-7916},
-   url     = {https://www.merl.com/publications/TR2007-001},
- }
Smaragdis, P., Raj, B., Shashanka, M., "A Probabilistic Latent Variable Model for Acoustic Modeling", Advances in Neural Information Processing Systems (NIPS), December 2006.
BibTeX TR2006-121 PDF
- @inproceedings{Smaragdis2006dec,
-   author    = {Smaragdis, P. and Raj, B. and Shashanka, M.},
-   title     = {{A Probabilistic Latent Variable Model for Acoustic Modeling}},
-   booktitle = {Advances in Neural Information Processing Systems (NIPS)},
-   month     = dec,
-   year      = 2006,
-   url       = {https://www.merl.com/publications/TR2006-121},
- }
Weinberg, G., Raj, B., Kalgaonkar, K., "Two New Techniques for Natural Spoken User Interfaces", ACM Symposium on User Interface Software and Technology (UIST), October 2006.
BibTeX TR2006-098 PDF
- @inproceedings{Weinberg2006oct,
-   author    = {Weinberg, G. and Raj, B. and Kalgaonkar, K.},
-   title     = {{Two New Techniques for Natural Spoken User Interfaces}},
-   booktitle = {ACM Symposium on User Interface Software and Technology (UIST)},
-   month     = oct,
-   year      = 2006,
-   url       = {https://www.merl.com/publications/TR2006-098},
- }
Radhakrishnan, R., Divakaran, A., "Merging Segmentations of Low-level and Mid-level Time Series for Audio Class Discovery", Asilomar Conference on Signals, Systems and Computers (ACSSC), October - November 2006, pp. 64-68.
BibTeX IEEE Xplore
- @inproceedings{Radhakrishnan2006oct,
-   author    = {Radhakrishnan, R. and Divakaran, A.},
-   title     = {{Merging Segmentations of Low-level and Mid-level Time Series for Audio Class Discovery}},
-   booktitle = {Asilomar Conference on Signals, Systems and Computers (ACSSC)},
-   pages     = {64--68},
-   month     = oct,
-   year      = 2006,
-   issn      = {1058-6393},
-   url       = {https://ieeexplore.ieee.org/document/4176513},
- }
Paris Smaragdis, Bhiksha Raj, Madhusudana Shashanka, "Supervised and Semi-Supervised Separation of Sounds from Single-Channel Mixtures", Tech. Rep. TR2007-062, Mitsubishi Electric Research Laboratories, Cambridge, MA, July 2006.
BibTeX TR2007-062 PDF
- @techreport{MERL_TR2007-062,
-   author      = {Smaragdis, Paris and Raj, Bhiksha and Shashanka, Madhusudana},
-   title       = {Supervised and Semi-Supervised Separation of Sounds from Single-Channel Mixtures},
-   institution = {MERL - Mitsubishi Electric Research Laboratories},
-   address     = {Cambridge, MA 02139},
-   number      = {TR2007-062},
-   month       = jul,
-   year        = 2006,
-   url         = {https://www.merl.com/publications/TR2007-062/},
- }
Wittenburg, K., Lanning, T., Schwenke, D., Shubin, H., Vetro, A., "The Prospects for Unrestricted Speech Input for TV Content Search", International Working Conference on Advanced Visual Interfaces (AVI), May 2006.
BibTeX TR2006-045 PDF
- @inproceedings{Wittenburg2006may,
-   author    = {Wittenburg, K. and Lanning, T. and Schwenke, D. and Shubin, H. and Vetro, A.},
-   title     = {{The Prospects for Unrestricted Speech Input for TV Content Search}},
-   booktitle = {International Working Conference on Advanced Visual Interfaces (AVI)},
-   month     = may,
-   year      = 2006,
-   url       = {https://www.merl.com/publications/TR2006-045},
- }
Radhakrishnan, R., Divakaran, A., "Generative Process Tracking for Audio Analysis", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2006, vol. 5, pp. V.
BibTeX TR2006-053 PDF
- @inproceedings{Radhakrishnan2006may,
-   author    = {Radhakrishnan, R. and Divakaran, A.},
-   title     = {{Generative Process Tracking for Audio Analysis}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   volume    = 5,
-   pages     = {V},
-   month     = may,
-   year      = 2006,
-   issn      = {1520-6149},
-   url       = {https://www.merl.com/publications/TR2006-053},
- }
Raj, B., Shashanka, M.V.S., Smaragdis, P., "Latent Dirichlet Decomposition for Single Channel Speaker Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2006.
BibTeX TR2006-064 PDF
- @inproceedings{Raj2006may,
-   author    = {Raj, B. and Shashanka, M. V. S. and Smaragdis, P.},
-   title     = {{Latent Dirichlet Decomposition for Single Channel Speaker Separation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = may,
-   year      = 2006,
-   url       = {https://www.merl.com/publications/TR2006-064},
- }
Shashanka, M.V.S., Smaragdis, P., "Secure Sound Classification: Gaussian Mixture Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2006.
BibTeX TR2006-065 PDF
- @inproceedings{Shashanka2006may,
-   author    = {Shashanka, M. V. S. and Smaragdis, P.},
-   title     = {{Secure Sound Classification: Gaussian Mixture Models}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = may,
-   year      = 2006,
-   url       = {https://www.merl.com/publications/TR2006-065},
- }
Raj, B., Singh, R., "Reconstructing Spectral Vectors with Uncertain Spectrographic Masks for Robust Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), November 2005, pp. 27-32.
BibTeX TR2005-160 PDF
- @inproceedings{Raj2005nov,
-   author    = {Raj, B. and Singh, R.},
-   title     = {{Reconstructing Spectral Vectors with Uncertain Spectrographic Masks for Robust Speech Recognition}},
-   booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
-   pages     = {27--32},
-   month     = nov,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-160},
- }
Hu, R., Raj, B., "A Robust Voice Activity Detector Using an Acoustic Doppler Radar", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), November 2005, pp. 171-176.
BibTeX TR2005-159 PDF
- @inproceedings{Hu2005nov,
-   author    = {Hu, R. and Raj, B.},
-   title     = {{A Robust Voice Activity Detector Using an Acoustic Doppler Radar}},
-   booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
-   pages     = {171--176},
-   month     = nov,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-159},
- }
Radhakrishnan, R., Divakaran, A., Smaragdis, P., "Audio Analysis for Surveillance Applications", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 158-161.
BibTeX TR2005-139 PDF
- @inproceedings{Radhakrishnan2005oct,
-   author    = {Radhakrishnan, R. and Divakaran, A. and Smaragdis, P.},
-   title     = {{Audio Analysis for Surveillance Applications}},
-   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
-   pages     = {158--161},
-   month     = oct,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-139},
- }
Raj, B., Smaragdis, P., "Latent Variable Decomposition of Spectrograms for Single Channel Speaker Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 17-20.
BibTeX TR2005-137 PDF
- @inproceedings{Raj2005oct,
-   author    = {Raj, B. and Smaragdis, P.},
-   title     = {{Latent Variable Decomposition of Spectrograms for Single Channel Speaker Separation}},
-   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
-   pages     = {17--20},
-   month     = oct,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-137},
- }
Smaragdis, P., Boufounos, P., "Learning Source Trajectories Using Wrapped-Phase Hidden Markov Models", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 114-117.
BibTeX TR2005-138 PDF
- @inproceedings{Smaragdis2005oct,
-   author    = {Smaragdis, P. and Boufounos, P.},
-   title     = {{Learning Source Trajectories Using Wrapped-Phase Hidden Markov Models}},
-   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
-   pages     = {114--117},
-   month     = oct,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-138},
- }
Forlines, C., Schmidt-Nielsen, B., Raj, B., Wittenburg, K., Wolf, P., "A Comparison between Spoken Queries and Menu-based Interfaces for In-Car Digital Music Selection", IFIP TC13 International Conference on Human-Computer Interaction (INTERACT), September 2005.
BibTeX TR2005-020 PDF
- @inproceedings{Forlines2005sep1,
-   author    = {Forlines, C. and Schmidt-Nielsen, B. and Raj, B. and Wittenburg, K. and Wolf, P.},
-   title     = {{A Comparison between Spoken Queries and Menu-based Interfaces for In-Car Digital Music Selection}},
-   booktitle = {IFIP TC13 International Conference on Human-Computer Interaction (INTERACT)},
-   month     = sep,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-020},
- }
Bansal, D., Raj, B., Smaragdis, P., "Bandwidth Expansion of Narrowband Speech Using non-Negative Matrix Factorization", Eurospeech, September 2005.
BibTeX TR2005-135 PDF
- @inproceedings{Bansal2005sep,
-   author    = {Bansal, D. and Raj, B. and Smaragdis, P.},
-   title     = {{Bandwidth Expansion of Narrowband Speech Using non-Negative Matrix Factorization}},
-   booktitle = {Eurospeech},
-   month     = sep,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-135},
- }
Raj, B., Singh, R., Smaragdis, P., "Recognizing Speech from Simultaneous Speakers", Eurospeech, September 2005.
BibTeX TR2005-136 PDF
- @inproceedings{Raj2005sep,
-   author    = {Raj, B. and Singh, R. and Smaragdis, P.},
-   title     = {{Recognizing Speech from Simultaneous Speakers}},
-   booktitle = {Eurospeech},
-   month     = sep,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-136},
- }
Smaragdis, P., "From Learning Music to Learning to Separate", Forum Acusticum, August 2005.
BibTeX TR2005-134 PDF
- @inproceedings{Smaragdis2005aug,
-   author    = {Smaragdis, P.},
-   title     = {{From Learning Music to Learning to Separate}},
-   booktitle = {Forum Acusticum},
-   month     = aug,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-134},
- }
Guinness, J., Raj, B., Schmidt-Nielsen, B., Turicchia, L., Sarpeshkar, R., "A Companding Front End for Noise-Robust Automatic Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2005, vol. 1, pp. 249-252.
BibTeX TR2005-023 PDF
- @inproceedings{Guinness2005mar,
-   author    = {Guinness, J. and Raj, B. and Schmidt-Nielsen, B. and Turicchia, L. and Sarpeshkar, R.},
-   title     = {{A Companding Front End for Noise-Robust Automatic Speech Recognition}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   volume    = 1,
-   pages     = {249--252},
-   month     = mar,
-   year      = 2005,
-   issn      = {1520-6149},
-   url       = {https://www.merl.com/publications/TR2005-023},
- }
Radhakrishnan, R., Divakaran, A., "Systematic Acquisition of Audio Classes for Elevator Surveillance", SPIE Conference on Image and Video Communications and Processing, March 2005, vol. 5685, pp. 64-71.
BibTeX TR2005-076 PDF
- @inproceedings{Radhakrishnan2005mar,
-   author    = {Radhakrishnan, R. and Divakaran, A.},
-   title     = {{Systematic Acquisition of Audio Classes for Elevator Surveillance}},
-   booktitle = {SPIE Conference on Image and Video Communications and Processing},
-   volume    = 5685,
-   pages     = {64--71},
-   month     = mar,
-   year      = 2005,
-   url       = {https://www.merl.com/publications/TR2005-076},
- }
Radhakrishnan, R., Divakaran, A., Xiong, Z., "A Time Series Clustering based Framework for Multimedia Mining and Summarization", ACM SIGMM International Workshop on Multimedia Information Retrieval (MIR), October 2004, pp. 157-164.
BibTeX TR2004-046 PDF
- @inproceedings{Radhakrishnan2004oct,
-   author    = {Radhakrishnan, R. and Divakaran, A. and Xiong, Z.},
-   title     = {{A Time Series Clustering based Framework for Multimedia Mining and Summarization}},
-   booktitle = {ACM SIGMM International Workshop on Multimedia Information Retrieval (MIR)},
-   pages     = {157--164},
-   month     = oct,
-   year      = 2004,
-   isbn      = {1-58113-940-3},
-   url       = {https://www.merl.com/publications/TR2004-046},
- }
Wolf, P., Woelfel, J., van Gemert, J., Raj, B., Wong, D., "SpokenQuery: An Alternate Approach to Choosing Items with Speech", International Conference on Spoken Language Processing (ICSLP), October 2004.
BibTeX TR2004-121 PDF
- @inproceedings{Wolf2004oct,
-   author    = {Wolf, P. and Woelfel, J. and {van Gemert}, J. and Raj, B. and Wong, D.},
-   title     = {{SpokenQuery: An Alternate Approach to Choosing Items with Speech}},
-   booktitle = {International Conference on Spoken Language Processing (ICSLP)},
-   month     = oct,
-   year      = 2004,
-   url       = {https://www.merl.com/publications/TR2004-121},
- }