- Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU57964.2023.10389618, December 2023.
BibTeX TR2023-152 PDF Video- @inproceedings{Pan2023dec2,
- author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
- title = {Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2023,
- month = dec,
- doi = {10.1109/ASRU57964.2023.10389618},
- isbn = {979-8-3503-0689-7},
- url = {https://www.merl.com/publications/TR2023-152}
- }
- Li, Z., Lowy, A., Liu, J., Koike-Akino, T., Malin, B., Parsons, K., Wang, Y., "Exploring User-level Gradient Inversion with a Diffusion Prior", International Workshop on Federated Learning in the Age of Foundation Models in Conjunction with NeurIPS, December 2023.
BibTeX TR2023-149 PDF- @inproceedings{Li2023dec,
- author = {Li, Zhuohang and Lowy, Andrew and Liu, Jing and Koike-Akino, Toshiaki and Malin, Bradley and Parsons, Kieran and Wang, Ye},
- title = {Exploring User-level Gradient Inversion with a Diffusion Prior},
- booktitle = {International Workshop on Federated Learning in the Age of Foundation Models in Conjunction with NeurIPS},
- year = 2023,
- month = dec,
- url = {https://www.merl.com/publications/TR2023-149}
- }
- He, Y., Shin, S., Cherian, A., Markham, A., Trigoni, N., "Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2023, pp. 5496-5507.
BibTeX TR2023-144 PDF- @inproceedings{He2023dec,
- author = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Markham, Andrew and Trigoni, Niki},
- title = {Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2023,
- pages = {5496--5507},
- month = dec,
- url = {https://www.merl.com/publications/TR2023-144}
- }
- Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks", International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero), November 2023.
BibTeX TR2023-141 PDF- @inproceedings{Wu2023nov,
- author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, François G and Le Roux, Jonathan and Watanabe, Shinji},
- title = {On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks},
- booktitle = {International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero)},
- year = 2023,
- month = nov,
- url = {https://www.merl.com/publications/TR2023-141}
- }
- Nair, N.G., Cherian, A., Lohit, S., Wang, Y., Koike-Akino, T., Patel, V.M., Marks, T.K., "Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis", IEEE International Conference on Computer Vision (ICCV), October 2023, pp. 20850-20860.
BibTeX TR2023-126 PDF Software Presentation- @inproceedings{Nair2023sep,
- author = {Nair, Nithin Gopalakrishnan and Cherian, Anoop and Lohit, Suhas and Wang, Ye and Koike-Akino, Toshiaki and Patel, Vishal M. and Marks, Tim K.},
- title = {Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis},
- booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
- year = 2023,
- pages = {20850--20860},
- month = oct,
- publisher = {IEEE/CVF},
- url = {https://www.merl.com/publications/TR2023-126}
- }
- Sharma, M., Chatterjee, M., Peng, K.-C., Lohit, S., Jones, M.J., "Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection", IEEE International Conference on Computer Vision Workshops (ICCV), October 2023, pp. 924-932.
BibTeX TR2023-125 PDF Presentation- @inproceedings{Sharma2023oct,
- author = {Sharma, Manish and Chatterjee, Moitreya and Peng, Kuan-Chuan and Lohit, Suhas and Jones, Michael J.},
- title = {Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection},
- booktitle = {IEEE International Conference on Computer Vision Workshops (ICCV)},
- year = 2023,
- pages = {924--932},
- month = oct,
- url = {https://www.merl.com/publications/TR2023-125}
- }
- Huang, B., Yu, J., Jain, S., "EARL: Eye-on-Hand Reinforcement Learner for Dynamic Grasping with Active Pose Estimation", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), DOI: 10.1109/IROS55552.2023.10341988, October 2023, pp. 2963-2970.
BibTeX TR2023-118 PDF Video- @inproceedings{Huang2023oct,
- author = {Huang, Baichuan and Yu, Jingjin and Jain, Siddarth},
- title = {EARL: Eye-on-Hand Reinforcement Learner for Dynamic Grasping with Active Pose Estimation},
- booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year = 2023,
- pages = {2963--2970},
- month = oct,
- publisher = {IEEE},
- doi = {10.1109/IROS55552.2023.10341988},
- issn = {2153-0866},
- isbn = {978-1-6654-9190-7},
- url = {https://www.merl.com/publications/TR2023-118}
- }
- Falcon Perez, R., Wichern, G., Germain, F., Le Roux, J., "Location as supervision for weakly supervised multi-channel source separation of machine sounds", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA58266.2023.10248128, September 2023.
BibTeX TR2023-119 PDF Presentation- @inproceedings{FalconPerez2023aug,
- author = {Falcon Perez, Ricardo and Wichern, Gordon and Germain, Francois and Le Roux, Jonathan},
- title = {Location as supervision for weakly supervised multi-channel source separation of machine sounds},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2023,
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/WASPAA58266.2023.10248128},
- issn = {1947-1629},
- isbn = {979-8-3503-2372-6},
- url = {https://www.merl.com/publications/TR2023-119}
- }
- Germain, F., Wichern, G., Le Roux, J., "Hyperbolic Unsupervised Anomalous Sound Detection", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA58266.2023.10248092, September 2023.
BibTeX TR2023-108 PDF Video Presentation- @inproceedings{Germain2023aug,
- author = {Germain, Francois and Wichern, Gordon and Le Roux, Jonathan},
- title = {Hyperbolic Unsupervised Anomalous Sound Detection},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2023,
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/WASPAA58266.2023.10248092},
- issn = {1947-1629},
- isbn = {979-8-3503-2372-6},
- url = {https://www.merl.com/publications/TR2023-108}
- }
- Petermann, D., Wichern, G., Subramanian, A.S., Wang, Z.-Q., Le Roux, J., "Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2023.3290428, Vol. 31, pp. 2592-2605, September 2023.
BibTeX TR2023-113 PDF- @article{Petermann2023sep,
- author = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and Wang, Zhong-Qiu and Le Roux, Jonathan},
- title = {Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2023,
- volume = 31,
- pages = {2592--2605},
- month = sep,
- doi = {10.1109/TASLP.2023.3290428},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2023-113}
- }
- Yoshino, K., Chen, Y.-N., Crook, P., Kottur, S., Li, J., Hedayatnia, B., Moon, S., Fe, Z., Li, Z., Zhang, J., Fen, Y., Zhou, J., Kim, S., Liu, Y., Jin, D., Papangelis, A., Gopalakrishnan, K., Hakkani-Tur, D., Damavandi, B., Geramifard, A.,
Hori, C., Shah, A., Zhang, C., Li, H., Sedoc, J., D’Haro, L.F., Banchs, R., Rudnicky, A., "Overview of the Tenth Dialog System Technology Challenge: DSTC10", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2023.3293030, pp. 1-14, August 2023.
BibTeX TR2023-109 PDF- @article{Yoshino2023aug,
- author = {Yoshino, Koichiro and Chen, Yun-Nung and Crook, Paul and Kottur, Satwik and Li, Jinchao and Hedayatnia, Behnam and Moon, Seungwhan and Fe, Zhengcong and Li, Zekang and Zhang, Jinchao and Fen, Yang and Zhou, Jie and Kim, Seokhwan and Liu, Yang and Jin, Di and Papangelis, Alexandros and Gopalakrishnan, Karthik and Hakkani-Tur, Dilek and Damavandi, Babak and Geramifard, Alborz and Hori, Chiori and Shah, Ankit and Zhang, Chen and Li, Haizhou and Sedoc, João and D’Haro, Luis F. and Banchs, Rafael and Rudnicky, Alexander},
- title = {Overview of the Tenth Dialog System Technology Challenge: DSTC10},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2023,
- pages = {1--14},
- month = aug,
- doi = {10.1109/TASLP.2023.3293030},
- issn = {2329-9290},
- url = {https://www.merl.com/publications/TR2023-109}
- }
- Hori, C., Peng, P., Harwath, D., Liu, X., Ota, K., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos", Interspeech, DOI: 10.21437/Interspeech.2023-1983, August 2023, pp. 4663-4667.
BibTeX TR2023-104 PDF- @inproceedings{Hori2023aug,
- author = {Hori, Chiori and Peng, Puyuang and Harwath, David and Liu, Xinyu and Ota, Kei and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and Le Roux, Jonathan},
- title = {Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos},
- booktitle = {Interspeech},
- year = 2023,
- pages = {4663--4667},
- month = aug,
- doi = {10.21437/Interspeech.2023-1983},
- url = {https://www.merl.com/publications/TR2023-104}
- }
- Ahmed, M.R., Koike-Akino, T., Parsons, K., Wang, Y., "Joint Software-Hardware Design for Green AI", International Midwest Symposium on Circuits and Systems (MWSCAS), DOI: 10.1109/MWSCAS57524.2023.10405937, August 2023.
BibTeX TR2023-096 PDF- @inproceedings{Ahmed2023aug,
- author = {Ahmed, Md Rubel and Koike-Akino, Toshiaki and Parsons, Kieran and Wang, Ye},
- title = {Joint Software-Hardware Design for Green AI},
- booktitle = {International Midwest Symposium on Circuits and Systems (MWSCAS)},
- year = 2023,
- month = aug,
- publisher = {IEEE},
- doi = {10.1109/MWSCAS57524.2023.10405937},
- issn = {1558-3899},
- isbn = {979-8-3503-0210-3},
- url = {https://www.merl.com/publications/TR2023-096}
- }
- Ahmed, M.R., Koike-Akino, T., Parsons, K., Wang, Y., "AutoHLS: Learning to Accelerate Design Space Exploration for HLS Designs", International Midwest Symposium on Circuits and Systems (MWSCAS), DOI: 10.1109/MWSCAS57524.2023.10405914, August 2023.
BibTeX TR2023-097 PDF- @inproceedings{Ahmed2023aug2,
- author = {Ahmed, Md Rubel and Koike-Akino, Toshiaki and Parsons, Kieran and Wang, Ye},
- title = {AutoHLS: Learning to Accelerate Design Space Exploration for HLS Designs},
- booktitle = {International Midwest Symposium on Circuits and Systems (MWSCAS)},
- year = 2023,
- month = aug,
- publisher = {IEEE},
- doi = {10.1109/MWSCAS57524.2023.10405914},
- issn = {1558-3899},
- isbn = {979-8-3503-0210-3},
- url = {https://www.merl.com/publications/TR2023-097}
- }
- Pandharipande, A., Cheng, C.-H., Dauwels, J., Gurbuz, S., Ibanez-Guzman, J., Li, G., Piazzoni, A., Wang, P., Santra, A., "Sensing and Machine Learning for Automotive Perception: A Review", IEEE Sensors Journal, DOI: 10.1109/JSEN.2023.3262134, Vol. 23, No. 11, pp. 11097-11115, June 2023.
BibTeX TR2023-089 PDF- @article{Pandharipande2023jun,
- author = {Pandharipande, Ashish and Cheng, Chih-Hong and Dauwels, Justin and Gurbuz, Sevgi and Ibanez-Guzman, Javier and Li, Guofa and Piazzoni, Andrea and Wang, Pu and Santra, Avik},
- title = {Sensing and Machine Learning for Automotive Perception: A Review},
- journal = {IEEE Sensors Journal},
- year = 2023,
- volume = 23,
- number = 11,
- pages = {11097--11115},
- month = jun,
- doi = {10.1109/JSEN.2023.3262134},
- issn = {1558-1748},
- url = {https://www.merl.com/publications/TR2023-089}
- }
- Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F., Le Roux, J., Watanabe, S., "BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up," Tech. Rep. TR2023-068, DCASE2023 Challenge, May 2023.
BibTeX TR2023-068 PDF- @techreport{Wu2023may,
- author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, Francois and Le Roux, Jonathan and Watanabe, Shinji},
- title = {BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up},
- institution = {DCASE2023 Challenge},
- year = 2023,
- month = may,
- url = {https://www.merl.com/publications/TR2023-068}
- }
- Cherian, A., Jain, S., Marks, T.K., Sullivan, A., "Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160644, May 2023, pp. 9296-9302.
BibTeX TR2023-010 PDF Presentation- @inproceedings{Cherian2023may,
- author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
- title = {Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2023,
- pages = {9296--9302},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICRA48891.2023.10160644},
- url = {https://www.merl.com/publications/TR2023-010}
- }
- Curtis, A., Kaelbling, L., Jain, S., "Task-Directed Exploration in Continuous POMDPs for Robotic Manipulation of Articulated Objects", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160306, May 2023, pp. 3721-3728.
BibTeX TR2023-046 PDF- @inproceedings{Curtis2023may,
- author = {Curtis, Aidan and Kaelbling, Leslie and Jain, Siddarth},
- title = {Task-Directed Exploration in Continuous POMDPs for Robotic Manipulation of Articulated Objects},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2023,
- pages = {3721--3728},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICRA48891.2023.10160306},
- isbn = {979-8-3503-2365-8},
- url = {https://www.merl.com/publications/TR2023-046}
- }
- Haruna, M., Ogino, M., Tagashira, S., Kashiwa, M., Morita, S., Koike-Akino, T., Imai, K., Zuho, T., Makita, M., Takahashi, Y., "Avatar Technologies of Team LAST MILE Toward Mobile Smart Device Operation Service", IEEE International Conference on Robotics and Automation (ICRA), May 2023.
BibTeX TR2023-047 PDF- @inproceedings{Haruna2023may,
- author = {Haruna, Masaki and Ogino, Masaki and Tagashira, Shigeaki and Kashiwa, Munetaka and Morita, Susumu and Koike-Akino, Toshiaki and Imai, Kota and Zuho, Tiancheng and Makita, Masashi and Takahashi, Yasutake},
- title = {Avatar Technologies of Team LAST MILE Toward Mobile Smart Device Operation Service},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2023,
- month = may,
- url = {https://www.merl.com/publications/TR2023-047}
- }
- Ota, K., Tung, H.-Y., Smith, K., Cherian, A., Marks, T.K., Sullivan, A., Kanezaki, A., Tenenbaum, J.B., "H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160575, May 2023, pp. 7272-7278.
BibTeX TR2023-009 PDF- @inproceedings{Ota2023may,
- author = {Ota, Kei and Tung, Hsiao-Yu and Smith, Kevin and Cherian, Anoop and Marks, Tim K. and Sullivan, Alan and Kanezaki, Asako and Tenenbaum, Joshua B.},
- title = {H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2023,
- pages = {7272--7278},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICRA48891.2023.10160575},
- url = {https://www.merl.com/publications/TR2023-009}
- }
- Wang, X., Kim, K.J., Wang, Y., Koike-Akino, T., Parsons, K., "DeepEAD: Explainable Anomaly Detection from System Logs", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC45041.2023.10279563, May 2023.
BibTeX TR2023-050 PDF- @inproceedings{Wang2023may,
- author = {Wang, Xinda and Kim, Kyeong Jin and Wang, Ye and Koike-Akino, Toshiaki and Parsons, Kieran},
- title = {DeepEAD: Explainable Anomaly Detection from System Logs},
- booktitle = {IEEE International Conference on Communications (ICC)},
- year = 2023,
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICC45041.2023.10279563},
- issn = {1938-1883},
- isbn = {978-1-5386-7462-8},
- url = {https://www.merl.com/publications/TR2023-050}
- }
- Kim, K.J., Vinod, A.P., Guo, J., Deshpande, V.M., Parsons, K., "Spectrum Sharing-inspired Safe Motion Planning", IEEE International Conference on Communications Workshops (ICC), May 2023.
BibTeX TR2023-049 PDF- @inproceedings{Kim2023may2,
- author = {Kim, Kyeong Jin and Vinod, Abraham P. and Guo, Jianlin and Deshpande, Vedang M. and Parsons, Kieran},
- title = {Spectrum Sharing-inspired Safe Motion Planning},
- booktitle = {IEEE International Conference on Communications Workshops (ICC)},
- year = 2023,
- month = may,
- url = {https://www.merl.com/publications/TR2023-049}
- }
- Xu, Y., Wang, B., Sakamoto, Y., Yamamoto, T., Nishimura, Y., "Comparison of Learning-based Surrogate Models for Electric Motors", Conference on the Computation of Electromagnetic Fields (COMPUMAG), DOI: 10.1109/COMPUMAG56388.2023.10411811, May 2023, pp. 1-4.
BibTeX TR2023-042 PDF- @inproceedings{Xu2023may,
- author = {Xu, Yihao and Wang, Bingnan and Sakamoto, Yusuke and Yamamoto, Tatsuya and Nishimura, Yuki},
- title = {Comparison of Learning-based Surrogate Models for Electric Motors},
- booktitle = {2023 24th International Conference on the Computation of Electromagnetic Fields (COMPUMAG)},
- year = 2023,
- pages = {1--4},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/COMPUMAG56388.2023.10411811},
- url = {https://www.merl.com/publications/TR2023-042}
- }
- Chen, K., Wichern, G., Germain, F., Le Roux, J., "Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT", IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB), DOI: 10.1109/ICASSPW59220.2023.10193575, May 2023.
BibTeX TR2023-030 PDF- @inproceedings{Chen2023may,
- author = {Chen, Ke and Wichern, Gordon and Germain, Francois and Le Roux, Jonathan},
- title = {Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT},
- booktitle = {IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB)},
- year = 2023,
- month = may,
- doi = {10.1109/ICASSPW59220.2023.10193575},
- isbn = {979-8-3503-0261-5},
- url = {https://www.merl.com/publications/TR2023-030}
- }
- Aralikatti, R., Boeddeker, C., Wichern, G., Subramanian, A.S., Le Roux, J., "Reverberation as Supervision for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095022, May 2023, pp. 1-5.
BibTeX TR2023-016 PDF- @inproceedings{Aralikatti2023may,
- author = {Aralikatti, Rohith and Boeddeker, Christoph and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
- title = {Reverberation as Supervision for Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2023,
- pages = {1--5},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICASSP49357.2023.10095022},
- url = {https://www.merl.com/publications/TR2023-016}
- }