Publications

Delattre, F., Dirnfeld, D., Nguyen, P., Scarano, S., Jones, M.J., Miraldo, P., Learned-Miller, E., "Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV51070.2023.00894, October 2023, pp. 3715-3724.
BibTeX TR2023-123 PDF Video Software
- @inproceedings{Delattre2023oct,
-   author    = {Delattre, Fabien and Dirnfeld, David and Nguyen, Phat and Scarano, Stephen and Jones, Michael J. and Miraldo, Pedro and Learned-Miller, Erik},
-   title     = {Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   pages     = {3715--3724},
-   month     = oct,
-   year      = {2023},
-   publisher = {IEEE/CVF},
-   doi       = {10.1109/ICCV51070.2023.00894},
-   issn      = {2380-7504},
-   isbn      = {979-8-3503-0718-4},
-   url       = {https://www.merl.com/publications/TR2023-123},
- }
Miraldo, P., Piedade, V., "BANSAC: A dynamic BAyesian Network for adaptive SAmple Consensus", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV51070.2023.00346, October 2023, pp. 3715-3724.
BibTeX TR2023-124 PDF Video Software
- @inproceedings{Miraldo2023oct,
-   author    = {Miraldo, Pedro and Piedade, Valter},
-   title     = {{BANSAC}: A dynamic {BAyesian} Network for adaptive {SAmple} Consensus},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   pages     = {3715--3724},
-   month     = oct,
-   year      = {2023},
-   publisher = {IEEE/CVF},
-   doi       = {10.1109/ICCV51070.2023.00346},
-   issn      = {2380-7504},
-   isbn      = {979-8-3503-0718-4},
-   url       = {https://www.merl.com/publications/TR2023-124},
- }
Nair, N.G., Cherian, A., Lohit, S., Wang, Y., Koike-Akino, T., Patel, V.M., Marks, T.K., "Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis", IEEE International Conference on Computer Vision (ICCV), October 2023, pp. 20850-20860.
BibTeX TR2023-126 PDF Software Presentation
- @inproceedings{Nair2023sep,
-   author    = {Nair, Nithin Gopalakrishnan and Cherian, Anoop and Lohit, Suhas and Wang, Ye and Koike-Akino, Toshiaki and Patel, Vishal M. and Marks, Tim K.},
-   title     = {Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   pages     = {20850--20860},
-   month     = oct,
-   year      = {2023},
-   publisher = {IEEE/CVF},
-   url       = {https://www.merl.com/publications/TR2023-126},
- }
Sharma, M., Chatterjee, M., Peng, K.-C., Lohit, S., Jones, M.J., "Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection", IEEE International Conference on Computer Vision Workshops (ICCV), October 2023, pp. 924-932.
BibTeX TR2023-125 PDF Presentation
- @inproceedings{Sharma2023oct,
-   author    = {Sharma, Manish and Chatterjee, Moitreya and Peng, Kuan-Chuan and Lohit, Suhas and Jones, Michael J.},
-   title     = {Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection},
-   booktitle = {IEEE International Conference on Computer Vision Workshops (ICCV)},
-   pages     = {924--932},
-   month     = oct,
-   year      = {2023},
-   url       = {https://www.merl.com/publications/TR2023-125},
- }
Shenoy, V., Marks, T.K., Mansour, H., Lohit, S., "Unrolled IPPG: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP49359.2023.10222169, September 2023, pp. 2715-2719.
BibTeX TR2023-116 PDF Video
- @inproceedings{Shenoy2023sep,
-   author    = {Shenoy, Vineet and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas},
-   title     = {Unrolled {IPPG}: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent},
-   booktitle = {IEEE International Conference on Image Processing (ICIP)},
-   pages     = {2715--2719},
-   month     = sep,
-   year      = {2023},
-   publisher = {IEEE},
-   doi       = {10.1109/ICIP49359.2023.10222169},
-   isbn      = {978-1-7281-9835-4},
-   url       = {https://www.merl.com/publications/TR2023-116},
- }
Yoshino, K., Chen, Y.-N., Crook, P., Kottur, S., Li, J., Hedayatnia, B., Moon, S., Fe, Z., Li, Z., Zhang, J., Fen, Y., Zhou, J., Kim, S., Liu, Y., Jin, D., Papangelis, A., Gopalakrishnan, K., Hakkani-Tur, D., Damavandi, B., Geramifard, A., Hori, C., Shah, A., Zhang, C., Li, H., Sedoc, J., D’Haro, L.F., Banchs, R., Rudnicky, A., "Overview of the Tenth Dialog System Technology Challenge: DSTC10", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2023.3293030, pp. 1-14, August 2023.
BibTeX TR2023-109 PDF
- @article{Yoshino2023aug,
-   author    = {Yoshino, Koichiro and Chen, Yun-Nung and Crook, Paul and Kottur, Satwik and Li, Jinchao and Hedayatnia, Behnam and Moon, Seungwhan and Fe, Zhengcong and Li, Zekang and Zhang, Jinchao and Fen, Yang and Zhou, Jie and Kim, Seokhwan and Liu, Yang and Jin, Di and Papangelis, Alexandros and Gopalakrishnan, Karthik and Hakkani-Tur, Dilek and Damavandi, Babak and Geramifard, Alborz and Hori, Chiori and Shah, Ankit and Zhang, Chen and Li, Haizhou and Sedoc, Jo{\~a}o and D'Haro, Luis F. and Banchs, Rafael and Rudnicky, Alexander},
-   title     = {Overview of the Tenth Dialog System Technology Challenge: {DSTC10}},
-   journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
-   pages     = {1--14},
-   month     = aug,
-   year      = {2023},
-   doi       = {10.1109/TASLP.2023.3293030},
-   issn      = {2329-9290},
-   url       = {https://www.merl.com/publications/TR2023-109},
- }
Hori, C., Peng, P., Harwath, D., Liu, X., Ota, K., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos", Interspeech, DOI: 10.21437/Interspeech.2023-1983, August 2023, pp. 4663-4667.
BibTeX TR2023-104 PDF
- @inproceedings{Hori2023aug,
-   author    = {Hori, Chiori and Peng, Puyuan and Harwath, David and Liu, Xinyu and Ota, Kei and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and Le Roux, Jonathan},
-   title     = {Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos},
-   booktitle = {Interspeech},
-   pages     = {4663--4667},
-   month     = aug,
-   year      = {2023},
-   doi       = {10.21437/Interspeech.2023-1983},
-   url       = {https://www.merl.com/publications/TR2023-104},
- }
Singh, A., Jones, M.J., Learned-Miller, E., "EVAL: Explainable Video Anomaly Localization", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR52729.2023.01795, June 2023.
BibTeX TR2023-071 PDF Video Software Presentation
- @inproceedings{Singh2023jun,
-   author    = {Singh, Ashish and Jones, Michael J. and Learned-Miller, Erik},
-   title     = {{EVAL}: Explainable Video Anomaly Localization},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   month     = jun,
-   year      = {2023},
-   doi       = {10.1109/CVPR52729.2023.01795},
-   url       = {https://www.merl.com/publications/TR2023-071},
- }
Jha, D.K., Jain, S., Romeres, D., Yerazunis, W.S., Nikovski, D., "Generalizable Human-Robot Collaborative Assembly Using Imitation Learning and Force Control", European Control Conference (ECC), DOI: 10.23919/ECC57647.2023.10178330, May 2023, pp. 1-8.
BibTeX TR2023-065 PDF
- @inproceedings{Jha2023may,
-   author    = {Jha, Devesh K. and Jain, Siddarth and Romeres, Diego and Yerazunis, William S. and Nikovski, Daniel},
-   title     = {Generalizable Human-Robot Collaborative Assembly Using Imitation Learning and Force Control},
-   booktitle = {European Control Conference (ECC)},
-   pages     = {1--8},
-   month     = may,
-   year      = {2023},
-   doi       = {10.23919/ECC57647.2023.10178330},
-   url       = {https://www.merl.com/publications/TR2023-065},
- }
Skibik, T., Vinod, A.P., Weiss, A., Di Cairano, S., "MPC with Integrated Evasive Maneuvers for Failure-safe Automated Driving", American Control Conference (ACC), May 2023, pp. 1122-1128.
BibTeX TR2023-055 PDF
- @inproceedings{Skibik2023may,
-   author    = {Skibik, Terrence and Vinod, Abraham P. and Weiss, Avishai and Di Cairano, Stefano},
-   title     = {{MPC} with Integrated Evasive Maneuvers for Failure-safe Automated Driving},
-   booktitle = {American Control Conference (ACC)},
-   pages     = {1122--1128},
-   month     = may,
-   year      = {2023},
-   url       = {https://www.merl.com/publications/TR2023-055},
- }
Jain, S., Corcodel, R., Jha, D.K., Romeres, D., "Vision Guided Food Assembly by Robot Teaching from Target Composition", ICRA 2023 Workshop on Task-Informed Grasping IV (TIG-IV): From Farm to Fork, May 2023.
BibTeX TR2023-067 PDF
- @inproceedings{Jain2023may,
-   author    = {Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego},
-   title     = {Vision Guided Food Assembly by Robot Teaching from Target Composition},
-   booktitle = {ICRA 2023 Workshop on Task-Informed Grasping IV (TIG-IV): From Farm to Fork},
-   month     = may,
-   year      = {2023},
-   url       = {https://www.merl.com/publications/TR2023-067},
- }
Cherian, A., Jain, S., Marks, T.K., Sullivan, A., "Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160644, May 2023, pp. 9296-9302.
BibTeX TR2023-010 PDF Presentation
- @inproceedings{Cherian2023may,
-   author    = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
-   title     = {Discriminative {3D} Shape Modeling for Few-Shot Instance Segmentation},
-   booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
-   pages     = {9296--9302},
-   month     = may,
-   year      = {2023},
-   publisher = {IEEE},
-   doi       = {10.1109/ICRA48891.2023.10160644},
-   url       = {https://www.merl.com/publications/TR2023-010},
- }
Ota, K., Tung, H.-Y., Smith, K., Cherian, A., Marks, T.K., Sullivan, A., Kanezaki, A., Tenenbaum, J.B., "H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160575, May 2023, pp. 7272-7278.
BibTeX TR2023-009 PDF
- @inproceedings{Ota2023may,
-   author    = {Ota, Kei and Tung, Hsiao-Yu and Smith, Kevin and Cherian, Anoop and Marks, Tim K. and Sullivan, Alan and Kanezaki, Asako and Tenenbaum, Joshua B.},
-   title     = {{H-SAUR}: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions},
-   booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
-   pages     = {7272--7278},
-   month     = may,
-   year      = {2023},
-   publisher = {IEEE},
-   doi       = {10.1109/ICRA48891.2023.10160575},
-   url       = {https://www.merl.com/publications/TR2023-009},
- }
Shah, A., Roy, A., Shah, K., Mishra, S.K., Jacobs, D., Cherian, A., Chellappa, R., "HaLP: Hallucinating Latent Positives for Skeleton-based Self-Supervised Learning of Actions", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2023, pp. 18846-18856.
BibTeX TR2023-035 PDF
- @inproceedings{Shah2023may,
-   author    = {Shah, Anshul and Roy, Aniket and Shah, Ketul and Mishra, Shlok Kumar and Jacobs, David and Cherian, Anoop and Chellappa, Rama},
-   title     = {{HaLP}: Hallucinating Latent Positives for Skeleton-based Self-Supervised Learning of Actions},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   pages     = {18846--18856},
-   month     = may,
-   year      = {2023},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2023-035},
- }
Zhang, J., Cherian, A., Liu, Y., Shabat, I.B., Rodriguez, C., Gould, S., "Aligning Step-by-Step Instructional Diagrams to Video Demonstrations", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2023, pp. 2483-2492.
BibTeX TR2023-034 PDF
- @inproceedings{Zhang2023may,
-   author    = {Zhang, Jiahao and Cherian, Anoop and Liu, Yanbin and Shabat, Itzik Ben and Rodriguez, Cristian and Gould, Stephen},
-   title     = {Aligning Step-by-Step Instructional Diagrams to Video Demonstrations},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   pages     = {2483--2492},
-   month     = may,
-   year      = {2023},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2023-034},
- }
Jeon, E.S., Lohit, S., Anirudh, R., Turaga, P., "Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096888, May 2023.
BibTeX TR2023-021 PDF Presentation
- @inproceedings{Jeon2023may,
-   author    = {Jeon, Eun Som and Lohit, Suhas and Anirudh, Rushil and Turaga, Pavan},
-   title     = {Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = may,
-   year      = {2023},
-   publisher = {IEEE},
-   doi       = {10.1109/ICASSP49357.2023.10096888},
-   url       = {https://www.merl.com/publications/TR2023-021},
- }
Cherian, A., Peng, K.-C., Lohit, S., Smith, K., Tenenbaum, J.B., "Are Deep Neural Networks SMARTer than Second Graders?", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), March 2023, pp. 10834-10844.
BibTeX TR2023-014 PDF Video Data Software Presentation
- @inproceedings{Cherian2023mar,
-   author    = {Cherian, Anoop and Peng, Kuan-Chuan and Lohit, Suhas and Smith, Kevin and Tenenbaum, Joshua B.},
-   title     = {Are Deep Neural Networks {SMARTer} than Second Graders?},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   pages     = {10834--10844},
-   month     = mar,
-   year      = {2023},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2023-014},
- }
Mateus, A., Ranade, S., Ramalingam, S., Miraldo, P., "Fast and Accurate 3D Registration from Line Intersection Constraints", International Journal of Computer Vision, DOI: 10.1007/s11263-023-01774-1, Vol. 131, pp. 2044-2069, February 2023.
BibTeX TR2023-007 PDF
- @article{Mateus2023feb,
-   author    = {Mateus, Andre and Ranade, Siddhant and Ramalingam, Srikumar and Miraldo, Pedro},
-   title     = {Fast and Accurate {3D} Registration from Line Intersection Constraints},
-   journal   = {International Journal of Computer Vision},
-   volume    = {131},
-   pages     = {2044--2069},
-   month     = feb,
-   year      = {2023},
-   doi       = {10.1007/s11263-023-01774-1},
-   url       = {https://www.merl.com/publications/TR2023-007},
- }
Aich, A., Peng, K.-C., Roy-Chowdhury, A.K., "Cross-Domain Video Anomaly Detection without Target Domain Adaptation", IEEE Winter Conference on Applications of Computer Vision (WACV), Crandall, D. and Gong, B. and Lee, Y. J. and Souvenir, R. and Yu, S., Eds., DOI: 10.1109/WACV56688.2023.00261, January 2023, pp. 2578-2590.
BibTeX TR2023-001 PDF Video Presentation
- @inproceedings{Aich2023jan,
-   author    = {Aich, Abhishek and Peng, Kuan-Chuan and Roy-Chowdhury, Amit K.},
-   title     = {Cross-Domain Video Anomaly Detection without Target Domain Adaptation},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   editor    = {Crandall, D. and Gong, B. and Lee, Y. J. and Souvenir, R. and Yu, S.},
-   pages     = {2578--2590},
-   month     = jan,
-   year      = {2023},
-   publisher = {IEEE},
-   doi       = {10.1109/WACV56688.2023.00261},
-   issn      = {2642-9381},
-   isbn      = {978-1-6654-9346-8},
-   url       = {https://www.merl.com/publications/TR2023-001},
- }
Shimoya, R., Morimoto, T., van Baar, J., Boufounos, P.T., Ma, Y., Mansour, H., "Learning Occlusion-Aware Dense Correspondences for Multi-Modal Images", IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS), DOI: 10.1109/AVSS56176.2022.9959354, November 2022, pp. 1-8.
BibTeX TR2022-149 PDF
- @inproceedings{Shimoya2022nov,
-   author    = {Shimoya, Ryosuke and Morimoto, Tahashi and van Baar, Jeroen and Boufounos, Petros T. and Ma, Yanting and Mansour, Hassan},
-   title     = {Learning Occlusion-Aware Dense Correspondences for Multi-Modal Images},
-   booktitle = {IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)},
-   pages     = {1--8},
-   month     = nov,
-   year      = {2022},
-   doi       = {10.1109/AVSS56176.2022.9959354},
-   isbn      = {978-1-6654-6382-9},
-   url       = {https://www.merl.com/publications/TR2022-149},
- }
Romero, D., Lohit, S., "Learning Partial Equivariances from Data", Advances in Neural Information Processing Systems (NeurIPS), S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh, Eds., November 2022, pp. 36466-36478.
BibTeX TR2022-148 PDF Software Presentation
- @inproceedings{Romero2022nov,
-   author    = {Romero, David and Lohit, Suhas},
-   title     = {Learning Partial Equivariances from Data},
-   booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
-   editor    = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
-   pages     = {36466--36478},
-   month     = nov,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-148},
- }
Wang, H., Lohit, S., Jones, M.J., Fu, R., "What Makes a “Good” Data Augmentation in Knowledge Distillation – A Statistical Perspective", Advances in Neural Information Processing Systems (NeurIPS), S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh, Eds., November 2022, pp. 13456-13469.
BibTeX TR2022-147 PDF
- @inproceedings{Wang2022nov,
-   author    = {Wang, Huan and Lohit, Suhas and Jones, Michael J. and Fu, Raymond},
-   title     = {What Makes a {``Good''} Data Augmentation in Knowledge Distillation -- A Statistical Perspective},
-   booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
-   editor    = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
-   pages     = {13456--13469},
-   month     = nov,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-147},
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation", Advances in Neural Information Processing Systems (NeurIPS), November 2022.
BibTeX TR2022-140 PDF Presentation
- @inproceedings{Chatterjee2022nov,
-   author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
-   title     = {Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation},
-   booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
-   month     = nov,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-140},
- }
Ahmed, S.M., Lohit, S., Peng, K.-C., Jones, M.J., Roy Chowdhury, A.K., "Cross-Modal Knowledge Transfer Without Task-Relevant Source Data", European Conference on Computer Vision (ECCV), Avidan, S. and Brostow, G. and Cisse, M. and Farinella, G.M. and Hassner, T., Eds., DOI: 10.1007/978-3-031-19830-4_7, October 2022, pp. 111-127.
BibTeX TR2022-135 PDF Video Software Presentation
- @inproceedings{Ahmed2022oct,
-   author    = {Ahmed, Sk Miraj and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Roy Chowdhury, Amit K.},
-   title     = {Cross-Modal Knowledge Transfer Without Task-Relevant Source Data},
-   booktitle = {European Conference on Computer Vision (ECCV)},
-   editor    = {Avidan, S. and Brostow, G. and Cisse, M. and Farinella, G. M. and Hassner, T.},
-   pages     = {111--127},
-   month     = oct,
-   year      = {2022},
-   publisher = {Springer},
-   doi       = {10.1007/978-3-031-19830-4_7},
-   isbn      = {978-3-031-19830-4},
-   url       = {https://www.merl.com/publications/TR2022-135},
- }
Paul, S., Roy Chowdhury, A.K., Cherian, A., "AVLEN: Audio-Visual-Language Embodied Navigation in 3D Environments", Advances in Neural Information Processing Systems (NeurIPS), October 2022, pp. 6236-6249.
BibTeX TR2022-131 PDF Video Data Software
- @inproceedings{Paul2022oct2,
-   author    = {Paul, Sudipta and Roy Chowdhury, Amit K. and Cherian, Anoop},
-   title     = {{AVLEN}: Audio-Visual-Language Embodied Navigation in {3D} Environments},
-   booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
-   pages     = {6236--6249},
-   month     = oct,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-131},
- }