Publications

Paul, S., Roy Chowdhury, A.K., Cherian, A., "AVLEN: Audio-Visual-Language Embodied Navigation in 3D Environments", Advances in Neural Information Processing Systems (NeurIPS), October 2022, pp. 6236-6249.
BibTeX TR2022-131 PDF Video Data Software
- @inproceedings{Paul2022oct2,
-   author    = {Paul, Sudipta and Roy Chowdhury, Amit K. and Cherian, Anoop},
-   title     = {{AVLEN}: Audio-Visual-Language Embodied Navigation in {3D} Environments},
-   booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
-   pages     = {6236--6249},
-   month     = oct,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-131}
- }
Mansour, H., Lohit, S., Boufounos, P.T., "Distributed Radar Autofocus Imaging Using Deep Priors", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP46576.2022.9897332, October 2022, pp. 2511-2515.
BibTeX TR2022-129 PDF Video
- @inproceedings{Mansour2022oct,
-   author    = {Mansour, Hassan and Lohit, Suhas and Boufounos, Petros T.},
-   title     = {Distributed Radar Autofocus Imaging Using Deep Priors},
-   booktitle = {IEEE International Conference on Image Processing (ICIP)},
-   pages     = {2511--2515},
-   month     = oct,
-   year      = {2022},
-   doi       = {10.1109/ICIP46576.2022.9897332},
-   url       = {https://www.merl.com/publications/TR2022-129}
- }
Hori, C., Hori, T., Le Roux, J., "Low-Latency Streaming Scene-aware Interaction Using Audio-Visual Transformers", Interspeech, DOI: 10.21437/Interspeech.2022-10891, September 2022, pp. 4511-4515.
BibTeX TR2022-116 PDF
- @inproceedings{Hori2022sep,
-   author    = {Hori, Chiori and Hori, Takaaki and Le Roux, Jonathan},
-   title     = {Low-Latency Streaming Scene-aware Interaction Using Audio-Visual Transformers},
-   booktitle = {Interspeech},
-   pages     = {4511--4515},
-   month     = sep,
-   year      = {2022},
-   doi       = {10.21437/Interspeech.2022-10891},
-   url       = {https://www.merl.com/publications/TR2022-116}
- }
Rambhatla, S., Jones, M.J., Chellappa, R., "An Empirical Analysis of Boosting Deep Networks", International Joint Conference on Neural Networks (IJCNN), DOI: 10.1109/IJCNN55064.2022.9892204, July 2022.
BibTeX TR2022-075 PDF Presentation
- @inproceedings{Rambhatla2022jul,
-   author    = {Rambhatla, Sai and Jones, Michael J. and Chellappa, Rama},
-   title     = {An Empirical Analysis of Boosting Deep Networks},
-   booktitle = {International Joint Conference on Neural Networks (IJCNN)},
-   month     = jul,
-   year      = {2022},
-   doi       = {10.1109/IJCNN55064.2022.9892204},
-   url       = {https://www.merl.com/publications/TR2022-075}
- }
Miraldo, P., Iglesias, J.P., "A Unified Model for Line Projections in Catadioptric Cameras with Rotationally Symmetric Mirrors", IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR52688.2022.01534, June 2022.
BibTeX TR2022-084 PDF
- @inproceedings{Miraldo2022jun,
-   author    = {Miraldo, Pedro and Iglesias, Jose Pedro},
-   title     = {A Unified Model for Line Projections in Catadioptric Cameras with Rotationally Symmetric Mirrors},
-   booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
-   month     = jun,
-   year      = {2022},
-   doi       = {10.1109/CVPR52688.2022.01534},
-   url       = {https://www.merl.com/publications/TR2022-084}
- }
Sullivan, A., Wang, J., Li, X., Chen, S., Abbot, L., "PointMotionNet: Point-Wise Motion Learning for Large-Scale LiDAR Point Clouds Sequences", CVPR Workshop on Autonomous Driving, June 2022.
BibTeX TR2022-083 PDF
- @inproceedings{Sullivan2022jun,
-   author    = {Sullivan, Alan and Wang, Jun and Li, Xiaolong and Chen, Siheng and Abbot, Lynn},
-   title     = {{PointMotionNet}: Point-Wise Motion Learning for Large-Scale {LiDAR} Point Clouds Sequences},
-   booktitle = {CVPR Workshop on Autonomous Driving},
-   month     = jun,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-083}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), June 2022.
BibTeX TR2022-082 PDF
- @inproceedings{Chatterjee2022jun,
-   author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
-   title     = {Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
-   month     = jun,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-082}
- }
Zhu, X., Jain, S., Tomizuka, M., van Baar, J., "Learning to Synthesize Volumetric Meshes from Vision-based Tactile Imprints", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA46639.2022.9812092, May 2022, pp. 4833-4839.
BibTeX TR2022-055 PDF
- @inproceedings{Zhu2022may2,
-   author    = {Zhu, Xinghao and Jain, Siddarth and Tomizuka, Masayoshi and van Baar, Jeroen},
-   title     = {Learning to Synthesize Volumetric Meshes from Vision-based Tactile Imprints},
-   booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
-   pages     = {4833--4839},
-   month     = may,
-   year      = {2022},
-   publisher = {IEEE},
-   doi       = {10.1109/ICRA46639.2022.9812092},
-   isbn      = {978-1-7281-9681-7},
-   url       = {https://www.merl.com/publications/TR2022-055}
- }
Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
- @inproceedings{Shah2022apr,
-   author    = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and Le Roux, Jonathan and Hori, Chiori},
-   title     = {Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {7732--7736},
-   month     = apr,
-   year      = {2022},
-   publisher = {IEEE},
-   issn      = {1520-6149},
-   isbn      = {978-1-6654-0540-9},
-   url       = {https://www.merl.com/publications/TR2022-019}
- }
Hori, C., Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Le Roux, J., Marks, T.K., "Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10", The 10th Dialog System Technology Challenge Workshop at AAAI, February 2022.
BibTeX TR2022-016 PDF
- @inproceedings{Hori2022feb,
-   author    = {Hori, Chiori and Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Le Roux, Jonathan and Marks, Tim K.},
-   title     = {Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in {DSTC10}},
-   booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI},
-   month     = feb,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-016}
- }
Shah, A.P., Hori, T., Le Roux, J., Hori, C., "DSTC10-AVSD Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning", The 10th Dialog System Technology Challenge Workshop at AAAI 2022, February 2022.
BibTeX TR2022-025 PDF
- @inproceedings{Shah2022feb,
-   author    = {Shah, Ankit Parag and Hori, Takaaki and Le Roux, Jonathan and Hori, Chiori},
-   title     = {{DSTC10-AVSD} Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning},
-   booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI 2022},
-   month     = feb,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-025}
- }
Cherian, A., Hori, C., Marks, T.K., Le Roux, J., "(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i1.19922, February 2022, pp. 444-453.
BibTeX TR2022-014 PDF Video Presentation
- @inproceedings{Cherian2022feb,
-   author    = {Cherian, Anoop and Hori, Chiori and Marks, Tim K. and Le Roux, Jonathan},
-   title     = {{(2.5+1)D} Spatio-Temporal Scene Graphs for Video Question Answering},
-   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
-   pages     = {444--453},
-   month     = feb,
-   year      = {2022},
-   doi       = {10.1609/aaai.v36i1.19922},
-   url       = {https://www.merl.com/publications/TR2022-014}
- }
Ke, L., Peng, K.-C., Lyu, S., "Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition", AAAI Conference on Artificial Intelligence, February 2022.
BibTeX TR2022-015 PDF Presentation
- @inproceedings{Ke2022feb,
-   author    = {Ke, Lipeng and Peng, Kuan-Chuan and Lyu, Siwei},
-   title     = {Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition},
-   booktitle = {AAAI Conference on Artificial Intelligence},
-   month     = feb,
-   year      = {2022},
-   url       = {https://www.merl.com/publications/TR2022-015}
- }
Shah, A., Sra, S., Chellappa, R., Cherian, A., "Max-Margin Contrastive Learning", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i8.20796, February 2022, pp. 8220-8230.
BibTeX TR2022-013 PDF
- @inproceedings{Shah2022feb2,
-   author    = {Shah, Anshul and Sra, Suvrit and Chellappa, Rama and Cherian, Anoop},
-   title     = {Max-Margin Contrastive Learning},
-   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
-   pages     = {8220--8230},
-   month     = feb,
-   year      = {2022},
-   doi       = {10.1609/aaai.v36i8.20796},
-   url       = {https://www.merl.com/publications/TR2022-013}
- }
Medin, S.C., Egger, B., Cherian, A., Wang, Y., Tenenbaum, J.B., Liu, X., Marks, T.K., "MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i2.20091, February 2022, pp. 1962-1971.
BibTeX TR2022-011 PDF Video Data Presentation
- @inproceedings{Medin2022feb,
-   author    = {Medin, Safa C. and Egger, Bernhard and Cherian, Anoop and Wang, Ye and Tenenbaum, Joshua B. and Liu, Xiaoming and Marks, Tim K.},
-   title     = {{MOST-GAN}: {3D} Morphable {StyleGAN} for Disentangled Face Image Manipulation},
-   booktitle = {AAAI Conference on Artificial Intelligence},
-   pages     = {1962--1971},
-   month     = feb,
-   year      = {2022},
-   doi       = {10.1609/aaai.v36i2.20091},
-   url       = {https://www.merl.com/publications/TR2022-011}
- }
Lohit, S., Jones, M.J., "Model Compression Using Optimal Transport", IEEE Winter Conference on Applications of Computer Vision (WACV), January 2022.
BibTeX TR2022-006 PDF Presentation
- @inproceedings{Lohit2022jan,
-   author    = {Lohit, Suhas and Jones, Michael J.},
-   title     = {Model Compression Using Optimal Transport},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   month     = jan,
-   year      = {2022},
-   publisher = {CVF OpenAccess},
-   url       = {https://www.merl.com/publications/TR2022-006}
- }
Yu, X., van Baar, J., Chen, S., Sullivan, A., "Keypoint-aligned 3D Human Shape Recovery from A Single Image with Bilayer-Graph", International Conference on 3D Vision (3DV), DOI: 10.1109/3DV53792.2021.00060, December 2021, pp. 505-514.
BibTeX TR2021-143 PDF
- @inproceedings{Yu2021dec,
-   author    = {Yu, Xin and van Baar, Jeroen and Chen, Siheng and Sullivan, Alan},
-   title     = {Keypoint-aligned {3D} Human Shape Recovery from A Single Image with Bilayer-Graph},
-   booktitle = {International Conference on 3D Vision (3DV)},
-   pages     = {505--514},
-   month     = dec,
-   year      = {2021},
-   doi       = {10.1109/3DV53792.2021.00060},
-   url       = {https://www.merl.com/publications/TR2021-143}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 9751-9761.
BibTeX TR2021-096 PDF Video
- @inproceedings{Chatterjee2021oct2,
-   author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
-   title     = {A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   pages     = {9751--9761},
-   month     = oct,
-   year      = {2021},
-   url       = {https://www.merl.com/publications/TR2021-096}
- }
Chatterjee, M., Le Roux, J., Ahuja, N., Cherian, A., "Visual Scene Graphs for Audio Source Separation", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 1204-1213.
BibTeX TR2021-095 PDF Video Software
- @inproceedings{Chatterjee2021oct,
-   author    = {Chatterjee, Moitreya and Le Roux, Jonathan and Ahuja, Narendra and Cherian, Anoop},
-   title     = {Visual Scene Graphs for Audio Source Separation},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   pages     = {1204--1213},
-   month     = oct,
-   year      = {2021},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2021-095}
- }
Cherian, A., Pais, G., Jain, S., Marks, T.K., Sullivan, A., "InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 10023-10032.
BibTeX TR2021-097 PDF Video Data Software Presentation
- @inproceedings{Cherian2021oct,
-   author    = {Cherian, Anoop and Pais, Goncalo and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
-   title     = {{InSeGAN}: A Generative Approach to Segmenting Identical Instances in Depth Images},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   pages     = {10023--10032},
-   month     = oct,
-   year      = {2021},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2021-097}
- }
Kannapiran, S., van Baar, J., Berman, S., "A Visual Inertial Odometry Framework for 3D Points, Lines and Planes", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), DOI: 10.1109/IROS51168.2021.9636526, September 2021.
BibTeX TR2021-131 PDF
- @inproceedings{Kannapiran2021sep,
-   author    = {Kannapiran, Shenbagaraj and van Baar, Jeroen and Berman, Spring},
-   title     = {A Visual Inertial Odometry Framework for {3D} Points, Lines and Planes},
-   booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
-   month     = sep,
-   year      = {2021},
-   doi       = {10.1109/IROS51168.2021.9636526},
-   url       = {https://www.merl.com/publications/TR2021-131}
- }
Comas, A., Marks, T.K., Mansour, H., Lohit, S., Ma, Y., Liu, X., "TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506663, September 2021, pp. 309-313.
BibTeX TR2021-099 PDF
- @inproceedings{Comas2021sep,
-   author    = {Comas, Armand and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas and Ma, Yechi and Liu, Xiaoming},
-   title     = {{TURNIP}: Time-series {U-NET} with Recurrence for {NIR} Imaging {PPG}},
-   booktitle = {IEEE International Conference on Image Processing (ICIP)},
-   pages     = {309--313},
-   month     = sep,
-   year      = {2021},
-   publisher = {IEEE},
-   doi       = {10.1109/ICIP42928.2021.9506663},
-   url       = {https://www.merl.com/publications/TR2021-099}
- }
Das, P., Ortega, A., Chen, S., Mansour, H., Vetro, A., "Application-agnostic spatio-temporal hand graph representations for stable activity understanding", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506054, September 2021, pp. 1074-1078.
BibTeX TR2021-112 PDF
- @inproceedings{Das2021sep,
-   author    = {Das, Pratyusha and Ortega, Antonio and Chen, Siheng and Mansour, Hassan and Vetro, Anthony},
-   title     = {Application-agnostic spatio-temporal hand graph representations for stable activity understanding},
-   booktitle = {IEEE International Conference on Image Processing (ICIP)},
-   pages     = {1074--1078},
-   month     = sep,
-   year      = {2021},
-   publisher = {IEEE},
-   doi       = {10.1109/ICIP42928.2021.9506054},
-   url       = {https://www.merl.com/publications/TR2021-112}
- }
Shi, L., Liu, D., Thornton, J.E., "Robust Camera Pose Estimation for Image Stitching", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506680, September 2021.
BibTeX TR2021-113 PDF
- @inproceedings{Shi2021sep,
-   author    = {Shi, Laixi and Liu, Dehong and Thornton, Jay E.},
-   title     = {Robust Camera Pose Estimation for Image Stitching},
-   booktitle = {IEEE International Conference on Image Processing (ICIP)},
-   month     = sep,
-   year      = {2021},
-   publisher = {IEEE},
-   doi       = {10.1109/ICIP42928.2021.9506680},
-   isbn      = {978-1-6654-4115-5},
-   url       = {https://www.merl.com/publications/TR2021-113}
- }
Hu, W., Pang, J., Liu, X., Tian, D., Lin, C.-W., Vetro, A., "Graph Signal Processing for Geometric Data and Beyond: Theory and Applications", IEEE Transactions on Multimedia, DOI: 10.1109/TMM.2021.3111440, Vol. 24, pp. 3961-3977, September 2021.
BibTeX TR2021-121 PDF
- @article{Hu2021oct,
-   author  = {Hu, Wei and Pang, Jiahao and Liu, Xianming and Tian, Dong and Lin, Chia-Wen and Vetro, Anthony},
-   title   = {Graph Signal Processing for Geometric Data and Beyond: Theory and Applications},
-   journal = {IEEE Transactions on Multimedia},
-   volume  = {24},
-   pages   = {3961--3977},
-   month   = sep,
-   year    = {2021},
-   doi     = {10.1109/TMM.2021.3111440},
-   issn    = {1941-0077},
-   url     = {https://www.merl.com/publications/TR2021-121}
- }