[ { "id": "-8Kt5J-9AI", "title": "Tailoring Visual Object Representations to Human Requirements: A Case Study with a Recycling Robot", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robots are well-suited to alleviate the burden of repetitive and tedious manipulation tasks. In many applications though, a robot may be asked to interact with a wide variety of objects, making it hard or even impossible to pre-program visual object classifiers suitable for the task of interest. In this work, we study the problem of learning a classifier for visual objects based on a few examples provided by humans. We frame this problem from the perspective of learning a suitable visual object representation that allows us to distinguish the desired object category from others. Our proposed approach integrates human supervision into the representation learning process by combining contrastive learning with an additional loss function that brings the representations of human examples close to each other in the latent space. Our experiments show that our proposed method performs better than self-supervised and fully supervised learning methods in offline evaluations and can also be used in real-time by a robot in a simplified recycling domain, where recycling streams contain a variety of objects.", "keywords": "Representation Learning;Contrastive Learning;Human-in-the-loop Robot Learning", "primary_area": "", "supplementary_material": "/attachment/1b017dc0a9ed0b63ec5d9b34a1e87d4e6dec0cf4.zip", "author": "Debasmita Ghose;Michal Adam Lewkowicz;Kaleb Gezahegn;Julian Lee;Timothy Adamson;Marynel Vazquez;Brian Scassellati", "authorids": "~Debasmita_Ghose1;~Michal_Adam_Lewkowicz1;~Kaleb_Gezahegn1;~Julian_Lee1;~Timothy_Adamson1;~Marynel_Vazquez1;~Brian_Scassellati1", "gender": "F;M;M;M;M;;M", "homepage": "https://www.debasmitaghose.com/;;;;;https://www.marynel.net;http://scazlab.yale.edu", "dblp": "239/4905;;;;;96/9255;32/818.html", "google_scholar": "cgF857gAAAAJ;2TWe9N4AAAAJ;;;-NVFbtsAAAAJ;-wm8urcAAAAJ;0jSdqoEAAAAJ", "orcid": ";;;;;0000-0003-0698-5472;", "linkedin": "debasmita-ghose-59859763/;michal-lewkowicz-9a4824232/;kalebg/;http://www.linkedin.com/in/julian-lee-87b620230;tim-adamson-b79432b1/;;", "or_profile": "~Debasmita_Ghose1;~Michal_Adam_Lewkowicz1;~Kaleb_Gezahegn1;~Julian_Lee1;~Timothy_Adamson1;~Marynel_Vazquez1;~Brian_Scassellati1", "aff": "Yale University;Department of Computer Science, Yale University;Yale University;Yale University;;Yale University;Yale University", "aff_domain": "yale.edu;cs.yale.edu;yale.edu;yale.edu;;yale.edu;yale.edu", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nghose2022tailoring,\ntitle={Tailoring Visual Object Representations to Human Requirements: A Case Study with a Recycling Robot},\nauthor={Debasmita Ghose and Michal Adam Lewkowicz and Kaleb Gezahegn and Julian Lee and Timothy Adamson and Marynel Vazquez and Brian Scassellati},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=-8Kt5J-9AI}\n}", "github": "https://github.com/ScazLab/HumanSupContrastiveClustering", "project": "", "reviewers": "nRzh;2cDu;o67U;F5P5", "site": "https://openreview.net/forum?id=-8Kt5J-9AI", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 3, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=16592115594997230244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "00OqOl4txhe", "title": "Learning Interpretable BEV Based VIO without Deep Neural Networks", "track": "main", "status": "Poster", "tldr": "A fully differentiable, and interpretable, BEV based VIO model for robots with local planar motion that can be trained without deep neural networks. ", "abstract": "Monocular visual-inertial odometry (VIO) is a critical problem in robotics and autonomous driving. Traditional methods solve this problem based on filtering or optimization. While being fully interpretable, they rely on manual interference and empirical parameter tuning. On the other hand, learning-based approaches allow for end-to-end training but require a large number of training data to learn millions of parameters. However, the non-interpretable and heavy models hinder the generalization ability. In this paper, we propose a fully differentiable, and interpretable, bird-eye-view (BEV) based VIO model for robots with local planar motion that can be trained without deep neural networks. Specifically, we first adopt Unscented Kalman Filter as a differentiable layer to predict the pitch and roll, where the covariance matrices of noise are learned to filter out the noise of the IMU raw data. Second, the refined pitch and roll are adopted to retrieve a gravity-aligned BEV image of each frame using differentiable camera projection. Finally, a differentiable pose estimator is utilized to estimate the remaining 3 DoF poses between the BEV frames: leading to a 5 DoF pose estimation. Our method allows for learning the covariance matrices end-to-end supervised by the pose estimation loss, demonstrating superior performance to empirical baselines. 
Experimental results on synthetic and real-world datasets demonstrate that our simple approach is competitive with state-of-the-art methods and generalizes well on unseen scenes.", "keywords": "VIO;Interpretable Learning", "primary_area": "", "supplementary_material": "/attachment/628db0c9334ab9d7d31ee5981cfbf6baa0bf0ade.zip", "author": "Zexi Chen;Haozhe Du;Xuecheng XU;Rong Xiong;Yiyi Liao;Yue Wang", "authorids": "~Zexi_Chen1;hzdu@zju.edu.cn;~Xuecheng_XU1;~Rong_Xiong1;~Yiyi_Liao2;~Yue_Wang1", "gender": "M;;M;;F;M", "homepage": "https://github.com/jessychen1016;;https://maverickpeter.github.io/;;https://yiyiliao.github.io/;https://ywang-zju.github.io/", "dblp": "194/7622;;;;139/0761;", "google_scholar": "UOqXUW4AAAAJ;;qsU_c2MAAAAJ;1hI9bqUAAAAJ;lTBMax0AAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Zexi_Chen1;hzdu@zju.edu.cn;~Xuecheng_XU1;~Rong_Xiong1;~Yiyi_Liao2;~Yue_Wang1", "aff": "Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;;PhD student;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2022learning,\ntitle={Learning Interpretable {BEV} Based {VIO} without Deep Neural Networks},\nauthor={Zexi Chen and Haozhe Du and Xuecheng XU and Rong Xiong and Yiyi Liao and Yue Wang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=00OqOl4txhe}\n}", "github": "", "project": "", "reviewers": "3Som;vD2G;p14S;Me8P", "site": "https://openreview.net/forum?id=00OqOl4txhe", "pdf_size": 0, "rating": "1;4;10;10", "confidence": "", "rating_avg": 6.25, "confidence_avg": 0, "replies_avg": 34, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15549143225391826996&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0nb97NQypbK", "title": "Residual Skill Policies: Learning an Adaptable Skill-based Action Space for Reinforcement Learning for Robotics", "track": "main", "status": "Poster", "tldr": "We introduce a general skill-based RL framework that allows for fine-grained skill-adaptation to task variations not captured by fixed skill spaces and propose a flows-based skill prior for accelerated, state-conditioned exploration.", "abstract": "Skill-based reinforcement learning (RL) has emerged as a promising strategy to leverage prior knowledge for accelerated robot learning. Skills are typically extracted from expert demonstrations and are embedded into a latent space from which they can be sampled as actions by a high-level RL agent. However, this \\textit{skill space} is expansive, and not all skills are relevant for a given robot state, making exploration difficult. Furthermore, the downstream RL agent is limited to learning structurally similar tasks to those used to construct the skill space. We firstly propose accelerating exploration in the skill space using state-conditioned generative models to directly bias the high-level agent towards only \\textit{sampling} skills relevant to a given state based on prior experience. 
Next, we propose a low-level residual policy for fine-grained \\textit{skill adaptation} enabling downstream RL agents to adapt to unseen task variations. Finally, we validate our approach across four challenging manipulation tasks that differ from those used to build the skill space, demonstrating our ability to learn across task variations while significantly accelerating exploration, outperforming prior works.", "keywords": "Reinforcement Learning;Skill Learning;Transfer Learning;Residual Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/06699e38ee009fcf5158cfbf44921b16ce2868ec.zip", "author": "Krishan Rana;Ming Xu;Brendan Tidd;Michael Milford;Niko Suenderhauf", "authorids": "~Krishan_Rana1;~Ming_Xu5;~Brendan_Tidd1;~Michael_Milford1;~Niko_Suenderhauf1", "gender": "M;M;M;M;M", "homepage": "https://krishanrana.github.io/;;https://data61.csiro.au/;https://staff.qut.edu.au/staff/michael.milford;http://nikosuenderhauf.info", "dblp": "70/4142;43/3362-15;;01/4027;", "google_scholar": "-hYjPxsAAAAJ;https://scholar.google.com/citations?hl=en;-oTAAHQAAAAJ;TDSmCKgAAAAJ;https://scholar.google.com.au/citations?user=WnKjfFEAAAAJ", "orcid": "0000-0002-9028-9295;;;0000-0002-5162-1793;", "linkedin": "krishanrana/;ming-xu-2a21a754/;brendan-tidd-8100101b8/;michaeljmilford/;nikosuenderhauf/", "or_profile": "~Krishan_Rana1;~Ming_Xu5;~Brendan_Tidd1;~Michael_Milford1;~Niko_Suenderhauf1", "aff": "Queensland University of Technology;Queensland University of Technology;Queensland University of Technology;Queensland University of Technology;Queensland University of Technology", "aff_domain": "qut.edu.au;qut.edu.au;qut.edu.au;qut.edu.au;qut.edu.au", "position": "PhD student;PhD student;PhD student;Joint Director;Associate Professor", "bibtex": "@inproceedings{\nrana2022residual,\ntitle={Residual Skill Policies: Learning an Adaptable Skill-based Action Space for Reinforcement Learning for Robotics},\nauthor={Krishan Rana and Ming Xu and Brendan Tidd and Michael Milford and Niko Suenderhauf},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=0nb97NQypbK}\n}", "github": "https://github.com/krishanrana/reskill", "project": "", "reviewers": "HjV5;fBFD;daKY;eiyG", "site": "https://openreview.net/forum?id=0nb97NQypbK", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5066870300546776766&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Queensland University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.qut.edu.au", "aff_unique_abbr": "QUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "id": "1Pt2FM5BLdi", "title": "Towards Online 3D Bin Packing: Learning Synergies between Packing and Unpacking via DRL", "track": "main", "status": "Poster", "tldr": "", "abstract": "There is an emerging research interest in addressing the online 3D bin packing problem (3D-BPP), which has a wide range of applications in logistics industry. However, neither heuristic methods nor those based on deep reinforcement learning (DRL) outperform human packers in real logistics scenarios. 
One important reason is that humans can make corrections after performing inappropriate packing actions by unpacking incorrectly packed items. Inspired by such an unpacking mechanism, we present a DRL-based packing-and-unpacking network (PUN) to learn the synergies between the two actions for the online 3D-BPP. Experimental results demonstrate that PUN achieves the state-of-the-art performance and the supplementary video shows that the system based on PUN can reliably complete the online 3D bin packing task in the real world.", "keywords": "Bin packing;Robotics;Deep reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/964a45a927921994d9b3f426b340b1dfe25c75ab.zip", "author": "Shuai Song;Shuo Yang;Ran Song;Shilei Chu;yibin Li;Wei Zhang", "authorids": "~Shuai_Song1;~Shuo_Yang10;~Ran_Song2;~Shilei_Chu2;~yibin_Li1;~Wei_Zhang7", "gender": ";;M;;M;", "homepage": ";;https://faculty.sdu.edu.cn/songran/en/index.htm;;https://www.vsislab.com;http://www.sleichu.cn/", "dblp": ";;10/8738;;;", "google_scholar": ";;;;;", "orcid": "0000-0002-4879-0780;0000-0002-3126-8021;;;;0000-0002-3008-3164", "linkedin": ";;;;;", "or_profile": "~Shuai_Song1;~Shuo_Yang10;~Ran_Song2;~yibin_Li1;~Wei_Zhang7;~Shilei_CHU1", "aff": "Shandong University;Shandong University;Shandong University;;Shandong University;Shandong University", "aff_domain": "sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;;sdu.edu.cn;sdu.edu.cn", "position": "MS student;PhD student;Full Professor;;Full Professor;MS student", "bibtex": "@inproceedings{\nsong2022towards,\ntitle={Towards Online 3D Bin Packing: Learning Synergies between Packing and Unpacking via {DRL}},\nauthor={Shuai Song and Shuo Yang and Ran Song and Shilei Chu and yibin Li and Wei Zhang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=1Pt2FM5BLdi}\n}", "github": "", "project": "", "reviewers": "axHb;Wn5v;WbWp;G14Q", "site": "https://openreview.net/forum?id=1Pt2FM5BLdi", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8844434045634742466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shandong University", "aff_unique_dep": "", "aff_unique_url": "http://www.sdu.edu.cn", "aff_unique_abbr": "SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1vV0JRA2HY0", "title": "Do we use the Right Measure? Challenges in Evaluating Reward Learning Algorithms", "track": "main", "status": "Poster", "tldr": "We describe shortcomings in different measures used to evaluate reward learning algorithms in human robot interaction.", "abstract": "Reward learning is a highly active area of research in human-robot interaction (HRI), allowing a broad range of users to specify complex robot behaviour. Experiments with simulated user input play a major role in the development and evaluation of reward learning algorithms due to the availability of a ground truth. In this paper, we review measures for evaluating reward learning algorithms used in HRI, most of which fall into two classes. In a theoretical worst case analysis and several examples, we show that both classes of measures can fail to effectively indicate how good the learned robot behaviour is. 
Thus, our work contributes to the characterization of sim-to-real gaps of reward learning in HRI.", "keywords": "Human Robot Interaction;Reward Learning", "primary_area": "", "supplementary_material": "/attachment/470e70c389938522be563a858c427cff47f4f850.zip", "author": "Nils Wilde;Javier Alonso-Mora", "authorids": "~Nils_Wilde1;j.alonsomora@tudelft.nl", "gender": "M;", "homepage": "https://sites.google.com/view/nwilde/home;", "dblp": ";", "google_scholar": ";", "orcid": "0000-0003-3238-8153;", "linkedin": ";", "or_profile": "~Nils_Wilde1;j.alonsomora@tudelft.nl", "aff": "Delft University of Technology;", "aff_domain": "tudelft.nl;", "position": "Postdoc;", "bibtex": "@inproceedings{\nwilde2022do,\ntitle={Do we use the Right Measure? Challenges in Evaluating Reward Learning Algorithms},\nauthor={Nils Wilde and Javier Alonso-Mora},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=1vV0JRA2HY0}\n}", "github": "", "project": "", "reviewers": "PQx1;ktP2;LhcT;GMPv", "site": "https://openreview.net/forum?id=1vV0JRA2HY0", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 39, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14969874562218263687&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "id": "1zbWQxFIU-", "title": "MegaPose: 6D Pose Estimation of Novel Objects via Render & Compare", "track": "main", "status": "Poster", "tldr": "A method for 6D pose estimation trained on synthetic data that can be used for any previously unseen object with CAD model.", "abstract": "We introduce MegaPose, a method to estimate the 6D pose of novel objects, that is, objects unseen during training. At inference time, the method only assumes knowledge of (i) a region of interest displaying the object in the image and (ii) a CAD model of the observed object. The contributions of this work are threefold. First, we present a 6D pose refiner based on a render&compare strategy which can be applied to novel objects. The shape and coordinate system of the novel object are provided as inputs to the network by rendering multiple synthetic views of the object's CAD model. Second, we introduce a novel approach for coarse pose estimation which leverages a network trained to classify whether the pose error between a synthetic rendering and an observed image of the same object can be corrected by the refiner. Third, we introduce a large-scale synthetic dataset of photorealistic images of thousands of objects with diverse visual and shape properties and show that this diversity is crucial to obtain good generalization performance on novel objects. We train our approach on this large synthetic dataset and apply it without retraining to hundreds of novel objects in real images from several pose estimation benchmarks. Our approach achieves state-of-the-art performance on the ModelNet and YCB-Video datasets. An extensive evaluation on the 7 core datasets of the BOP challenge demonstrates that our approach achieves performance competitive with existing approaches that require access to the target objects during training. 
Code, dataset and trained models are available on the project page: https://megapose6d.github.io/.", "keywords": "Object Pose Estimation;Robot Manipulation;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/defcbc76d22089c7356c7ca0be3204aa596a90b6.zip", "author": "Yann Labb\u00e9;Lucas Manuelli;Arsalan Mousavian;Stephen Tyree;Stan Birchfield;Jonathan Tremblay;Justin Carpentier;Mathieu Aubry;Dieter Fox;Josef Sivic", "authorids": "~Yann_Labb\u00e91;~Lucas_Manuelli1;~Arsalan_Mousavian1;~Stephen_Tyree1;~Stan_Birchfield1;~Jonathan_Tremblay1;~Justin_Carpentier1;~Mathieu_Aubry3;~Dieter_Fox1;~Josef_Sivic1", "gender": ";M;M;M;M;Not Specified;M;;M;M", "homepage": ";http://lucasmanuelli.com;https://cs.gmu.edu/~amousavi/;https://swtyree.github.io;https://cecas.clemson.edu/~stb/;https://jtremblay.org/;https://jcarpent.github.io;http://imagine.enpc.fr/~aubrym/;https://homes.cs.washington.edu/~fox/;http://people.ciirc.cvut.cz/~sivic", "dblp": ";;164/8572;60/1032;b/StanBirchfield;17/8925;173/7498;57/10067;f/DieterFox;71/5006", "google_scholar": ";0pxg5ssAAAAJ;fcA9m88AAAAJ;;_bKTUqAAAAAJ;https://scholar.google.ca/citations?user=zeS5UJEAAAAJ;https://scholar.google.fr/citations?user=CyhIdmMAAAAJ;https://scholar.google.fr/citations?user=0MiPsosAAAAJ;DqXsbPAAAAAJ;https://scholar.google.fr/citations?user=NCtKHnQAAAAJ", "orcid": ";;;;;;;0000-0002-3804-0193;;", "linkedin": ";;;;;;;;;", "or_profile": "~Yann_Labb\u00e91;~Lucas_Manuelli1;~Arsalan_Mousavian1;~Stephen_Tyree1;~Stan_Birchfield1;~Jonathan_Tremblay1;~Justin_Carpentier1;~Mathieu_Aubry3;~Dieter_Fox1;~Josef_Sivic1", "aff": ";NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;INRIA;ENPC;Department of Computer Science;Czech Technical University in Prague", "aff_domain": ";nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;inria.fr;enpc.fr;cs.washington.edu;cvut.cz", "position": ";Researcher;Research Scientist;Research scientist;Principal Research Scientist;Researcher;Researcher;Principal Researcher;Full Professor;Principal investigator", "bibtex": "@inproceedings{\nlabb{\\'e}2022megapose,\ntitle={MegaPose: 6D Pose Estimation of Novel Objects via Render \\& Compare},\nauthor={Yann Labb{\\'e} and Lucas Manuelli and Arsalan Mousavian and Stephen Tyree and Stan Birchfield and Jonathan Tremblay and Justin Carpentier and Mathieu Aubry and Dieter Fox and Josef Sivic},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=1zbWQxFIU-}\n}", "github": "", "project": "", "reviewers": "sFyM;KfxR;VhYw", "site": "https://openreview.net/forum?id=1zbWQxFIU-", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 10, "corr_rating_confidence": 0, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11293341730318769061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;0;1;2;3;4", "aff_unique_norm": "NVIDIA;INRIA;\u00c9cole Nationale des Ponts et Chauss\u00e9es;Unknown Institution;Czech Technical University", "aff_unique_dep": "NVIDIA Corporation;;;Department of Computer Science;", "aff_unique_url": "https://www.nvidia.com;https://www.inria.fr;https://www.enpc.fr;;https://www.ctu.cz", "aff_unique_abbr": "NVIDIA;INRIA;ENPC;;CTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Prague", "aff_country_unique_index": "0;0;0;0;0;1;1;3", "aff_country_unique": "United States;France;;Czech Republic" }, { "id": "2CSj965d9O4", "title": "Is Anyone There? 
Learning a Planner Contingent on Perceptual Uncertainty", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robots in complex multi-agent environments should reason about the intentions of observed and currently unobserved agents. In this paper, we present a new learning-based method for prediction and planning in complex multi-agent environments where the states of the other agents are partially-observed. Our approach, Active Visual Planning (AVP), uses high-dimensional observations to learn a flow-based generative model of multi-agent joint trajectories, including unobserved agents that may be revealed in the near future, depending on the robot's actions. Our predictive model is implemented using deep neural networks that map raw observations to future detection and pose trajectories and is learned entirely offline using a dataset of recorded observations (not ground-truth states). Once learned, our predictive model can be used for contingency planning over the potential existence, intentions, and positions of unobserved agents. We demonstrate the effectiveness of AVP on a set of autonomous driving environments inspired by real-world scenarios that require reasoning about the existence of other unobserved agents for safe and efficient driving. In these environments, AVP achieves optimal closed-loop performance, while methods that do not reason about potential unobserved agents exhibit either overconfident or underconfident behavior.", "keywords": "Forecasting;Planning;Partial Observability;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/1ece18d2c293aaafbe19608f278c92a02daafa01.zip", "author": "Charles Packer;Nicholas Rhinehart;Rowan Thomas McAllister;Matthew A. Wright;Xin Wang;Jeff He;Sergey Levine;Joseph E. Gonzalez", "authorids": "~Charles_Packer1;~Nicholas_Rhinehart1;~Rowan_Thomas_McAllister1;~Matthew_A._Wright1;~Xin_Wang1;~Jeff_He1;~Sergey_Levine1;~Joseph_E._Gonzalez1", "gender": ";M;M;;F;M;M;M", "homepage": ";https://leaf.utias.utoronto.ca/;https://rowanmcallister.github.io/;;https://people.eecs.berkeley.edu/~xinw/;;https://people.eecs.berkeley.edu/~svlevine/;http://eecs.berkeley.edu/~jegonzal", "dblp": ";153/2193;123/6416;;;;80/7594;61/8262", "google_scholar": ";xUGZX_MAAAAJ;https://scholar.google.co.uk/citations?user=6uIhh6MAAAAJ;;e9gUdKwAAAAJ;;8R35rCwAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ", "orcid": ";;0000-0002-9519-2345;;;;;0000-0003-2921-956X", "linkedin": ";;rowantmcallister;;xin-wang-aa83a577;jeff-he-99a96b163/;;", "or_profile": "~Charles_Packer1;~Nicholas_Rhinehart1;~Rowan_Thomas_McAllister1;~Matthew_A._Wright1;~Xin_Wang1;~Jeff_He1;~Sergey_Levine1;~Joseph_E._Gonzalez1", "aff": ";University of California, Berkeley;Toyota Research Institute;;Microsoft;University of California, Berkeley;Google;University of California, Berkeley", "aff_domain": ";berkeley.edu;tri.global;;microsoft.com;berkeley.edu;google.com;berkeley.edu", "position": ";Postdoc;Machine Learning Scientist;;Senior Researcher;Undergrad student;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\npacker2022is,\ntitle={Is Anyone There? Learning a Planner Contingent on Perceptual Uncertainty},\nauthor={Charles Packer and Nicholas Rhinehart and Rowan Thomas McAllister and Matthew A. Wright and Xin Wang and Jeff He and Sergey Levine and Joseph E. 
Gonzalez},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=2CSj965d9O4}\n}", "github": "", "project": "", "reviewers": "uLnS;ec4g;56zH;D5KJ", "site": "https://openreview.net/forum?id=2CSj965d9O4", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17211854915369718490&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "University of California, Berkeley;Toyota Research Institute;Microsoft;Google", "aff_unique_dep": ";;Microsoft Corporation;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.tri.global;https://www.microsoft.com;https://www.google.com", "aff_unique_abbr": "UC Berkeley;TRI;Microsoft;Google", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2cEjfernc5P", "title": "Selective Object Rearrangement in Clutter", "track": "main", "status": "Poster", "tldr": "We propose a method for tabletop object rearrangement that deals with cluttered initial scene, target object selectivity, and occupied goal positions -- the first system to address all three concurrently in a purely image-based setting.", "abstract": "We propose an image-based, learned method for selective tabletop object rearrangement in clutter using a parallel jaw gripper. Our method consists of three stages: graph-based object sequencing (which object to move), feature-based action selection (whether to push or grasp, and at what position and orientation) and a visual correspondence-based placement policy (where to place a grasped object). Experiments show that this decomposition works well in challenging settings requiring the robot to begin with an initially cluttered scene, selecting only the objects that need to be rearranged while discarding others, and dealing with cases where the goal location for an object is already occupied \u2013 making it the first system to address all these concurrently in a purely image-based setting. We also achieve an $\\sim$ 8% improvement in task success rate over the previously best reported result that handles both translation and orientation in less restrictive (un-cluttered, non-selective) settings. We demonstrate zero-shot transfer of our system solely trained in simulation to a real robot selectively rearranging up to everyday objects, many unseen during learning, on a crowded tabletop. Videos:https://sites.google.com/view/selective-rearrangement", "keywords": "Rearrangement;Robot Manipulation;Task and Motion Planning", "primary_area": "", "supplementary_material": "/attachment/684c073b150a24ef2372df3844673e55f03c5492.zip", "author": "Bingjie Tang;Gaurav S. 
Sukhatme", "authorids": "~Bingjie_Tang1;~Gaurav_S._Sukhatme1", "gender": ";M", "homepage": ";http://www-robotics.usc.edu/~gaurav/", "dblp": ";s/GauravSSukhatme", "google_scholar": ";https://scholar.google.com.tw/citations?user=lRUi-A8AAAAJ", "orcid": ";0000-0003-2408-474X", "linkedin": ";gaurav-sukhatme-9b6420b/", "or_profile": "~Bingjie_Tang1;~Gaurav_S._Sukhatme1", "aff": ";University of Southern California", "aff_domain": ";usc.edu", "position": ";Full Professor", "bibtex": "@inproceedings{\ntang2022selective,\ntitle={Selective Object Rearrangement in Clutter},\nauthor={Bingjie Tang and Gaurav S. Sukhatme},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=2cEjfernc5P}\n}", "github": "", "project": "", "reviewers": "Z5Z1;W89x;C9hA;Exub", "site": "https://openreview.net/forum?id=2cEjfernc5P", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8594926107751580496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "2gfB_kMVFvP", "title": "ToolFlowNet: Robotic Manipulation with Tools via Predicting Tool Flow from Point Clouds", "track": "main", "status": "Poster", "tldr": "A neural network which given a segmented point cloud, predicts per-point flow on a tool for manipulation.", "abstract": "Point clouds are a widely available and canonical data modality which convey the 3D geometry of a scene. Despite significant progress in classification and segmentation from point clouds, policy learning from such a modality remains challenging, and most prior works in imitation learning focus on learning policies from images or state information.\nIn this paper, we propose a novel framework for learning policies from point clouds for robotic manipulation with tools. We use a novel neural network, ToolFlowNet, which predicts dense per-point flow on the tool that the robot controls, and then uses the flow to derive the transformation that the robot should execute. \nWe apply this framework to imitation learning of challenging deformable object manipulation tasks with continuous movement of tools, including scooping and pouring, and demonstrate significantly improved performance over baselines which do not use flow. 
We perform physical scooping experiments with ToolFlowNet and find that we can attain 82% scooping success.\nSee https://sites.google.com/view/point-cloud-policy/home for supplementary material.", "keywords": "Flow;Point Clouds;Tool Manipulation;Deformables", "primary_area": "", "supplementary_material": "/attachment/4066699be23e44e3dd65a3a6994542e3f6d2ffb4.zip", "author": "Daniel Seita;Yufei Wang;Sarthak J Shetty;Edward Yao Li;Zackory Erickson;David Held", "authorids": "~Daniel_Seita1;~Yufei_Wang4;~Sarthak_J_Shetty1;~Edward_Yao_Li1;~Zackory_Erickson1;~David_Held1", "gender": ";M;M;M;M;M", "homepage": "https://yufeiwang63.github.io/;https://www.ri.cmu.edu/ri-people/sarthak-shetty/;;https://zackory.com;http://davheld.github.io/;https://danielseita.github.io/", "dblp": ";;;;22/11147;172/0917", "google_scholar": "HQl9718AAAAJ;;;wElkTtIAAAAJ;0QtU-NsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";;edwardli-5775/;;;", "or_profile": "~Yufei_Wang4;~Sarthak_J_Shetty1;~Edward_Yao_Li1;~Zackory_Erickson1;~David_Held1;~Daniel_Takeshi_Seita1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;andrew.cmu.edu", "position": "PhD student;MS student;Undergrad student;Assistant Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nseita2022toolflownet,\ntitle={ToolFlowNet: Robotic Manipulation with Tools via Predicting Tool Flow from Point Clouds},\nauthor={Daniel Seita and Yufei Wang and Sarthak J Shetty and Edward Yao Li and Zackory Erickson and David Held},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=2gfB_kMVFvP}\n}", "github": "", "project": "", "reviewers": "sFXa;Vzsj;ckzp;LESm", "site": "https://openreview.net/forum?id=2gfB_kMVFvP", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1846918562672217597&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3CQ3Vt0v99", "title": "INQUIRE: INteractive Querying for User-aware Informative REasoning", "track": "main", "status": "Poster", "tldr": "INQUIRE is a robot learning system that optimizes over multiple query types: demonstrations, preferences, corrections, and binary rewards. It results in faster, more robust learning than baselines that use single or fixed-pattern query types.", "abstract": "Research on Interactive Robot Learning has yielded several modalities for querying a human for training data, including demonstrations, preferences, and corrections. While prior work in this space has focused on optimizing the robot's queries within each interaction type, there has been little work on optimizing over the selection of the interaction type itself. 
We present INQUIRE, the first algorithm to implement and optimize over a generalized representation of information gain across multiple interaction types. Our evaluations show that INQUIRE can dynamically optimize its interaction type (and respective optimal query) based on its current learning status and the robot's state in the world, resulting in more robust performance across tasks in comparison to state-of-the art baseline methods. Additionally, INQUIRE allows for customizable cost metrics to bias its selection of interaction types, enabling this algorithm to be tailored to a robot's particular deployment domain and formulate cost-aware, informative queries.", "keywords": "Active Learning;Learning from Demonstration;Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/f243babfe403623d0db61587b4931b971b357293.zip", "author": "Tesca Fitzgerald;Pallavi Koppol;Patrick Callaghan;Russell Quinlan Jun Hei Wong;Reid Simmons;Oliver Kroemer;Henny Admoni", "authorids": "~Tesca_Fitzgerald1;~Pallavi_Koppol1;~Patrick_Callaghan1;~Russell_Quinlan_Jun_Hei_Wong1;~Reid_Simmons1;~Oliver_Kroemer1;~Henny_Admoni1", "gender": "F;;M;;M;M;", "homepage": "http://www.tescafitzgerald.com;;https://www.ri.cmu.edu/ri-people/patrick-callaghan/;http://www.russellwongtech.com/;https://www.cs.cmu.edu/~reids;https://www.ri.cmu.edu/ri-faculty/oliver-kroemer/;https://hennyadmoni.com", "dblp": "159/0410;;;;;04/7743;44/7075", "google_scholar": "UTmj6K4AAAAJ;naFAwxsAAAAJ;;;;_tbXjP4AAAAJ;XXiZaA4AAAAJ", "orcid": "0000-0003-0867-0546;;;;;;", "linkedin": ";;;russwong89/;;;", "or_profile": "~Tesca_Fitzgerald1;~Pallavi_Koppol1;~Patrick_Callaghan1;~Russell_Quinlan_Jun_Hei_Wong1;~Reid_Simmons1;~Oliver_Kroemer1;~Henny_Admoni1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "Postdoc;PhD student;MS student;MS student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nfitzgerald2022inquire,\ntitle={{INQUIRE}: {IN}teractive Querying for User-aware Informative {RE}asoning},\nauthor={Tesca Fitzgerald and Pallavi Koppol and Patrick Callaghan and Russell Quinlan Jun Hei Wong and Reid Simmons and Oliver Kroemer and Henny Admoni},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=3CQ3Vt0v99}\n}", "github": "https://github.com/HARPLab/inquire", "project": "", "reviewers": "pRs8;DFDJ;rWdS;pgRm", "site": "https://openreview.net/forum?id=3CQ3Vt0v99", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 23, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10975993501137473795&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3R3Pz5i0tye", "title": "Inner Monologue: Embodied Reasoning through Planning with Language Models", "track": "main", "status": "Poster", "tldr": "We propose to use an \"inner monologue\" as a way for 
LLM-based task planners to incorporate feedback from multiple sources and we significantly improved high level instruction completion on simulated and real long-horizon (mobile) manipulation tasks.", "abstract": "Recent works have shown how the reasoning capabilities of Large Language Models (LLMs) can be applied to domains beyond natural language processing, such as planning and interaction for robots. These embodied problems require an agent to understand many semantic aspects of the world: the repertoire of skills available, how these skills influence the world, and how changes to the world map back to the language. LLMs planning in embodied environments need to consider not just what skills to do, but also how and when to do them - answers that change over time in response to the agent's own choices. In this work, we investigate to what extent LLMs used in such embodied contexts can reason over sources of feedback provided through natural language, without any additional training. We propose that by leveraging environment feedback, LLMs are able to form an inner monologue that allows them to more richly process and plan in robotic control scenarios. We investigate a variety of sources of feedback, such as success detection, scene description, and human interaction. We find that closed-loop language feedback significantly improves high level instruction completion on three domains, including simulated and real table top rearrangement tasks and long-horizon mobile manipulation tasks in a kitchen environment in the real world.", "keywords": "Large Language Models;Grounding Models;Multi-modal Control;Task and Motion Planning;Mobile Manipulation", "primary_area": "", "supplementary_material": "/attachment/eaae84cd658312b205159dc258c7ebc189eb48ee.zip", "author": "Wenlong Huang;Fei Xia;Ted Xiao;Harris Chan;Jacky Liang;Pete Florence;Andy Zeng;Jonathan Tompson;Igor Mordatch;Yevgen Chebotar;Pierre Sermanet;Tomas Jackson;Noah Brown;Linda Luu;Sergey Levine;Karol Hausman;brian ichter", "authorids": "~Wenlong_Huang1;~Fei_Xia1;~Ted_Xiao1;~Harris_Chan1;~Jacky_Liang1;~Pete_Florence1;~Andy_Zeng3;~Jonathan_Tompson1;~Igor_Mordatch5;~Yevgen_Chebotar1;~Pierre_Sermanet1;tomasjackson@google.com;noahbrown@google.com;luulinda@google.com;~Sergey_Levine1;~Karol_Hausman2;~brian_ichter1", "gender": "M;M;M;M;M;;;M;;M;;;;;M;;", "homepage": "https://wenlong.page;;https://www.tedxiao.me;http://www.cs.toronto.edu/~hchan/;https://www.jacky.io;http://www.peteflorence.com/;;http://jonathantompson.com;;;https://sermanet.github.io/;;;;https://people.eecs.berkeley.edu/~svlevine/;;", "dblp": "82/2872;;198/0598;227/3248;;;;139/0769;;01/11424;28/6457;;;;80/7594;;", "google_scholar": "hYVMrzsAAAAJ;pqP5_PgAAAAJ;;0tLCTHYAAAAJ;K29Sv1EAAAAJ;;;U_Jw8DUAAAAJ;Vzr1RukAAAAJ;ADkiClQAAAAJ;0nPi5YYAAAAJ;;;;8R35rCwAAAAJ;;-w5DuHgAAAAJ", "orcid": ";0000-0003-4343-1444;;;;;;;;;;;;;;;", "linkedin": ";;;theharrischan/;jackyliang42;;;;;;sermanet/;;;;;;", "or_profile": "~Wenlong_Huang1;~Fei_Xia1;~Ted_Xiao1;~Harris_Chan1;~Jacky_Liang1;~Pete_Florence1;~Andy_Zeng3;~Jonathan_Tompson1;~Igor_Mordatch5;~Yevgen_Chebotar1;~Pierre_Sermanet1;tomasjackson@google.com;noahbrown@google.com;luulinda@google.com;~Sergey_Levine1;~Karol_Hausman2;~brian_ichter1", "aff": "Google;Google;;Google Brain;Carnegie Mellon University;Google;;Google DeepMind;Research, Google;Google;Google;;;;Google;;Google", "aff_domain": "google.com;google.com;;google.com;cmu.edu;google.com;;google.com;research.google.com;google.com;google.com;;;;google.com;;google.com", "position": 
"Intern;Researcher;;Student Researcher;PhD student;Research Scientist;;Researcher;Researcher;Research Scientist;Research Scientist;;;;Research Scientist;;Research Scientist", "bibtex": "@inproceedings{\nhuang2022inner,\ntitle={Inner Monologue: Embodied Reasoning through Planning with Language Models},\nauthor={Wenlong Huang and Fei Xia and Ted Xiao and Harris Chan and Jacky Liang and Pete Florence and Andy Zeng and Jonathan Tompson and Igor Mordatch and Yevgen Chebotar and Pierre Sermanet and Tomas Jackson and Noah Brown and Linda Luu and Sergey Levine and Karol Hausman and brian ichter},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=3R3Pz5i0tye}\n}", "github": "", "project": "", "reviewers": "Ywcr;YS48;JWy9", "site": "https://openreview.net/forum?id=3R3Pz5i0tye", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 17, "corr_rating_confidence": 0, "gs_citation": 1022, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17957175255238019436&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_unique_norm": "Google;Carnegie Mellon University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.cmu.edu", "aff_unique_abbr": "Google;CMU", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "3RBY8fKjHeu", "title": "DayDreamer: World Models for Physical Robot Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "To solve tasks in complex environments, robots need to learn from experience. Deep reinforcement learning is a common approach to robot learning but requires a large amount of trial and error to learn, limiting its deployment in the physical world. As a consequence, many advances in robot learning rely on simulators. On the other hand, learning inside of simulators fails to capture the complexity of the real world, is prone to simulator inaccuracies, and the resulting behaviors do not adapt to changes in the world. The Dreamer algorithm has recently shown great promise for learning from small amounts of interaction by planning within a learned world model, outperforming pure reinforcement learning in video games. Learning a world model to predict the outcomes of potential actions enables planning in imagination, reducing the amount of trial and error needed in the real environment. However, it is unknown whether Dreamer can facilitate faster learning on physical robots. In this paper, we apply Dreamer to 4 robots to learn online and directly in the real world, without any simulators. Dreamer trains a quadruped robot to roll off its back, stand up, and walk from scratch and without resets in only 1 hour. We then push the robot and find that Dreamer adapts within 10 minutes to withstand perturbations or quickly roll over and stand back up. On two different robotic arms, Dreamer learns to pick and place objects from camera images and sparse rewards, approaching human-level teleoperation performance. On a wheeled robot, Dreamer learns to navigate to a goal position purely from camera images, automatically resolving ambiguity about the robot orientation. 
Using the same hyperparameters across all experiments, we find that Dreamer is capable of online learning in the real world, which establishes a strong baseline. We release our infrastructure for future applications of world models to robot learning.", "keywords": "reinforcement learning;model-based reinforcement learning;world models;locomotion;sample-efficiency", "primary_area": "", "supplementary_material": "/attachment/dd742f98c5a2cf7dfb5a0a57d2129f7bd247bb60.zip", "author": "Philipp Wu;Alejandro Escontrela;Danijar Hafner;Pieter Abbeel;Ken Goldberg", "authorids": "~Philipp_Wu1;~Alejandro_Escontrela2;~Danijar_Hafner1;~Pieter_Abbeel2;~Ken_Goldberg1", "gender": "M;;M;M;M", "homepage": "https://github.com/wuphilipp;https://danijar.com;https://people.eecs.berkeley.edu/~pabbeel/;http://goldberg.berkeley.edu/;https://www.escontrela.me", "dblp": ";184/8088;;g/KennethYGoldberg;", "google_scholar": ";VINmGpYAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ;53OxjmYAAAAJ", "orcid": ";0000-0002-9534-7271;;0000-0001-6747-9499;", "linkedin": ";;;goldbergken/;alejandro-escontrela/", "or_profile": "~Philipp_Wu1;~Danijar_Hafner1;~Pieter_Abbeel2;~Ken_Goldberg1;~Alejandro_Escontrela1", "aff": "University of California, Berkeley;University of Toronto;Covariant;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;cs.toronto;covariant.ai;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;Founder;Full Professor;PhD student", "bibtex": "@inproceedings{\nwu2022daydreamer,\ntitle={DayDreamer: World Models for Physical Robot Learning},\nauthor={Philipp Wu and Alejandro Escontrela and Danijar Hafner and Pieter Abbeel and Ken Goldberg},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=3RBY8fKjHeu}\n}", "github": "", "project": "", "reviewers": "VhL5;4BTT;BYdE;HuH8;niYi", "site": "https://openreview.net/forum?id=3RBY8fKjHeu", "pdf_size": 0, "rating": "4;4;6;6;6", "confidence": "", "rating_avg": 5.2, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 328, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10896773227699305049&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of California, Berkeley;University of Toronto;Covariant", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.utoronto.ca;", "aff_unique_abbr": "UC Berkeley;U of T;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada;" }, { "id": "4ffLQu_O-Dl", "title": "Safe Control Under Input Limits with Neural Control Barrier Functions", "track": "main", "status": "Poster", "tldr": "", "abstract": "We propose new methods to synthesize control barrier function (CBF) based safe controllers that avoid input saturation, which can cause safety violations. In particular, our method is created for high-dimensional, general nonlinear systems, for which such tools are scarce. We leverage techniques from machine learning, like neural networks and deep learning, to simplify this challenging problem in nonlinear control design. The method consists of a learner-critic architecture, in which the critic gives counterexamples of input saturation and the learner optimizes a neural CBF to eliminate those counterexamples. 
We provide empirical results on a 10D state, 4D input quadcopter-pendulum system. Our learned CBF avoids input saturation and maintains safety over nearly 100% of trials. ", "keywords": "safe control;input limits", "primary_area": "", "supplementary_material": "/attachment/fa9f8f6d318f7000c86e7f08cd395e9a20ad6833.zip", "author": "Simin Liu;Changliu Liu;John Dolan", "authorids": "~Simin_Liu1;~Changliu_Liu1;~John_Dolan1", "gender": ";F;M", "homepage": "https://www.ri.cmu.edu/ri-people/simin-liu/;http://www.cs.cmu.edu/~cliu6/index.html;https://www.ri.cmu.edu/ri-faculty/john-m-dolan/", "dblp": ";166/3563;52/532.html", "google_scholar": ";;xLk_w7kAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Simin_Liu1;~Changliu_Liu1;~John_Dolan1", "aff": "Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cs.cmu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2022safe,\ntitle={Safe Control Under Input Limits with Neural Control Barrier Functions},\nauthor={Simin Liu and Changliu Liu and John Dolan},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=4ffLQu_O-Dl}\n}", "github": "", "project": "", "reviewers": "V3nm;A9jW;wTA7;8LBY", "site": "https://openreview.net/forum?id=4ffLQu_O-Dl", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=823997035005390477&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4g3PwAp5nsX", "title": "SE(2)-Equivariant Pushing Dynamics Models for Tabletop Object Manipulations", "track": "main", "status": "Oral", "tldr": "We propose a SE(2)-equivariant pushing dynamics model for tabletop object manipulations.", "abstract": "For tabletop object manipulation tasks, learning an accurate pushing dynamics model, which predicts the objects' motions when a robot pushes an object, is very important. In this work, we claim that an ideal pushing dynamics model should have the SE(2)-equivariance property, i.e., if tabletop objects' poses and pushing action are transformed by some same planar rigid-body transformation, then the resulting motion should also be the result of the same transformation. Existing state-of-the-art data-driven approaches do not have this equivariance property, resulting in less-than-desirable learning performances. In this paper, we propose a new neural network architecture that by construction has the above equivariance property. Through extensive empirical validations, we show that the proposed model shows significantly improved learning performances over the existing methods. Also, we verify that our pushing dynamics model can be used for various downstream pushing manipulation tasks such as the object moving, singulation, and grasping in both simulation and real robot experiments. 
Code is available at https://github.com/seungyeon-k/SQPDNet-public.\n\n", "keywords": "Pushing dynamics learning;Pushing manipulation;Symmetry and Equivariance", "primary_area": "", "supplementary_material": "/attachment/04e30ee289eaf9cd1747e2bf6e148a298be8b993.zip", "author": "Seungyeon Kim;Byeongdo Lim;Yonghyeon Lee;Frank C. Park", "authorids": "~Seungyeon_Kim2;~Byeongdo_Lim1;~Yonghyeon_Lee2;~Frank_C._Park1", "gender": "M;;M;M", "homepage": "https://seungyeon-k.github.io/;http://robot.snu.ac.kr;https://www.gabe-yhlee.com;http://robotics.snu.ac.kr", "dblp": "74/7997-3;341/9568;182/6796;p/FrankChongwooPark", "google_scholar": "https://scholar.google.com/citations?hl=en;;;u-h3PJIAAAAJ", "orcid": "0000-0001-6708-5684;;;0000-0002-0293-6975", "linkedin": "seungyeon-kim-45a20b263/;;;", "or_profile": "~Seungyeon_Kim2;~Byeongdo_Lim1;~Yonghyeon_Lee2;~Frank_C._Park1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2022seequivariant,\ntitle={{SE}(2)-Equivariant Pushing Dynamics Models for Tabletop Object Manipulations},\nauthor={Seungyeon Kim and Byeongdo Lim and Yonghyeon Lee and Frank C. Park},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=4g3PwAp5nsX}\n}", "github": "https://github.com/seungyeon-k/SQPDNet-public", "project": "", "reviewers": "PwZa;tXbi;T36J;uaxP", "site": "https://openreview.net/forum?id=4g3PwAp5nsX", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5813255828595310736&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "4nt6RUGmILw", "title": "i-Sim2Real: Reinforcement Learning of Robotic Policies in Tight Human-Robot Interaction Loops", "track": "main", "status": "Oral", "tldr": "We present a method for iteratively learning human behavior models for effective sim-to-real learning of robotic policies for human-robot interaction tasks.", "abstract": "Sim-to-real transfer is a powerful paradigm for robotic reinforcement learning. The ability to train policies in simulation enables safe exploration and large-scale data collection quickly at low cost. However, prior works in sim-to-real transfer of robotic policies typically do not involve any human-robot interaction because accurately simulating human behavior is an open problem. In this work, our goal is to leverage the power of simulation to train robotic policies that are proficient at interacting with humans upon deployment. But there is a chicken and egg problem --- how to gather examples of a human interacting with a physical robot so as to model human behavior in simulation without already having a robot that is able to interact with a human? Our proposed method, Iterative-Sim-to-Real (i-S2R), attempts to address this. i-S2R bootstraps from a simple model of human behavior and alternates between training in simulation and deploying in the real world. 
In each iteration, both the human behavior model and the policy are refined. For all training we apply a new evolutionary search algorithm called Blackbox Gradient Sensing (BGS). We evaluate our method on a real world robotic table tennis setting, where the objective for the robot is to play cooperatively with a human player for as long as possible. Table tennis is a high-speed, dynamic task that requires the two players to react quickly to each other\u2019s moves, making for a challenging test bed for research on human-robot interaction. We present results on an industrial robotic arm that is able to cooperatively play table tennis with human players, achieving rallies of 22 successive hits on average and 150 at best. Further, for 80% of players, rally lengths are 70% to 175% longer compared to the sim-to-real plus fine-tuning (S2R+FT) baseline. For videos of our system in action please see https://sites.google.com/view/is2r.", "keywords": "sim-to-real;human-robot interaction;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/1a6c89d8b4510903163b2dbb8a1e58baac26ad75.zip", "author": "Saminda Wishwajith Abeyruwan;Laura Graesser;David B D'Ambrosio;Avi Singh;Anish Shankar;Alex Bewley;Deepali Jain;Krzysztof Marcin Choromanski;Pannag R Sanketi", "authorids": "~Saminda_Wishwajith_Abeyruwan1;~Laura_Graesser1;~David_B_D'Ambrosio1;~Avi_Singh1;~Anish_Shankar1;~Alex_Bewley1;~Deepali_Jain1;~Krzysztof_Marcin_Choromanski1;~Pannag_R_Sanketi1", "gender": ";F;M;;M;Unspecified;F;;M", "homepage": "http://saminda.org;;;https://www.avisingh.org/;;https://alex.bewley.ai/;;;", "dblp": ";;;https://dblp.org/pers/s/Singh:Avi.html;;39/9969;84/8010;78/11411;", "google_scholar": "https://scholar.google.com/citations?hl=en;0NHagNQAAAAJ;https://scholar.google.com/citations?hl=en;C2_ZXdcAAAAJ;;https://scholar.google.co.uk/citations?user=UO32CB0AAAAJ;;;GuU6oA4AAAAJ", "orcid": ";;;;;0000-0002-8428-9264;;;", "linkedin": "samindaa;;;;anish-shankar-3290573a;;;;", "or_profile": "~Saminda_Wishwajith_Abeyruwan1;~Laura_Graesser1;~David_B_D'Ambrosio1;~Avi_Singh1;~Anish_Shankar1;~Alex_Bewley1;~Deepali_Jain1;~Krzysztof_Marcin_Choromanski1;~Pannag_R_Sanketi1", "aff": "Google;Google;Google;Google;Google;Google;Google;Google Brain Robotics & Columbia University;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;columbia.edu;google.com", "position": "Software Engineer;Researcher;Researcher;Researcher;Software Engineer;Research Scientist;Researcher;research scientist & adjunct assistant professor;Researcher", "bibtex": "@inproceedings{\nabeyruwan2022isimreal,\ntitle={i-Sim2Real: Reinforcement Learning of Robotic Policies in Tight Human-Robot Interaction Loops},\nauthor={Saminda Wishwajith Abeyruwan and Laura Graesser and David B D'Ambrosio and Avi Singh and Anish Shankar and Alex Bewley and Deepali Jain and Krzysztof Marcin Choromanski and Pannag R Sanketi},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=4nt6RUGmILw}\n}", "github": "", "project": "", "reviewers": "LWg8;ksqZ;PRVu;WKtY", "site": "https://openreview.net/forum?id=4nt6RUGmILw", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 9, "corr_rating_confidence": 0, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1297890259054919004&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0;0;0;0;0;0", 
"aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "52c5e73SlS2", "title": "Walk These Ways: Tuning Robot Control for Generalization with Multiplicity of Behavior", "track": "main", "status": "Oral", "tldr": "Learning quadrupedal locomotion with an expanded task specification enables online tuning of a small quadruped to out-of-distribution environments and tasks.", "abstract": "Learned locomotion policies can rapidly adapt to diverse environments similar to those experienced during training but lack a mechanism for fast tuning when they fail in an out-of-distribution test environment. This necessitates a slow and iterative cycle of reward and environment redesign to achieve good performance on a new task. As an alternative, we propose learning a single policy that encodes a structured family of locomotion strategies that solve training tasks in different ways, resulting in Multiplicity of Behavior (MoB). Different strategies generalize differently and can be chosen in real-time for new tasks or environments, bypassing the need for time-consuming retraining. We release a fast, robust open-source MoB locomotion controller, Walk These Ways, that can execute diverse gaits with variable footswing, posture, and speed, unlocking diverse downstream tasks: crouching, hopping, high-speed running, stair traversal, bracing against shoves, rhythmic dance, and more. Video and code release: https://gmargo11.github.io/walk-these-ways", "keywords": "Locomotion;Reinforcement Learning;Task Specification", "primary_area": "", "supplementary_material": "/attachment/533ecf7302d8bc59f5348b1841f84f6206460f87.zip", "author": "Gabriel B. Margolis;Pulkit Agrawal", "authorids": "~Gabriel_B._Margolis1;~Pulkit_Agrawal1", "gender": "M;M", "homepage": "https://people.eecs.berkeley.edu/~pulkitag/;https://gmargo11.github.io/", "dblp": "149/2672;305/0205", "google_scholar": "UpZmJI0AAAAJ;Jzt5uNAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Pulkit_Agrawal1;~Gabriel_B_Margolis1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "Assistant Professor;PhD Student", "bibtex": "@inproceedings{\nmargolis2022walk,\ntitle={Walk These Ways: Tuning Robot Control for Generalization with Multiplicity of Behavior},\nauthor={Gabriel B. 
Margolis and Pulkit Agrawal},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=52c5e73SlS2}\n}", "github": "https://github.com/Improbable-AI/walk-these-ways", "project": "", "reviewers": "fCqU;E9ME;kWnF;hEjf", "site": "https://openreview.net/forum?id=52c5e73SlS2", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 24, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 176, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11602068780397920576&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "52uzsIGV32_", "title": "Efficient and Stable Off-policy Training via Behavior-aware Evolutionary Learning", "track": "main", "status": "Poster", "tldr": "An evolutionary training framework for off-policy reinforcement learning inspired by evolution strategies (ES) called Behavior-aware Evolutionary Learning (BEL).", "abstract": "Applying reinforcement learning (RL) algorithms to real-world continuous control problems faces many challenges in terms of sample efficiency, stability and exploration. Off-policy RL algorithms show great sample efficiency but can be unstable to train and require effective exploration techniques for sparse reward environments. A simple yet effective approach to address these challenges is to train a population of policies and ensemble them in certain ways. In this work, a novel population-based evolutionary training framework inspired by evolution strategies (ES) called Behavior-aware Evolutionary Learning (BEL) is proposed. The main idea is to train a population of behaviorally diverse policies in parallel and conduct selection with simple linear recombination. BEL consists of two mechanisms called behavior-regularized perturbation (BRP) and behavior-targeted training (BTT) to accomplish stable and fine control of the population behavior divergence. Experimental studies showed that BEL not only has superior sample efficiency and stability compared to existing methods, but can also produce diverse agents in sparse reward environments.
Due to the parallel implementation, BEL also exhibits relatively good computation efficiency, making it a practical and competitive method to train policies for real-world robots.", "keywords": "Continuous control;Reinforcement learning;Evolution strategies", "primary_area": "", "supplementary_material": "/attachment/c0bb9defbd98e016edbba817ba61cccf6066b07f.zip", "author": "Maiyue Chen;Guangyi He", "authorids": "~Maiyue_Chen1;gyhe@pku.edu.cn", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "%E8%BF%88%E8%B6%8A-%E9%99%88-18649611b/;", "or_profile": "~Maiyue_Chen1;gyhe@pku.edu.cn", "aff": "Peking University;", "aff_domain": "pku.edu.cn;", "position": "PhD student;", "bibtex": "@inproceedings{\nchen2022efficient,\ntitle={Efficient and Stable Off-policy Training via Behavior-aware Evolutionary Learning},\nauthor={Maiyue Chen and Guangyi He},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=52uzsIGV32_}\n}", "github": "https://github.com/raymond-myc/BEL", "project": "", "reviewers": "4o9R;XDim;1HqG;gZYS", "site": "https://openreview.net/forum?id=52uzsIGV32_", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IV4K7YQEi-MJ:scholar.google.com/&scioq=Efficient+and+Stable+Off-policy+Training+via+Behavior-aware+Evolutionary+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "5GJ-_KMLASa", "title": "Learning the Dynamics of Compliant Tool-Environment Interaction for Visuo-Tactile Contact Servoing", "track": "main", "status": "Poster", "tldr": "We propose a method to learn the dynamics of compliant tool-environment interaction for visuo-tactile extrinsic contact servoing with applications to tasks such as scraping.", "abstract": "Many manipulation tasks require the robot to control the contact between a grasped compliant tool and the environment, e.g. scraping a frying pan with a spatula. However, modeling tool-environment interaction is difficult, especially when the tool is compliant, and the robot cannot be expected to have the full geometry and physical properties (e.g., mass, stiffness, and friction) of all the tools it must use. We propose a framework that learns to predict the effects of a robot's actions on the contact between the tool and the environment given visuo-tactile perception. Key to our framework is a novel contact feature representation that consists of a binary contact value, the line of contact, and an end-effector wrench. We propose a method to learn the dynamics of these contact features from real world data that does not require predicting the geometry of the compliant tool.
We then propose a controller that uses this dynamics model for visuo-tactile contact servoing and show that it is effective at performing scraping tasks with a spatula, even in scenarios where precise contact needs to be made to avoid obstacles.", "keywords": "Contact-Rich Manipulation;Multi-Modal Dynamics Learning", "primary_area": "", "supplementary_material": "/attachment/9bc0310e56b91b6267a9674bdb315abd77c4ac9f.zip", "author": "Mark Van der Merwe;Dmitry Berenson;Nima Fazeli", "authorids": "~Mark_Van_der_Merwe1;~Dmitry_Berenson1;~Nima_Fazeli1", "gender": "M;M;", "homepage": "https://mvandermerwe.github.io/;http://web.eecs.umich.edu/~dmitryb/;https://www.mmintlab.com", "dblp": "249/5378;;", "google_scholar": "cKmwbi0AAAAJ;x-n9rIMAAAAJ;", "orcid": ";0000-0002-9712-109X;", "linkedin": ";;", "or_profile": "~Mark_Van_der_Merwe1;~Dmitry_Berenson1;~Nima_Fazeli1", "aff": "University of Michigan - Ann Arbor;University of Michigan;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nmerwe2022learning,\ntitle={Learning the Dynamics of Compliant Tool-Environment Interaction for Visuo-Tactile Contact Servoing},\nauthor={Mark Van der Merwe and Dmitry Berenson and Nima Fazeli},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=5GJ-_KMLASa}\n}", "github": "", "project": "", "reviewers": "gMWE;DDzt;r4to;yQUC", "site": "https://openreview.net/forum?id=5GJ-_KMLASa", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16428897096254329055&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6BIffCl6gsM", "title": "Efficient Tactile Simulation with Differentiability for Robotic Manipulation", "track": "main", "status": "Poster", "tldr": "We present an efficient simulator for simulating dense tactile force fields with both normal and shear components. We conduct extensive policy learning experiments in simulation and a sim-to-real experiment on a tactile insertion task.", "abstract": "Efficient simulation of tactile sensors can unlock new opportunities for learning tactile-based manipulation policies in simulation and then transferring the learned policy to real systems, but fast and reliable simulators for dense tactile normal and shear force fields are still under-explored. We present a novel approach for efficiently simulating both the normal and shear tactile force field covering the entire contact surface with an arbitrary tactile sensor spatial layout. Our simulator also provides analytical gradients of the tactile forces to accelerate policy learning.
We conduct extensive simulation experiments to showcase our approach and demonstrate successful zero-shot sim-to-real transfer for a high-precision peg-insertion task with high-resolution vision-based GelSlim tactile sensors.", "keywords": "Tactile Simulation;Tactile Manipulation;Differentiable Simulation;Sim-to-Real", "primary_area": "", "supplementary_material": "/attachment/31f891f73b1fd4795ae4ded104a38abd182118b7.zip", "author": "Jie Xu;Sangwoon Kim;Tao Chen;Alberto Rodriguez Garcia;Pulkit Agrawal;Wojciech Matusik;Shinjiro Sueda", "authorids": "~Jie_Xu7;~Sangwoon_Kim1;~Tao_Chen1;~Alberto_Rodriguez_Garcia1;~Pulkit_Agrawal1;~Wojciech_Matusik2;~Shinjiro_Sueda1", "gender": "M;;M;M;M;M;M", "homepage": "https://people.csail.mit.edu/jiex;;https://taochenshh.github.io;http://mcube.mit.edu/;https://people.eecs.berkeley.edu/~pulkitag/;https://cdfg.mit.edu/wojciech;http://people.tamu.edu/~sueda/", "dblp": "37/5126-28;;;;149/2672;;69/4137.html", "google_scholar": "3Tj5lWEAAAAJ;qkaOHLoAAAAJ;gdUv1PIAAAAJ;AC93g9kAAAAJ;UpZmJI0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=mBqenhsAAAAJ", "orcid": ";0000-0002-1662-9132;;;;0000-0003-0212-5643;0000-0003-4656-498X", "linkedin": ";sangwoon-kim-0a0484150/;;;;wojciech-matusik-67238126/;", "or_profile": "~Jie_Xu7;~Sangwoon_Kim1;~Tao_Chen1;~Alberto_Rodriguez_Garcia1;~Pulkit_Agrawal1;~Wojciech_Matusik2;~Shinjiro_Sueda1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Texas A&M University - College Station", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxu2022efficient,\ntitle={Efficient Tactile Simulation with Differentiability for Robotic Manipulation},\nauthor={Jie Xu and Sangwoon Kim and Tao Chen and Alberto Rodriguez Garcia and Pulkit Agrawal and Wojciech Matusik and Shinjiro Sueda},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=6BIffCl6gsM}\n}", "github": "", "project": "", "reviewers": "BP21;Led2;VkwB;4x5o", "site": "https://openreview.net/forum?id=6BIffCl6gsM", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6778698166550271237&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.tamu.edu", "aff_unique_abbr": "MIT;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "6gEyD5zg0dt", "title": "USHER: Unbiased Sampling for Hindsight Experience Replay", "track": "main", "status": "Poster", "tldr": "We derive a provably unbiased variant of Hindsight Experience Replay without sacrificing HER's low variance or high sample efficiency. ", "abstract": " Dealing with sparse rewards is a long-standing challenge in reinforcement learning (RL). 
Hindsight Experience Replay (HER) addresses this problem by reusing failed trajectories for one goal as successful trajectories for another. This allows for both a minimum density of reward and for generalization across multiple goals. However, this strategy is known to result in a biased value function, as the update rule underestimates the likelihood of bad outcomes in a stochastic environment. We propose an asymptotically unbiased importance-sampling-based algorithm to address this problem without sacrificing performance on deterministic environments. We show its effectiveness on a range of robotic systems, including challenging high dimensional stochastic environments.", "keywords": "Reinforcement Learning;Multi-goal reinforcement learning;Reinforcement learning theory", "primary_area": "", "supplementary_material": "/attachment/77a0e880d2a6459075a59ac2457ac6be9f1ff87a.zip", "author": "Liam Schramm;Yunfu Deng;Edgar Granados;Abdeslam Boularias", "authorids": "~Liam_Schramm2;~Yunfu_Deng1;gary.granados@gmail.com;~Abdeslam_Boularias1", "gender": ";;;M", "homepage": "https://liamschramm.com/;https://yfdeng.com/;;http://rl.cs.rutgers.edu/", "dblp": "https://dblp.org/pers/hd/s/Schramm:Liam;;;57/2269", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=8AF3RCsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Liam_Schramm2;~Yunfu_Deng1;gary.granados@gmail.com;~Abdeslam_Boularias1", "aff": "Rutgers University;Rutgers University, New Brunswick;;, Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;;cs.rutgers.edu", "position": "PhD student;MS student;;Associate Professor", "bibtex": "@inproceedings{\nschramm2022usher,\ntitle={{USHER}: Unbiased Sampling for Hindsight Experience Replay},\nauthor={Liam Schramm and Yunfu Deng and Edgar Granados and Abdeslam Boularias},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=6gEyD5zg0dt}\n}", "github": "https://github.com/schrammlb2/USHER_Implementation", "project": "", "reviewers": "Z2DH;KeLa;mpQg;d9ww", "site": "https://openreview.net/forum?id=6gEyD5zg0dt", "pdf_size": 0, "rating": "1;4;6;10", "confidence": "", "rating_avg": 5.25, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=516743869118475089&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "1", "aff_campus_unique": ";New Brunswick", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7CrXRhmzVVR", "title": "Solving Complex Manipulation Tasks with Model-Assisted Model-Free Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "Exploratory actions in learned model with optimistically-initialized critic improve value estimation and policy performance.", "abstract": "In this paper, we propose a novel deep reinforcement learning approach for improving the sample efficiency of a model-free actor-critic method by using a learned model to encourage exploration. The basic idea consists in generating artificial transitions with noisy actions, which can be used to update the critic. To counteract the model bias, we introduce a high initialization for the critic and two filters for the artificial transitions. 
Finally, we evaluate our approach with the TD3 algorithm on different robotic tasks and demonstrate that it achieves a better performance with higher sample efficiency than several other model-based and model-free methods.", "keywords": "Reinforcement learning;Data augmentation;Imaginary exploration;Optimistic initialization", "primary_area": "", "supplementary_material": "/attachment/04d6f56b732d6de4cdac6473d93ef8959df9bd79.zip", "author": "Jianshu Hu;Paul Weng", "authorids": "~Jianshu_Hu1;~Paul_Weng1", "gender": "M;M", "homepage": "https://jianshu-hu.github.io/;http://weng.fr", "dblp": "337/1942;http://dblp.uni-trier.de/pers/hd/w/Weng:Paul", "google_scholar": ";_Hd6AeQAAAAJ", "orcid": "0009-0006-2837-425X;", "linkedin": ";paul-weng-69a15980/", "or_profile": "~Jianshu_Hu1;~Paul_Weng1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2022solving,\ntitle={Solving Complex Manipulation Tasks with Model-Assisted Model-Free Reinforcement Learning},\nauthor={Jianshu Hu and Paul Weng},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=7CrXRhmzVVR}\n}", "github": "", "project": "", "reviewers": "4nCH;4eEP;jm78;BZ4p", "site": "https://openreview.net/forum?id=7CrXRhmzVVR", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1099550482190900022&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "7JVNhaMbZUu", "title": "Particle-Based Score Estimation for State Space Model Learning in Autonomous Driving", "track": "main", "status": "Poster", "tldr": "Learning state space models for multi-object tracking in autonomous driving using particle methods.", "abstract": "Multi-object state estimation is a fundamental problem for robotic applications where a robot must interact with other moving objects. Typically, other objects' relevant state features are not directly observable, and must instead be inferred from observations. Particle filtering can perform such inference given approximate transition and observation models. However, these models are often unknown a priori, yielding a difficult parameter estimation problem since observations jointly carry transition and observation noise. In this work, we consider learning maximum-likelihood parameters using particle methods. Recent methods addressing this problem typically differentiate through time in a particle filter, which requires workarounds to the non-differentiable resampling step, that yield biased or high variance gradient estimates. By contrast, we exploit Fisher's identity to obtain a particle-based approximation of the score function (the gradient of the log likelihood) that yields a low variance estimate while only requiring stepwise differentiation through the transition and observation models. 
We apply our method to real data collected from autonomous vehicles (AVs) and show that it learns better models than existing techniques and is more stable in training, yielding an effective smoother for tracking the trajectories of vehicles around an AV.", "keywords": "Autonomous Driving;Particle Filtering;Self-supervised Learning", "primary_area": "", "supplementary_material": "/attachment/924ee973705ad7dffe7723e85db20daf3abc3112.zip", "author": "Angad Singh;Omar Makhlouf;Maximilian Igl;Joao Messias;Arnaud Doucet;Shimon Whiteson", "authorids": "~Angad_Singh2;makhlouf@waymo.com;~Maximilian_Igl1;messiasj@waymo.com;~Arnaud_Doucet2;~Shimon_Whiteson1", "gender": "M;;M;;;", "homepage": ";;https://maximilianigl.com;;https://www.stats.ox.ac.uk/~doucet/;", "dblp": ";;207/8245.html;;68/1628;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;W4SZGV8AAAAJ;", "orcid": ";;;;0000-0002-7662-419X;", "linkedin": "https://uk.linkedin.com/in/angad-singh9;;maximilian-igl-21116992/;;;", "or_profile": "~Angad_Singh2;makhlouf@waymo.com;~Maximilian_Igl1;messiasj@waymo.com;~Arnaud_Doucet2;~Shimon_Whiteson1", "aff": "Waymo;;Waymo;;University of Oxford;University of Oxford", "aff_domain": "waymo.com;;waymo.com;;ox.ac.uk;ox.ac.uk", "position": "Researcher;;Researcher;;Full Professor;Professor", "bibtex": "@inproceedings{\nsingh2022particlebased,\ntitle={Particle-Based Score Estimation for State Space Model Learning in Autonomous Driving},\nauthor={Angad Singh and Omar Makhlouf and Maximilian Igl and Joao Messias and Arnaud Doucet and Shimon Whiteson},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=7JVNhaMbZUu}\n}", "github": "", "project": "", "reviewers": "8rdy;swzV;KijS;ScN7", "site": "https://openreview.net/forum?id=7JVNhaMbZUu", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6833539995807046015&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Waymo;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.waymo.com;https://www.ox.ac.uk", "aff_unique_abbr": "Waymo;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "7Nwds2LjN1s", "title": "Learning Model Predictive Controllers with Real-Time Attention for Real-World Navigation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Despite decades of research, existing navigation systems still face real-world challenges when deployed in the wild, e.g., in cluttered home environments or in human-occupied public spaces. \nTo address this, we present a new class of implicit control policies combining the benefits of imitation learning with the robust handling of system constraints from Model Predictive Control (MPC). \nOur approach, called Performer-MPC, uses a learned cost function parameterized by vision context embeddings provided by Performers---a low-rank implicit-attention Transformer. We jointly train the cost function and construct the controller relying on it, effectively solving end-to-end the corresponding bi-level optimization problem. 
\nWe show that the resulting policy improves standard MPC performance by leveraging a few expert demonstrations of the desired navigation behavior in different challenging real-world scenarios. \nCompared with a standard MPC policy, Performer-MPC achieves >40% better goal reached in cluttered environments and >65% better on social metrics when navigating around humans. ", "keywords": "Model Predictive Control;Transformers;Performers;Highly-Constrained Navigation;Social Navigation;Learning-based Control", "primary_area": "", "supplementary_material": "/attachment/6991b9afa47240d1641ae9df16cb286fb6a2de64.zip", "author": "Xuesu Xiao;Tingnan Zhang;Krzysztof Marcin Choromanski;Tsang-Wei Edward Lee;Anthony Francis;Jake Varley;Stephen Tu;Sumeet Singh;Peng Xu;Fei Xia;Sven Mikael Persson;Dmitry Kalashnikov;Leila Takayama;Roy Frostig;Jie Tan;Carolina Parada;Vikas Sindhwani", "authorids": "~Xuesu_Xiao1;~Tingnan_Zhang1;~Krzysztof_Marcin_Choromanski1;~Tsang-Wei_Edward_Lee1;~Anthony_Francis1;~Jake_Varley1;~Stephen_Tu1;~Sumeet_Singh3;~Peng_Xu9;~Fei_Xia1;~Sven_Mikael_Persson1;~Dmitry_Kalashnikov1;~Leila_Takayama1;~Roy_Frostig1;~Jie_Tan1;~Carolina_Parada1;~Vikas_Sindhwani1", "gender": "M;M;;M;M;;M;M;M;M;;F;;M;;M;M", "homepage": "https://cs.gmu.edu/~xiao/;;;;http://www.cs.columbia.edu/~jvarley/;https://stephentu.github.io/;;;;;;https://www.leilatakayama.org;https://cs.stanford.edu/~rfrostig/;http://www.jie-tan.net;;http://vikas.sindhwani.org;http://www.dresan.com/", "dblp": "164/8375.html;https://dblp.uni-trier.de/pers/hd/z/Zhang:Tingnan;78/11411;236/6317.html;;09/8165;;;;;222/2882;;136/9091;81/7419;71/5134;26/4825;", "google_scholar": "bWbsbjAAAAAJ;RM2vMNcAAAAJ;;;UJcm1MoAAAAJ;JQcDmB8AAAAJ;ZGpE5cYAAAAJ;460NWeQAAAAJ;pqP5_PgAAAAJ;;;yFEHsv4AAAAJ;UoATnWEAAAAJ;neGbgzYAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;;;0000-0003-4343-1444;;;;;;;;", "linkedin": ";;;;;;;;;mikaelspersson1;;;;jie-tan/;;vikassindhwani;", "or_profile": "~Xuesu_Xiao1;~Tingnan_Zhang1;~Krzysztof_Marcin_Choromanski1;~Tsang-Wei_Edward_Lee1;~Jake_Varley1;~Stephen_Tu1;~Sumeet_Singh3;~Peng_Xu9;~Fei_Xia1;~Sven_Mikael_Persson1;~Dmitry_Kalashnikov1;~Leila_Takayama1;~Roy_Frostig1;~Jie_Tan1;~Carolina_Parada1;~Vikas_Sindhwani1;~Anthony_Gerald_Francis1", "aff": "George Mason University;Google;Google Brain Robotics & Columbia University;;Google;Google;Google Brain Robotics;Google;Google;Google;Google;University of California, Santa Cruz;Google;Google;;Google;Google", "aff_domain": "gmu.edu;google.com;columbia.edu;;google.com;google.com;google.com;google.com;google.com;google.com;google.com;ucsc.edu;google.com;google.com;;google.com;google.com", "position": "Assistant Professor;Software Engineer;research scientist & adjunct assistant professor;;Engineer;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Associate Professor;Research scientist;Research Scientist;;Senior Staff Research Scientist;Researcher", "bibtex": "@inproceedings{\nxiao2022learning,\ntitle={Learning Model Predictive Controllers with Real-Time Attention for Real-World Navigation},\nauthor={Xuesu Xiao and Tingnan Zhang and Krzysztof Marcin Choromanski and Tsang-Wei Edward Lee and Anthony Francis and Jake Varley and Stephen Tu and Sumeet Singh and Peng Xu and Fei Xia and Leila Takayama and Roy Frostig and Jie Tan and Carolina Parada and Vikas Sindhwani},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=7Nwds2LjN1s}\n}", "github": "", "project": "", "reviewers": "Xcqt;yaP4;3ALJ;UiTD", "site": 
"https://openreview.net/forum?id=7Nwds2LjN1s", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 8, "authors#_avg": 17, "corr_rating_confidence": 0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7486063819393402001&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;1;1;1;1;1;1;1;2;1;1;1;1", "aff_unique_norm": "George Mason University;Google;University of California, Santa Cruz", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.gmu.edu;https://www.google.com;https://www.ucsc.edu", "aff_unique_abbr": "GMU;Google;UCSC", "aff_campus_unique_index": "1;1;1;1;1;1;1;1;1;2;1;1;1;1", "aff_campus_unique": ";Mountain View;Santa Cruz", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7RyzGWLk79H", "title": "When the Sun Goes Down: Repairing Photometric Losses for All-Day Depth Estimation", "track": "main", "status": "Poster", "tldr": "This paper addresses the problems associated with the daytime photometric losses to make them work for nighttime as well. ", "abstract": "Self-supervised deep learning methods for joint depth and ego-motion estimation can yield accurate trajectories without needing ground-truth training data. However, as they typically use photometric losses, their performance can degrade significantly when the assumptions these losses make (e.g. temporal illumination consistency, a static scene, and the absence of noise and occlusions) are violated. This limits their use for e.g. nighttime sequences, which tend to contain many point light sources (including on dynamic objects) and low signal-to-noise ratio (SNR) in darker image regions. In this paper, we show how to use a combination of three techniques to allow the existing photometric losses to work for both day and nighttime images. First, we introduce a per-pixel neural intensity transformation to compensate for the light changes that occur between successive frames. Second, we predict a per-pixel residual flow map that we use to correct the reprojection correspondences induced by the estimated ego-motion and depth from the networks. And third, we denoise the training images to improve the robustness and accuracy of our approach. These changes allow us to train a single model for both day and nighttime images without needing separate encoders or extra feature networks like existing methods. 
We perform extensive experiments and ablation studies on the challenging Oxford RobotCar dataset to demonstrate the efficacy of our approach for both day and nighttime sequences.", "keywords": "All day depth estimation;3D reconstruction;Photometric losses", "primary_area": "", "supplementary_material": "/attachment/af40503a1f9812a6cce12e951caaa6d7d760fd3c.zip", "author": "Madhu Vankadari;Stuart Golodetz;Sourav Garg;Sangyun Shin;Andrew Markham;Niki Trigoni", "authorids": "~Madhu_Vankadari1;~Stuart_Golodetz4;~Sourav_Garg1;sangyun.shin@cs.ox.ac.uk;~Andrew_Markham2;~Niki_Trigoni1", "gender": "M;M;M;;M;F", "homepage": "https://madhubabuv.github.io;;https://oravus.github.io/;;;https://www.cs.ox.ac.uk/people/niki.trigoni/", "dblp": "205/3815;34/4052;142/0073;;83/7169;t/NikiTrigoni", "google_scholar": "St1130EAAAAJ;https://scholar.google.co.uk/citations?user=Kg_w2tEAAAAJ;oVS3HHIAAAAJ;;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ;", "orcid": ";;0000-0001-6068-3307;;;", "linkedin": ";;gargsourav/;;;", "or_profile": "~Madhu_Vankadari1;~Stuart_Golodetz4;~Sourav_Garg1;sangyun.shin@cs.ox.ac.uk;~Andrew_Markham2;~Niki_Trigoni1", "aff": "Department of Computer Science, University of Oxford;Department of Computer Science, University of Oxford;Queensland University of Technology;;University of Oxford;University of Oxford", "aff_domain": "cs.ox.ac.uk;cs.ox.ac.uk;qut.edu.au;;ox.ac.uk;ox.ac.uk", "position": "PhD student;Postdoc;Postdoc;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nvankadari2022when,\ntitle={When the Sun Goes Down: Repairing Photometric Losses for All-Day Depth Estimation},\nauthor={Madhu Vankadari and Stuart Golodetz and Sourav Garg and Sangyun Shin and Andrew Markham and Niki Trigoni},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=7RyzGWLk79H}\n}", "github": "", "project": "", "reviewers": "iBSx;JhM1;BWdU;YCUH", "site": "https://openreview.net/forum?id=7RyzGWLk79H", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12081001728196238367&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Oxford;Queensland University of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ox.ac.uk;https://www.qut.edu.au", "aff_unique_abbr": "Oxford;QUT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;Australia" }, { "id": "7ZcePvChS7u", "title": "Human-Robot Commensality: Bite Timing Prediction for Robot-Assisted Feeding in Groups", "track": "main", "status": "Poster", "tldr": "We use our self-collected dataset to develop data-driven models that predicts when a robot should feed during social dining scenarios and evaluate with user study.", "abstract": "We develop data-driven models to predict when a robot should feed during social dining scenarios. Being able to eat independently with friends and family is considered one of the most memorable and important activities for people with mobility limitations. While existing robotic systems for feeding people with mobility limitations focus on solitary dining, commensality, the act of eating together, is often the practice of choice. 
Sharing meals with others introduces the problem of socially appropriate bite timing for a robot, i.e. the appropriate timing for the robot to feed without disrupting the social dynamics of a shared meal. Our key insight is that bite timing strategies that take into account the delicate balance of social cues can lead to seamless interactions during robot-assisted feeding in a social dining scenario. We approach this problem by collecting a Human-Human Commensality Dataset (HHCD) containing 30 groups of three people eating together. We use this dataset to analyze human-human commensality behaviors and develop bite timing prediction models in social dining scenarios. We also transfer these models to human-robot commensality scenarios. Our user studies show that prediction improves when our algorithm uses multimodal social signaling cues between diners to model bite timing. The HHCD dataset, videos of user studies, and code are available at https://emprise.cs.cornell.edu/hrcom/", "keywords": "Multimodal Learning;HRI;Assistive Robotics;Group Dynamics", "primary_area": "", "supplementary_material": "/attachment/2ab1f58abc43f8321e1e5536ee7a6c4a74afdced.zip", "author": "Jan Ondras;Abrar Anwar;Tong Wu;Fanjun Bu;Malte Jung;Jorge Jose Ortiz;Tapomayukh Bhattacharjee", "authorids": "~Jan_Ondras1;~Abrar_Anwar1;~Tong_Wu7;~Fanjun_Bu1;~Malte_Jung1;~Jorge_Jose_Ortiz1;~Tapomayukh_Bhattacharjee1", "gender": ";M;;M;M;M;M", "homepage": "https://janondras.wordpress.com/;http://abraranwar.github.io/;;http://frankbu.com;http://infosci.cornell.edu/faculty/malte-jung;https://jorgeortizphd.info/;http://www.tapomayukh.com", "dblp": "211/1454;294/1347.html;;;;53/4663-1;74/8368", "google_scholar": "https://scholar.google.com/citations?hl=en;c6E-5tcAAAAJ;;;https://scholar.google.com.tw/citations?user=pfBh4zEAAAAJ;AcyG538AAAAJ;X1zsXTgAAAAJ", "orcid": "0000-0003-1075-7755;0000-0003-4442-4369;0000-0003-1182-0488;;;0000-0003-3325-1298;0000-0001-9457-5726", "linkedin": "jancio/;abraranwar;;;;jorgeortizphd/;tapomayukh", "or_profile": "~Jan_Ondras1;~Abrar_Anwar1;~Tong_Wu7;~Fanjun_Bu1;~Malte_Jung1;~Jorge_Jose_Ortiz1;~Tapomayukh_Bhattacharjee1", "aff": "University of Oxford;Cornell University;Rutgers University;Cornell University;Cornell University;Rutgers University;Cornell University", "aff_domain": "oxford.ac.uk;cornell.edu;rutgers.edu;cornell.edu;cornell.edu;rutgers.edu;cornell.edu", "position": "MS student;Visiting Scholar;PhD student;PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nondras2022humanrobot,\ntitle={Human-Robot Commensality: Bite Timing Prediction for Robot-Assisted Feeding in Groups},\nauthor={Jan Ondras and Abrar Anwar and Tong Wu and Fanjun Bu and Malte Jung and Jorge Jose Ortiz and Tapomayukh Bhattacharjee},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=7ZcePvChS7u}\n}", "github": "", "project": "", "reviewers": "iJwW;tBiN;gH2e", "site": "https://openreview.net/forum?id=7ZcePvChS7u", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4295575949303351886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;1;2;1", "aff_unique_norm": "University of Oxford;Cornell University;Rutgers University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.ox.ac.uk;https://www.cornell.edu;https://www.rutgers.edu", "aff_unique_abbr": "Oxford;Cornell;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "8-8e18idYLD", "title": "Online Dynamics Learning for Predictive Control with an Application to Aerial Robots", "track": "main", "status": "Poster", "tldr": "We tackle the task of improving the accuracy of dynamic models for model predictive control (MPC) using a proposed online learning algorithm.", "abstract": "In this work, we consider the task of improving the accuracy of dynamic models for model predictive control (MPC) in an online setting. Although prediction models can be learned and applied to model-based controllers, these models are often learned offline. In this offline setting, training data is first collected and a prediction model is learned through an elaborated training procedure. However, since the model is learned offline, it does not adapt to disturbances or model errors observed during deployment. To improve the adaptiveness of the model and the controller, we propose an online dynamics learning framework that continually improves the accuracy of the dynamic model during deployment. We adopt knowledge-based neural ordinary differential equations (KNODE) as the dynamic models, and use techniques inspired by transfer learning to continually improve the model accuracy. We demonstrate the efficacy of our framework with a quadrotor, and verify the framework in both simulations and physical experiments. Results show that our approach can account for disturbances that are possibly time-varying, while maintaining good trajectory tracking performance.", "keywords": "Online Learning;Model Learning;Model Predictive Control;Aerial Robotics", "primary_area": "", "supplementary_material": "/attachment/7ffc43924779db27339b9e2c11cf042313870637.zip", "author": "Tom Z. Jiahao;Kong Yao Chee;M. Ani Hsieh", "authorids": "~Tom_Z._Jiahao1;~Kong_Yao_Chee1;~M._Ani_Hsieh1", "gender": "M;M;", "homepage": ";;", "dblp": "276/0969;;", "google_scholar": "wfDVygMAAAAJ;;", "orcid": ";;", "linkedin": ";kong-yao-chee-36bb0523/;", "or_profile": "~Tom_Z._Jiahao1;~Kong_Yao_Chee1;~M._Ani_Hsieh1", "aff": "University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;", "aff_domain": "seas.upenn.edu;seas.upenn.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\njiahao2022online,\ntitle={Online Dynamics Learning for Predictive Control with an Application to Aerial Robots},\nauthor={Tom Z. Jiahao and Kong Yao Chee and M. 
Ani Hsieh},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=8-8e18idYLD}\n}", "github": "https://github.com/TomJZ/Online-KNODE-MPC", "project": "", "reviewers": "sQ7A;GTzB;YuBR;Jxoq", "site": "https://openreview.net/forum?id=8-8e18idYLD", "pdf_size": 0, "rating": "1;6;6;10", "confidence": "", "rating_avg": 5.75, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11211125867546633757&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "80vpxjt3vq", "title": "PlanT: Explainable Planning Transformers via Object-Level Representations", "track": "main", "status": "Poster", "tldr": "We propose PlanT, a state-of-the-art planner for self-driving based on object-level representations and a transformer architecture which can explain its decisions by identifying the most relevant object.", "abstract": "Planning an optimal route in a complex environment requires efficient reasoning about the surrounding scene. While human drivers prioritize important objects and ignore details not relevant to the decision, learning-based planners typically extract features from dense, high-dimensional grid representations containing all vehicle and road context information. In this paper, we propose PlanT, a novel approach for planning in the context of self-driving that uses a standard transformer architecture. PlanT is based on imitation learning with a compact object-level input representation. On the Longest6 benchmark for CARLA, PlanT outperforms all prior methods (matching the driving score of the expert) while being 5.3\u00d7 faster than equivalent pixel-based planning baselines during inference. Combining PlanT with an off-the-shelf perception module provides a sensor-based driving system that is more than 10 points better in terms of driving score than the existing state of the art. Furthermore, we propose an evaluation protocol to quantify the ability of planners to identify relevant objects, providing insights regarding their decision-making. Our results indicate that PlanT can focus on the most relevant object in the scene, even when this object is geometrically distant.", "keywords": "Autonomous Driving;Transformers;Explainability", "primary_area": "", "supplementary_material": "/attachment/d7c6938b38bec5e2240fa07de2a3eb5269ecec39.zip", "author": "Katrin Renz;Kashyap Chitta;Otniel-Bogdan Mercea;A. 
Sophia Koepke;Zeynep Akata;Andreas Geiger", "authorids": "~Katrin_Renz1;~Kashyap_Chitta1;~Otniel-Bogdan_Mercea1;~A._Sophia_Koepke1;~Zeynep_Akata1;~Andreas_Geiger3", "gender": "F;M;F;F;M;", "homepage": "https://www.katrinrenz.de/;https://kashyap7x.github.io/;https://www.robots.ox.ac.uk/~koepke/;https://eml-unitue.de/people/zeynep-akata;http://www.cvlibs.net;https://merceaotniel.github.io/", "dblp": "279/6579;220/3765;223/9859;117/4838;40/5825-1;", "google_scholar": "https://scholar.google.de/citations?user=t9ahuxsAAAAJ;vX5i2CcAAAAJ;q9zQhj8AAAAJ;jQl9RtkAAAAJ;https://scholar.google.ca/citations?hl=en;eSPY7nMAAAAJ", "orcid": ";;;0000-0002-1432-7747;0000-0002-8151-3726;", "linkedin": ";;;zeynep-akata-36182045/?ppe=1;;", "or_profile": "~Katrin_Renz1;~Kashyap_Chitta1;~A._Sophia_Koepke1;~Zeynep_Akata1;~Andreas_Geiger3;~Otniel_Bogdan_Mercea1", "aff": "University of T\u00fcbingen;University of T\u00fcbingen;University of Tuebingen;University of T\u00fcbingen;University of Tuebingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;PhD student;Postdoc;Full Professor;Professor;PhD student", "bibtex": "@inproceedings{\nrenz2022plant,\ntitle={PlanT: Explainable Planning Transformers via Object-Level Representations},\nauthor={Katrin Renz and Kashyap Chitta and Otniel-Bogdan Mercea and A. Sophia Koepke and Zeynep Akata and Andreas Geiger},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=80vpxjt3vq}\n}", "github": "https://github.com/autonomousvision/plant", "project": "", "reviewers": "ZSWX;H8PS;QCRj;LmXW", "site": "https://openreview.net/forum?id=80vpxjt3vq", "pdf_size": 0, "rating": "1;6;6;10", "confidence": "", "rating_avg": 5.75, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 114, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7744445869134445994&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1;0;1;2", "aff_unique_norm": "University of T\u00fcbingen;University of Tuebingen;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "8ktEdb5NHEh", "title": "Learning Sampling Distributions for Model Predictive Control", "track": "main", "status": "Poster", "tldr": "We learn sampling distributions for MPC with normalizing flows, performing all online parameter updates and warm-start operations in the latent space of the flow.", "abstract": "Sampling-based methods have become a cornerstone of contemporary approaches to Model Predictive Control (MPC), as they make no restrictions on the differentiability of the dynamics or cost function and are straightforward to parallelize. However, their efficacy is highly dependent on the quality of the sampling distribution itself, which is often assumed to be simple, like a Gaussian. This restriction can result in samples which are far from optimal, leading to poor performance. Recent work has explored improving the performance of MPC by sampling in a learned latent space of controls. 
However, these methods ultimately perform all MPC parameter updates and warm-starting between time steps in the control space. This requires us to rely on a number of heuristics for generating samples and updating the distribution and may lead to sub-optimal performance. Instead, we propose to carry out all operations in the latent space, allowing us to take full advantage of the learned distribution. Specifically, we frame the learning problem as bi-level optimization and show how to train the controller with backpropagation-through-time. By using a normalizing flow parameterization of the distribution, we can leverage its tractable density to avoid requiring differentiability of the dynamics and cost function. Finally, we evaluate the proposed approach on simulated robotics tasks and demonstrate its ability to surpass the performance of prior methods and scale better with a reduced number of samples.", "keywords": "Model Predictive Control;Normalizing Flows", "primary_area": "", "supplementary_material": "/attachment/bc393dc970eb8f1330742898481997f4654dd85e.zip", "author": "Jacob Sacks;Byron Boots", "authorids": "~Jacob_Sacks1;~Byron_Boots1", "gender": "M;", "homepage": "https://jisacks.github.io/;", "dblp": "208/8837;", "google_scholar": "Th4PuGkAAAAJ;", "orcid": ";", "linkedin": "jsacks3/;", "or_profile": "~Jacob_Sacks1;~Byron_Boots1", "aff": "Department of Computer Science, University of Washington;", "aff_domain": "cs.washington.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nsacks2022learning,\ntitle={Learning Sampling Distributions for Model Predictive Control},\nauthor={Jacob Sacks and Byron Boots},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=8ktEdb5NHEh}\n}", "github": "", "project": "", "reviewers": "bXam;aRE3;BrmL", "site": "https://openreview.net/forum?id=8ktEdb5NHEh", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 7, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8492239769190251727&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "8tmKW-NG2bH", "title": "Learning Goal-Conditioned Policies Offline with Self-Supervised Reward Shaping", "track": "main", "status": "Poster", "tldr": "We propose a self-supervised reward shaping method for training goal-conditioned policies on pre-collected dataset without performing a single action in the environment.", "abstract": "Developing agents that can execute multiple skills by learning from pre-collected datasets is an important problem in robotics, where online interaction with the environment is extremely time-consuming. Moreover, manually designing reward functions for every single desired skill is prohibitive. Prior works targeted these challenges by learning goal-conditioned policies from offline datasets without manually specified rewards, through hindsight relabeling. These methods suffer from the issue of sparsity of rewards, and fail at long-horizon tasks. 
In this work, we propose a novel self-supervised learning phase on the pre-collected dataset to understand the structure and the dynamics of the model, and shape a dense reward function for learning policies offline. We evaluate our method on three continuous control tasks, and show that our model significantly outperforms existing approaches, especially on tasks that involve long-term planning.", "keywords": "Offline Reinforcement Learning;Self-Supervised Learning;Goal-Conditioned RL", "primary_area": "", "supplementary_material": "/attachment/f8bb647ebb4e448173da9f847eeb2a93f752348b.zip", "author": "Lina Mezghani;Sainbayar Sukhbaatar;Piotr Bojanowski;Alessandro Lazaric;Karteek Alahari", "authorids": "~Lina_Mezghani1;~Sainbayar_Sukhbaatar1;~Piotr_Bojanowski1;~Alessandro_Lazaric2;~Karteek_Alahari1", "gender": "F;M;M;M;M", "homepage": "https://linamezghani.github.io/;;;;http://thoth.inrialpes.fr/people/alahari", "dblp": "230/7713;56/10550;142/2542;36/321;a/KarteekAlahari", "google_scholar": "-2wyKzEAAAAJ;ri1sE34AAAAJ;https://scholar.google.fr/citations?user=lJ_oh2EAAAAJ;6JZ3R6wAAAAJ;https://scholar.google.fr/citations?user=qcyG7rwAAAAJ", "orcid": "0000-0002-4371-8202;;;;", "linkedin": "lina-mezghani/;;piotr-bojanowski-9a94402a;;", "or_profile": "~Lina_Mezghani1;~Sainbayar_Sukhbaatar1;~Piotr_Bojanowski1;~Alessandro_Lazaric2;~Karteek_Alahari1", "aff": "Meta AI;Meta Facebook;Meta;Meta Facebook;Inria", "aff_domain": "meta.com;fb.com;meta.com;fb.com;inria.fr", "position": "PhD student;Research Scientist;Researcher;Research Scientist;Tenured researcher (eq. Asso. prof.)", "bibtex": "@inproceedings{\nmezghani2022learning,\ntitle={Learning Goal-Conditioned Policies Offline with Self-Supervised Reward Shaping},\nauthor={Lina Mezghani and Sainbayar Sukhbaatar and Piotr Bojanowski and Alessandro Lazaric and Karteek Alahari},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=8tmKW-NG2bH}\n}", "github": "", "project": "", "reviewers": "4D4F;nCHc;iP9J;4myo", "site": "https://openreview.net/forum?id=8tmKW-NG2bH", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10512745005963774615&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Meta;INRIA", "aff_unique_dep": "Meta AI;", "aff_unique_url": "https://meta.com;https://www.inria.fr", "aff_unique_abbr": "Meta;Inria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;France" }, { "id": "A12nd105kFr", "title": "Learning Diverse and Physically Feasible Dexterous Grasps with Generative Model and Bilevel Optimization", "track": "main", "status": "Poster", "tldr": "Proposal and hardware validation of a pipeline combining generative model and bilevel optimization for generating diverse physically feasible dexterous grasps.", "abstract": "To fully utilize the versatility of a multi-fingered dexterous robotic hand for executing diverse object grasps, one must consider the rich physical constraints introduced by hand-object interaction and object geometry. We propose an integrative approach of combining a generative model and a bilevel optimization (BO) to plan diverse grasp configurations on novel objects. 
First, a conditional variational autoencoder trained on merely six YCB objects predicts the finger placement directly from the object point cloud. The prediction is then used to seed a nonconvex BO that solves for a grasp configuration under collision, reachability, wrench closure, and friction constraints. Our method achieved an 86.7% success over 120 real world grasping trials on 20 household objects, including unseen and challenging geometries. Through quantitative empirical evaluations, we confirm that grasp configurations produced by our pipeline are indeed guaranteed to satisfy kinematic and dynamic constraints. A video summary of our results is available at youtu.be/9DTrImbN99I.", "keywords": "dexterous grasping;grasp planning;bilevel optimization;generative model", "primary_area": "", "supplementary_material": "/attachment/4325091024e06a02c75d64e2b76f0afedb1410b2.zip", "author": "Albert Wu;Michelle Guo;Karen Liu", "authorids": "~Albert_Wu2;~Michelle_Guo1;~Karen_Liu1", "gender": ";F;", "homepage": ";https://shellguo.com;https://cs.stanford.edu/~karenliu", "dblp": ";185/0671;", "google_scholar": ";lyjjpNMAAAAJ;i28fU0MAAAAJ", "orcid": ";0000-0002-6574-6669;0000-0001-5926-0905", "linkedin": ";;", "or_profile": "~Albert_Wu2;~Michelle_Guo1;~Karen_Liu1", "aff": ";Computer Science Department, Stanford University;", "aff_domain": ";cs.stanford.edu;", "position": ";PhD student;", "bibtex": "@inproceedings{\nwu2022learning,\ntitle={Learning Diverse and Physically Feasible Dexterous Grasps with Generative Model and Bilevel Optimization},\nauthor={Albert Wu and Michelle Guo and Karen Liu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=A12nd105kFr}\n}", "github": "https://github.com/Stanford-TML/dex_grasp", "project": "", "reviewers": "XLAr;SirZ;GKLm;Fnic", "site": "https://openreview.net/forum?id=A12nd105kFr", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6928768353749131446&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "A5l7wE2uqtM", "title": "BusyBot: Learning to Interact, Reason, and Plan in a BusyBoard Environment", "track": "main", "status": "Poster", "tldr": "We propose a toy-inspired relational environment, BusyBoard, and a learning framework, BusyBot, for embodied AI agents to acquire interaction, reasoning, and planning skills.", "abstract": "We introduce BusyBoard, a toy-inspired robot learning environment that leverages a diverse set of articulated objects and inter-object functional relations to provide rich visual feedback for robot interactions. Based on this environment, we introduce a learning framework, BusyBot, which allows an agent to jointly acquire three fundamental capabilities (interaction, reasoning, and planning) in an integrated and self-supervised manner. 
With the rich sensory feedback provided by BusyBoard, BusyBot first learns a policy to efficiently interact with the environment; then with data collected using the policy, BusyBot reasons the inter-object functional relations through a causal discovery network; and finally by combining the learned interaction policy and relation reasoning skill, the agent is able to perform goal-conditioned manipulation tasks. We evaluate BusyBot in both simulated and real-world environments, and validate its generalizability to unseen objects and relations.", "keywords": "Manipulation;Scene Understanding;Learning Environment", "primary_area": "", "supplementary_material": "/attachment/6e996cd1a97528e377609cd476f54da90e906a56.zip", "author": "Zeyi Liu;Zhenjia Xu;Shuran Song", "authorids": "~Zeyi_Liu1;~Zhenjia_Xu1;~Shuran_Song3", "gender": "F;M;F", "homepage": "https://lzylucy.github.io;https://www.zhenjiaxu.com/;https://shurans.github.io/", "dblp": ";238/0000;", "google_scholar": ";QE8cLMEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "zeyi-liu;;", "or_profile": "~Zeyi_Liu1;~Zhenjia_Xu1;~Shuran_Song3", "aff": "Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;cs.columbia.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2022busybot,\ntitle={BusyBot: Learning to Interact, Reason, and Plan in a BusyBoard Environment},\nauthor={Zeyi Liu and Zhenjia Xu and Shuran Song},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=A5l7wE2uqtM}\n}", "github": "https://github.com/columbia-ai-robotics/busybot", "project": "", "reviewers": "yhwf;gcRk;55uE;9A6a", "site": "https://openreview.net/forum?id=A5l7wE2uqtM", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13152213642789953654&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "AdFROt9BoqE", "title": "Cross-Domain Transfer via Semantic Skill Imitation", "track": "main", "status": "Poster", "tldr": "We propose an approach for semantic imitation, which uses demonstrations from a source domain, e.g. human videos, to accelerate reinforcement learning (RL) in a different target domain, e.g. a robotic manipulator in a simulated kitchen.", "abstract": "We propose an approach for semantic imitation, which uses demonstrations from a source domain, e.g. human videos, to accelerate reinforcement learning (RL) in a different target domain, e.g. a robotic manipulator in a simulated kitchen. Instead of imitating low-level actions like joint velocities, our approach imitates the sequence of demonstrated semantic skills like \"opening the microwave\" or \"turning on the stove\". This allows us to transfer demonstrations across environments (e.g. real-world to simulated kitchen) and agent embodiments (e.g. bimanual human demonstration to robotic arm). 
\nWe evaluate on three challenging cross-domain learning problems and match the performance of demonstration-accelerated RL approaches that require in-domain demonstrations. In a simulated kitchen environment, our approach learns long-horizon robot manipulation tasks, using less than 3 minutes of human video demonstrations from a real-world kitchen. This enables scaling robot learning via the reuse of demonstrations, e.g. collected as human videos, for learning in any number of target domains.", "keywords": "Reinforcement Learning;Imitation;Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/5dbcbdb770a67492e8973aac05ae8896f7e12eb4.zip", "author": "Karl Pertsch;Ruta Desai;Vikash Kumar;Franziska Meier;Joseph J Lim;Dhruv Batra;Akshara Rai", "authorids": "~Karl_Pertsch1;~Ruta_Desai1;~Vikash_Kumar2;~Franziska_Meier2;~Joseph_J_Lim1;~Dhruv_Batra1;~Akshara_Rai1", "gender": ";;M;;M;Not Specified;", "homepage": "https://kpertsch.github.io/;;http://vikashplus.github.io/;;http://people.csail.mit.edu/lim/;https://dhruvbatra.com;https://ai.facebook.com/people/akshara-rai", "dblp": "211/7137;;82/7475;;08/3086;67/6586;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;nu3W--sAAAAJ;;jTnQTBoAAAAJ;_bs7PqgAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Karl_Pertsch1;~Ruta_Desai1;~Vikash_Kumar2;~Franziska_Meier2;~Joseph_J_Lim1;~Dhruv_Batra1;~Akshara_Rai1", "aff": "University of Southern California;;Meta Facebook;;Korea Advanced Institute of Science & Technology;Georgia Institute of Technology;FAIR, Meta AI", "aff_domain": "usc.edu;;facebook.com;;kaist.ac.kr;gatech.edu;meta.com", "position": "PhD student;;Researcher;;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\npertsch2022crossdomain,\ntitle={Cross-Domain Transfer via Semantic Skill Imitation},\nauthor={Karl Pertsch and Ruta Desai and Vikash Kumar and Franziska Meier and Joseph J Lim and Dhruv Batra and Akshara Rai},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=AdFROt9BoqE}\n}", "github": "https://github.com/kpertsch/star", "project": "", "reviewers": "95b2;8M4B;UjLs;YQ7o", "site": "https://openreview.net/forum?id=AdFROt9BoqE", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6276920568403836031&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Southern California;Meta;Korea Advanced Institute of Science and Technology;Georgia Institute of Technology", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.usc.edu;https://meta.com;https://www.kaist.ac.kr;https://www.gatech.edu", "aff_unique_abbr": "USC;Meta;KAIST;Georgia Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;South Korea" }, { "id": "Ag-vOezQ0Gw", "title": "ROS-PyBullet Interface: A Framework for Reliable Contact Simulation and Human-Robot Interaction", "track": "main", "status": "Poster", "tldr": "The ROS-PyBullet Interface is a framework between the reliable contact simulator PyBullet and the Robot Operating System (ROS) with additional utilities for Human-Robot Interaction in the simulated environment - we also present several use-cases.", 
"abstract": "Reliable contact simulation plays a key role in the development of (semi-)autonomous robots, especially when dealing with contact-rich manipulation scenarios, an active robotics research topic. Besides simulation, components such as sensing, perception, data collection, robot hardware control, human interfaces, etc. are all key enablers towards applying machine learning algorithms or model-based approaches in real world systems. However, there is a lack of software connecting reliable contact simulation with the larger robotics ecosystem (i.e. ROS, Orocos), for a more seamless application of novel approaches, found in the literature, to existing robotic hardware. In this paper, we present the ROS-PyBullet Interface, a framework that provides a bridge between the reliable contact/impact simulator PyBullet and the Robot Operating System (ROS). Furthermore, we provide additional utilities for facilitating Human-Robot Interaction (HRI) in the simulated environment. We also present several use-cases that highlight the capabilities and usefulness of our framework. Our code base is open source and can be found at https://github.com/ros-pybullet/ros_pybullet_interface.", "keywords": "Contact-rich Simulation;Manipulation;Teleoperation;Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/228f004bd6a396ee7ca44793746a0bedbef42463.zip", "author": "Christopher Mower;Theodoros Stouraitis;Jo\u00e3o Moura;Christian Rauch;Lei Yan;Nazanin Zamani Behabadi;Michael Gienger;Tom Vercauteren;Christos Bergeles;Sethu Vijayakumar", "authorids": "~Christopher_Mower1;theostou@honda-ri.de;joao.moura@ed.ac.uk;christian.rauch@ed.ac.uk;lei.yan@ed.ac.uk;naz.zamani.b@gmail.com;~Michael_Gienger1;~Tom_Vercauteren1;christos.bergeles@kcl.ac.uk;sethu.vijayakumar@ed.ac.uk", "gender": ";;;;;;M;;;", "homepage": ";;;;;;https://www.honda-ri.de;;;", "dblp": ";;;;;;;;;", "google_scholar": ";;;;;;https://scholar.google.de/citations?user=oU2jyxMAAAAJ;;;", "orcid": ";;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Christopher_Mower1;theostou@honda-ri.de;joao.moura@ed.ac.uk;christian.rauch@ed.ac.uk;lei.yan@ed.ac.uk;naz.zamani.b@gmail.com;~Michael_Gienger1;~Tom_Vercauteren1;christos.bergeles@kcl.ac.uk;sethu.vijayakumar@ed.ac.uk", "aff": ";;;;;;;;;", "aff_domain": ";;;;;;;;;", "position": ";;;;;;;;;", "bibtex": "@inproceedings{\nmower2022rospybullet,\ntitle={{ROS}-PyBullet Interface: A Framework for Reliable Contact Simulation and Human-Robot Interaction},\nauthor={Christopher Mower and Theodoros Stouraitis and Jo{\\~a}o Moura and Christian Rauch and Lei Yan and Nazanin Zamani Behabadi and Michael Gienger and Tom Vercauteren and Christos Bergeles and Sethu Vijayakumar},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Ag-vOezQ0Gw}\n}", "github": "https://github.com/ros-pybullet/ros_pybullet_interface", "project": "", "reviewers": "bKas;E5qq;VFxS", "site": "https://openreview.net/forum?id=Ag-vOezQ0Gw", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 8, "authors#_avg": 10, "corr_rating_confidence": 0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14733093054499849037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12 }, { "id": "AmPeAFzU3a4", "title": "MIRA: Mental Imagery for Robotic Affordances", "track": "main", "status": "Poster", "tldr": "We introduce a method for object rearrangement that optimizes actions with novel-view 
synthesis and affordance prediction in the loop.", "abstract": "Humans form mental images of 3D scenes to support counterfactual imagination, planning, and motor control. Our abilities to predict the appearance and affordance of the scene from previously unobserved viewpoints aid us in performing manipulation tasks (e.g., 6-DoF kitting) with a level of ease that is currently out of reach for existing robot learning frameworks. In this work, we aim to build artificial systems that can analogously plan actions on top of imagined images. To this end, we introduce Mental Imagery for Robotic Affordances (MIRA), an action reasoning framework that optimizes actions with novel-view synthesis and affordance prediction in the loop. Given a set of 2D RGB images, MIRA builds a consistent 3D scene representation, through which we synthesize novel orthographic views amenable to pixel-wise affordances prediction for action optimization. We illustrate how this optimization process enables us to generalize to unseen out-of-plane rotations for 6-DoF robotic manipulation tasks given a limited number of demonstrations, paving the way toward machines that autonomously learn to understand the world around them for planning actions.", "keywords": "Neural Radiance Fields;Rearrangement;Robotic Manipulation", "primary_area": "", "supplementary_material": "/attachment/70e3f36c45fe6dfff990937642cee46cb5db2e4f.zip", "author": "Yen-Chen Lin;Pete Florence;Andy Zeng;Jonathan T. Barron;Yilun Du;Wei-Chiu Ma;Anthony Simeonov;Alberto Rodriguez Garcia;Phillip Isola", "authorids": "~Yen-Chen_Lin1;~Pete_Florence1;~Andy_Zeng3;~Jonathan_T._Barron1;~Yilun_Du1;~Wei-Chiu_Ma1;~Anthony_Simeonov1;~Alberto_Rodriguez_Garcia1;~Phillip_Isola1", "gender": "M;;;M;;M;M;M;M", "homepage": "http://yenchenlin.me/;http://www.peteflorence.com/;https://yilundu.github.io;https://www.cs.cornell.edu/~weichiu/;https://anthonysimeonov.github.io/;http://mcube.mit.edu/;http://web.mit.edu/phillipi/;http://andyzeng.github.io/;https://jonbarron.info/", "dblp": "180/0954;;204/4379;151/4277;;;36/9988;http://dblp.uni-trier.de/pers/hd/z/Zeng:Andy;30/9988", "google_scholar": "RbCKRPcAAAAJ;;;SVIdh6AAAAAJ;;AC93g9kAAAAJ;ROILf3EAAAAJ;q7nFtUcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0002-1411-6704;;", "linkedin": ";;;;;;phillip-isola-a9955b20/;;", "or_profile": "~Yen-Chen_Lin1;~Pete_Florence1;~Yilun_Du1;~Wei-Chiu_Ma1;~Anthony_Simeonov1;~Alberto_Rodriguez_Garcia1;~Phillip_Isola1;~Andy_Zeng1;~Jonathan_T_Barron2", "aff": "Massachusetts Institute of Technology;Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology;NVIDIA;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Google;Google", "aff_domain": "mit.edu;google.com;mit.edu;mit.edu;nvidia.com;mit.edu;mit.edu;google.com;google.com", "position": "PhD student;Research Scientist;PhD student;PhD student;Intern;Associate Professor;Assistant Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlin2022mira,\ntitle={{MIRA}: Mental Imagery for Robotic Affordances},\nauthor={Yen-Chen Lin and Pete Florence and Andy Zeng and Jonathan T. 
Barron and Yilun Du and Wei-Chiu Ma and Anthony Simeonov and Alberto Rodriguez Garcia and Phillip Isola},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=AmPeAFzU3a4}\n}", "github": "", "project": "", "reviewers": "DpD8;YteW;jzYJ", "site": "https://openreview.net/forum?id=AmPeAFzU3a4", "pdf_size": 0, "rating": "4;6;10", "confidence": "", "rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4857710515428250519&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;0;0;2;0;0;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Google;NVIDIA", "aff_unique_dep": ";Google;NVIDIA Corporation", "aff_unique_url": "https://web.mit.edu;https://www.google.com;https://www.nvidia.com", "aff_unique_abbr": "MIT;Google;NVIDIA", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Bf6on28H0Jv", "title": "Masked World Models for Visual Control", "track": "main", "status": "Poster", "tldr": "We present a visual model-based RL framework that decouples visual representation learning and dynamics learning.", "abstract": "Visual model-based reinforcement learning (RL) has the potential to enable sample-efficient robot learning from visual observations. Yet the current approaches typically train a single model end-to-end for learning both visual representations and dynamics, making it difficult to accurately model the interaction between robots and small objects. In this work, we introduce a visual model-based RL framework that decouples visual representation learning and dynamics learning. Specifically, we train an autoencoder with convolutional layers and vision transformers (ViT) to reconstruct pixels given masked convolutional features, and learn a latent dynamics model that operates on the representations from the autoencoder. Moreover, to encode task-relevant information, we introduce an auxiliary reward prediction objective for the autoencoder. We continually update both autoencoder and dynamics model using online samples collected from environment interaction. We demonstrate that our decoupling approach achieves state-of-the-art performance on a variety of visual robotic tasks from Meta-world and RLBench, e.g., we achieve 81.7% success rate on 50 visual robotic manipulation tasks from Meta-world, while the baseline achieves 67.9%. 
Code is available on the project website: https://sites.google.com/view/mwm-rl.", "keywords": "Visual model-based RL;World models", "primary_area": "", "supplementary_material": "/attachment/0fa892a27dc424b9a2c03d5131b77fcfdca1348c.zip", "author": "Younggyo Seo;Danijar Hafner;Hao Liu;Fangchen Liu;Stephen James;Kimin Lee;Pieter Abbeel", "authorids": "~Younggyo_Seo1;~Danijar_Hafner1;~Hao_Liu1;~Fangchen_Liu2;~Stephen_James1;~Kimin_Lee1;~Pieter_Abbeel2", "gender": "M;;F;M;M;M;M", "homepage": "https://younggyo.me/;https://danijar.com;https://fangchenliu.github.io/;https://stepjam.github.io/;https://sites.google.com/view/kiminlee;https://people.eecs.berkeley.edu/~pabbeel/;https://haoliu.ai", "dblp": "265/5586;184/8088;;163/5669;183/6849;;09/3214-55", "google_scholar": "tI1-YwIAAAAJ;VINmGpYAAAAJ;;OXtG-isAAAAJ;92M8xv4AAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;wtK4Yh4AAAAJ", "orcid": ";0000-0002-9534-7271;;;;;", "linkedin": ";;;;;;", "or_profile": "~Younggyo_Seo1;~Danijar_Hafner1;~Fangchen_Liu2;~Stephen_James1;~Kimin_Lee1;~Pieter_Abbeel2;~Hao_Liu10", "aff": "University of California, Berkeley;University of Toronto;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Covariant;University of California, Berkeley", "aff_domain": "berkeley.edu;cs.toronto;berkeley.edu;berkeley.edu;berkeley.edu;covariant.ai;berkeley.edu", "position": "Intern;PhD student;PhD student;Postdoc;Postdoc;Founder;PhD student", "bibtex": "@inproceedings{\nseo2022masked,\ntitle={Masked World Models for Visual Control},\nauthor={Younggyo Seo and Danijar Hafner and Hao Liu and Fangchen Liu and Stephen James and Kimin Lee and Pieter Abbeel},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Bf6on28H0Jv}\n}", "github": "https://github.com/younggyoseo/MWM", "project": "", "reviewers": "btzk;GUgv;abdd;DGEG;iNVH", "site": "https://openreview.net/forum?id=Bf6on28H0Jv", "pdf_size": 0, "rating": "4;6;6;6;6", "confidence": "", "rating_avg": 5.6, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=25819232910477634&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0;0;2;0", "aff_unique_norm": "University of California, Berkeley;University of Toronto;Covariant", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.utoronto.ca;", "aff_unique_abbr": "UC Berkeley;U of T;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;Canada;" }, { "id": "BxHcg_Zlpxj", "title": "Rethinking Sim2Real: Lower Fidelity Simulation Leads to Higher Sim2Real Transfer in Navigation", "track": "main", "status": "Poster", "tldr": "Sim2real transfer of robots may be improved not by increasing but by decreasing simulation fidelity; we should instead prioritize simulation speed for tasks that can be represented with abstract action spaces.", "abstract": "If we want to train robots in simulation before deploying them in reality, it seems natural and almost self-evident to presume that reducing the sim2real gap involves creating simulators of increasing fidelity (since reality is what it is). We challenge this assumption and present a contrary hypothesis -- sim2real transfer of robots may be improved with lower (not higher) fidelity simulation. 
We conduct a systematic large-scale evaluation of this hypothesis on the problem of visual navigation -- in the real world, and on 2 different simulators (Habitat and iGibson) using 3 different robots (A1, AlienGo, Spot). Our results show that, contrary to expectation, adding fidelity does not help with learning; performance is poor due to slow simulation speed (preventing large-scale learning) and overfitting to inaccuracies in simulation physics. Instead, building simple models of the robot motion using real-world data can improve learning and generalization.", "keywords": "Sim2Real;Deep Reinforcement Learning;Visual-Based Navigation", "primary_area": "", "supplementary_material": "/attachment/4b0e917762087acfb7d95957ae43caa7ca28b949.zip", "author": "Joanne Truong;Max Rudolph;Naoki Harrison Yokoyama;Sonia Chernova;Dhruv Batra;Akshara Rai", "authorids": "~Joanne_Truong1;~Max_Rudolph1;~Naoki_Harrison_Yokoyama1;~Sonia_Chernova2;~Dhruv_Batra1;~Akshara_Rai1", "gender": ";M;M;F;Not Specified;", "homepage": ";https://maxrudolph1.github.io/;http://naoki.io/;https://www.cc.gatech.edu/~chernova/;https://dhruvbatra.com;https://ai.facebook.com/people/akshara-rai", "dblp": ";298/8056.html;;27/1140;67/6586;", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;26MOv8wAAAAJ;EYo_WkEAAAAJ;_bs7PqgAAAAJ;", "orcid": ";;;0000-0001-6320-0825;;", "linkedin": ";;;;;", "or_profile": "~Joanne_Truong1;~Max_Rudolph1;~Naoki_Harrison_Yokoyama1;~Sonia_Chernova2;~Dhruv_Batra1;~Akshara_Rai1", "aff": ";University of Texas at Austin;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;FAIR, Meta AI", "aff_domain": ";utexas.edu;gatech.edu;gatech.edu;gatech.edu;meta.com", "position": ";PhD student;PhD student;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\ntruong2022rethinking,\ntitle={Rethinking Sim2Real: Lower Fidelity Simulation Leads to Higher Sim2Real Transfer in Navigation},\nauthor={Joanne Truong and Max Rudolph and Naoki Harrison Yokoyama and Sonia Chernova and Dhruv Batra and Akshara Rai},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=BxHcg_Zlpxj}\n}", "github": "https://github.com/joannetruong/habitat-lab/tree/kin2dyn", "project": "", "reviewers": "voBL;MhUg;BmX5;Soyb", "site": "https://openreview.net/forum?id=BxHcg_Zlpxj", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16640048550071117255&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of Texas at Austin;Georgia Institute of Technology;Meta", "aff_unique_dep": ";;Meta AI", "aff_unique_url": "https://www.utexas.edu;https://www.gatech.edu;https://meta.ai", "aff_unique_abbr": "UT Austin;Georgia Tech;Meta AI", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Bxr45keYrf", "title": "Evo-NeRF: Evolving NeRF for Sequential Robot Grasping of Transparent Objects", "track": "main", "status": "Oral", "tldr": "Reusing NeRF weights between grasp, incorporating NeRF geometry regularization, and training NeRF during motion allows rapid grasping of transparent objects with a grasp prediction network trained on simulated NeRF outputs.", "abstract": 
"Sequential robot grasping of transparent objects, where a robot removes objects one by one from a workspace, is important in many industrial and household scenarios. We propose Evolving NeRF (Evo-NeRF), leveraging recent speedups in NeRF training and further extending it to rapidly train the NeRF representation concurrently to image capturing. Evo-NeRF terminates training early when a sufficient task confidence is achieved, evolves the NeRF weights from grasp to grasp to rapidly adapt to object removal, and applies additional geometry regularizations to make the reconstruction smoother and faster. General purpose grasp planners such as Dex-Net may underperform with NeRF outputs because there can be unreliable geometry from rapidly trained NeRFs. To mitigate this distribution shift, we propose a Radiance-Adjusted Grasp Network (RAG-Net), a grasping network adapted to NeRF's characteristics through training on depth rendered from NeRFs of synthetic scenes. In experiments, a physical YuMi robot using Evo-NeRF and RAG-Net achieves an 89% grasp success rate over 27 trials on single objects, with early capture termination providing a 41% speed improvement with no loss in reliability. In a sequential grasping task on 6 scenes, Evo-NeRF reusing network weights clears 72% of the objects, retaining similar performance as reconstructing the NeRF from scratch (76%) but using 61% less sensing time. See https://sites.google.com/view/evo-nerf for more materials.", "keywords": "NeRF;grasping;transparent objects;speed", "primary_area": "", "supplementary_material": "/attachment/7504a732c6bb1b9a69856e4ddbafe27d46041ae0.zip", "author": "Justin Kerr;Letian Fu;Huang Huang;Yahav Avigal;Matthew Tancik;Jeffrey Ichnowski;Angjoo Kanazawa;Ken Goldberg", "authorids": "~Justin_Kerr1;~Letian_Fu1;~Huang_Huang1;~Yahav_Avigal1;~Matthew_Tancik1;~Jeffrey_Ichnowski1;~Angjoo_Kanazawa1;~Ken_Goldberg1", "gender": "M;M;;M;M;M;F;M", "homepage": "https://kerrj.github.io/;https://max-fu.github.io/;https://sites.google.com/site/huanghuang9729/home;https://yahavigal.github.io/;https://www.matthewtancik.com;https://ichnow.ski;https://people.eecs.berkeley.edu/~kanazawa/;http://goldberg.berkeley.edu/", "dblp": ";;;;;89/1741;119/1305;g/KennethYGoldberg", "google_scholar": ";aWot7UgAAAAJ;;CCAaFCQAAAAJ;l0Bj7U8AAAAJ;1OdtfywAAAAJ;Ci-_QYIAAAAJ;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ", "orcid": ";;;0000-0003-2062-5983;;0000-0003-4874-9478;;0000-0001-6747-9499", "linkedin": ";;;;;;;goldbergken/", "or_profile": "~Justin_Kerr1;~Letian_Fu1;~Huang_Huang1;~Yahav_Avigal1;~Matthew_Tancik1;~Jeffrey_Ichnowski1;~Angjoo_Kanazawa1;~Ken_Goldberg1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Undergrad student;PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkerr2022evonerf,\ntitle={Evo-Ne{RF}: Evolving Ne{RF} for Sequential Robot Grasping of Transparent Objects},\nauthor={Justin Kerr and Letian Fu and Huang Huang and Yahav Avigal and Matthew Tancik and Jeffrey Ichnowski and Angjoo Kanazawa and Ken Goldberg},\nbooktitle={6th Annual Conference on Robot 
Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Bxr45keYrf}\n}", "github": "", "project": "", "reviewers": "UJkS;NJjN;iTZS", "site": "https://openreview.net/forum?id=Bxr45keYrf", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15420006098164788766&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "CC4JMO4dzg", "title": "Learning Preconditions of Hybrid Force-Velocity Controllers for Contact-Rich Manipulation", "track": "main", "status": "Poster", "tldr": "Plan with learned preconditions for Hybrid Force-Velocity Controllers to perform contact-rich manipulation tasks in constrained environments (e.g. shelves)", "abstract": "Robots need to manipulate objects in constrained environments like shelves and cabinets when assisting humans in everyday settings like homes and offices. These constraints make manipulation difficult by reducing grasp accessibility, so robots need to use non-prehensile strategies that leverage object-environment contacts to perform manipulation tasks. To tackle the challenge of planning and controlling contact-rich behaviors in such settings, this work uses Hybrid Force-Velocity Controllers (HFVCs) as the skill representation and plans skill sequences with learned preconditions. While HFVCs naturally enable robust and compliant contact-rich behaviors, solvers that synthesize them have traditionally relied on precise object models and closed-loop feedback on object pose, which are difficult to obtain in constrained environments due to occlusions. We first relax HFVCs' need for precise models and feedback with our HFVC synthesis framework, then learn a point-cloud-based precondition function to classify where HFVC executions will still be successful despite modeling inaccuracies. Finally, we use the learned precondition in a search-based task planner to complete contact-rich manipulation tasks in a shelf domain. Our method achieves a task success rate of $73.2\\%$, outperforming the $51.5\\%$ achieved by a baseline without the learned precondition. While the precondition function is trained in simulation, it can also transfer to a real-world setup without further fine-tuning. See supplementary materials and videos at~\\url{https://sites.google.com/view/constrained-manipulation/}. 
", "keywords": "Contact-Rich Manipulation;Hybrid Force-Velocity Controllers;Precondition Learning", "primary_area": "", "supplementary_material": "/attachment/541b2303d296fba849bb20e1c8f6bd8988f25879.zip", "author": "Jacky Liang;Xianyi Cheng;Oliver Kroemer", "authorids": "~Jacky_Liang1;xianyic@cmu.edu;~Oliver_Kroemer1", "gender": "M;;M", "homepage": "https://www.jacky.io;;https://www.ri.cmu.edu/ri-faculty/oliver-kroemer/", "dblp": ";;04/7743", "google_scholar": "K29Sv1EAAAAJ;;_tbXjP4AAAAJ", "orcid": ";;", "linkedin": "jackyliang42;;", "or_profile": "~Jacky_Liang1;xianyic@cmu.edu;~Oliver_Kroemer1", "aff": "Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nliang2022learning,\ntitle={Learning Preconditions of Hybrid Force-Velocity Controllers for Contact-Rich Manipulation},\nauthor={Jacky Liang and Xianyi Cheng and Oliver Kroemer},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=CC4JMO4dzg}\n}", "github": "", "project": "", "reviewers": "4YaQ;yUsy;SLZA;6oMV", "site": "https://openreview.net/forum?id=CC4JMO4dzg", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17389873270955226535&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "DE8rdNuGj_7", "title": "LEADER: Learning Attention over Driving Behaviors for Planning under Uncertainty", "track": "main", "status": "Oral", "tldr": "We propose an algorithm that learns attention over human behaviors for planning under uncertainty.", "abstract": "Uncertainty in human behaviors poses a significant challenge to autonomous driving in crowded urban environments. The partially observable Markov decision process (POMDP) offers a principled general framework for decision making under uncertainty and achieves real-time performance for complex tasks by leveraging Monte Carlo sampling. However, sampling may miss rare, but critical events, leading to potential safety concerns. To tackle this challenge, we propose a new algorithm, LEarning Attention over Driving bEhavioRs (LEADER), which learns to attend to critical human behaviors during planning. LEADER learns a neural network generator to provide attention over human behaviors; it integrates the attention into a belief-space planner through importance sampling, which biases planning towards critical events. To train the attention generator, we form a minimax game between the generator and the planner. 
By solving this minimax game, LEADER learns to perform risk-aware planning without explicit human effort on data labeling.", "keywords": "Planning under uncertainty;Integrating planning and learning;Autonomous driving", "primary_area": "", "supplementary_material": "/attachment/06d1062765e019e4491d1f4d5e52f7e8a9be011e.zip", "author": "Mohamad Hosein Danesh;Panpan Cai;David Hsu", "authorids": "~Mohamad_Hosein_Danesh1;~Panpan_Cai1;~David_Hsu1", "gender": "M;F;M", "homepage": "https://modanesh.github.io/;https://cindycia.github.io/;http://www.comp.nus.edu.sg/~dyhsu/", "dblp": "267/1935;215/4265;29/331", "google_scholar": "AsqcJtAAAAAJ;https://scholar.google.com.sg/citations?user=MZfL0qUAAAAJ;S9LHLKEAAAAJ", "orcid": ";;0000-0002-2309-4535", "linkedin": "mohamad-h-danesh-a766b0185/;;david-hsu-a86200a1/", "or_profile": "~Mohamad_Hosein_Danesh1;~Panpan_Cai1;~David_Hsu1", "aff": ";National University of Singapore;National University of Singapore", "aff_domain": ";nus.edu.sg;nus.edu.sg", "position": ";Postdoc;Professor", "bibtex": "@inproceedings{\ndanesh2022leader,\ntitle={{LEADER}: Learning Attention over Driving Behaviors for Planning under Uncertainty},\nauthor={Mohamad Hosein Danesh and Panpan Cai and David Hsu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=DE8rdNuGj_7}\n}", "github": "https://github.com/modanesh/LEADER", "project": "", "reviewers": "D38s;KBsD;Djif;4p6j;Fx8v", "site": "https://openreview.net/forum?id=DE8rdNuGj_7", "pdf_size": 0, "rating": "6;6;6;10;10", "confidence": "", "rating_avg": 7.6, "confidence_avg": 0, "replies_avg": 29, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9994614890442124266&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "DLkubm-dq-y", "title": "Learning Road Scene-level Representations via Semantic Region Prediction", "track": "main", "status": "Poster", "tldr": "We propose a novel task called Semantic Region Prediction to learn road scene-level representations for two vital tasks in automated driving systems.", "abstract": "In this work, we tackle two vital tasks in automated driving systems, i.e., driver intent prediction and risk object identification from egocentric images. Mainly, we investigate the question: what would be good road scene-level representations for these two tasks? We contend that a scene-level representation must capture higher-level semantic and geometric representations of traffic scenes around ego-vehicle while performing actions to their destinations. To this end, we introduce the representation of semantic regions, which are areas where ego-vehicles visit while taking an afforded action (e.g., left-turn at 4-way intersections). We propose to learn scene-level representations via a novel semantic region prediction task and an automatic semantic region labeling algorithm. Extensive evaluations are conducted on the HDD and nuScenes datasets, and the learned representations lead to state-of-the-art performance for driver intention prediction and risk object identification. 
", "keywords": "Semantic Region Prediction;Egocentric Vision;Driver Intent;Risk Object Identification", "primary_area": "", "supplementary_material": "/attachment/c56fb88ea620e9f6b55e4163d5ec2704512f297c.zip", "author": "Zihao Xiao;Alan Yuille;Yi-Ting Chen", "authorids": "~Zihao_Xiao2;~Alan_Yuille1;~Yi-Ting_Chen2", "gender": "M;M;M", "homepage": ";;https://sites.google.com/site/yitingchen0524/", "dblp": "207/2005-1;y/AlanLYuille;12/5268-1", "google_scholar": "https://scholar.google.com/citations?hl=en;;8tRH7RMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zihao_Xiao2;~Alan_Yuille1;~Yi-Ting_Chen2", "aff": "Johns Hopkins University;Johns Hopkins University;National Yang Ming Chiao Tung University", "aff_domain": "jhu.edu;johnshopkins.edu;nycu.edu.tw", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxiao2022learning,\ntitle={Learning Road Scene-level Representations via Semantic Region Prediction},\nauthor={Zihao Xiao and Alan Yuille and Yi-Ting Chen},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=DLkubm-dq-y}\n}", "github": "", "project": "", "reviewers": "1ZBN;4hg8;sBrF;wuu6", "site": "https://openreview.net/forum?id=DLkubm-dq-y", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10922258548743254061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Johns Hopkins University;National Yang Ming Chiao Tung University", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.nycu.edu.tw", "aff_unique_abbr": "JHU;NYCU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "id": "ED0G14V3WeH", "title": "Data-Efficient Model Learning for Control with Jacobian-Regularized Dynamic-Mode Decomposition", "track": "main", "status": "Poster", "tldr": "Using information from an approximate prior model improves sample efficiency when learning Koopman models.", "abstract": "We present a data-efficient algorithm for learning models for model-predictive control (MPC). Our approach, Jacobian-Regularized Dynamic-Mode Decomposition (JDMD), offers improved sample efficiency over traditional Koopman approaches based on Dynamic-Mode Decomposition (DMD) by leveraging Jacobian information from an approximate prior model of the system, and improved tracking performance over traditional model-based MPC. We demonstrate JDMD\u2019s ability to quickly learn bilinear Koopman dynamics representations across several realistic examples in simulation, including a perching maneuver for a fixed-wing aircraft with an empirically derived high-fidelity physics model. 
In all cases, we show that the models learned by JDMD provide superior tracking and generalization performance within a model-predictive control framework, even in the presence of significant model mismatch, when compared to approximate prior models and models learned by standard Extended DMD (EDMD).", "keywords": "Koopman;learning;model-predictive control;optimal control", "primary_area": "", "supplementary_material": "/attachment/fab8f8cd81936ab60c45d83980c1a9a5ba7aa4b5.zip", "author": "Brian Edward Jackson;Jeong Hun Lee;Kevin Tracy;Zachary Manchester", "authorids": "~Brian_Edward_Jackson1;jeonghunlee@cmu.edu;ktracy@cmu.edu;~Zachary_Manchester1", "gender": ";;;M", "homepage": "https://bjack205.github.io/;;;http://roboticexplorationlab.org/", "dblp": ";;;192/3194", "google_scholar": ";;;utFbPYUAAAAJ", "orcid": ";;;", "linkedin": "https://www.linkedin.com/feed/;;;", "or_profile": "~Brian_Edward_Jackson1;jeonghunlee@cmu.edu;ktracy@cmu.edu;~Zachary_Manchester1", "aff": "Carnegie Mellon University;;;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;;;cmu.edu", "position": "PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\njackson2022dataefficient,\ntitle={Data-Efficient Model Learning for Control with Jacobian-Regularized Dynamic-Mode Decomposition},\nauthor={Brian Edward Jackson and Jeong Hun Lee and Kevin Tracy and Zachary Manchester},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ED0G14V3WeH}\n}", "github": "https://github.com/bjack205/BilinearControl.jl", "project": "", "reviewers": "4PJQ;R4eq;5W5c;Uwdc", "site": "https://openreview.net/forum?id=ED0G14V3WeH", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6017495928257695011&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "EVFrjBgYsPZ", "title": "ROAD: Learning an Implicit Recursive Octree Auto-Decoder to Efficiently Encode 3D Shapes", "track": "main", "status": "Poster", "tldr": "We propose ROAD, an implicit Recursive Octree Auto-Decoder to efficiently and accurately encode large datasets of complex 3D shapes. ", "abstract": "Compact and accurate representations of 3D shapes are central to many perception and robotics tasks. State-of-the-art learning-based methods can reconstruct single objects but scale poorly to large datasets. We present a novel recursive implicit representation to efficiently and accurately encode large datasets of complex 3D shapes by recursively traversing an implicit octree in latent space. Our implicit Recursive Octree Auto-Decoder (ROAD) learns a hierarchically structured latent space enabling state-of-the-art reconstruction results at a compression ratio above 99%. We also propose an efficient curriculum learning scheme that naturally exploits the coarse-to-fine properties of the underlying octree spatial representation. We explore the scaling law relating latent space dimension, dataset size, and reconstruction accuracy, showing that increasing the latent space dimension is enough to scale to large shape datasets. 
Finally, we show that our learned latent space encodes a coarse-to-fine hierarchical structure yielding reusable latents across different levels of details, and we provide qualitative evidence of generalization to novel shapes outside the training set.\n", "keywords": "Implicit shape representations;Reconstruction;Data compression", "primary_area": "", "supplementary_material": "/attachment/a38045da8c6e10d97a7a420c57aa27011f31dcae.zip", "author": "Sergey Zakharov;Rares Andrei Ambrus;Katherine Liu;Adrien Gaidon", "authorids": "~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Katherine_Liu1;~Adrien_Gaidon1", "gender": "M;M;F;", "homepage": "https://zakharos.github.io/;http://www.csc.kth.se/~raambrus/;https://thekatherineliu.com;https://adriengaidon.com/", "dblp": "195/5832;25/76;226/6398;06/7548.html", "google_scholar": "https://scholar.google.de/citations?user=3DK3I-8AAAAJ;2xjjS3oAAAAJ;PhpQD2YAAAAJ;https://scholar.google.fr/citations?user=2StUgf4AAAAJ", "orcid": ";0000-0002-3111-3812;;", "linkedin": ";rare%C8%99-ambru%C8%99-b04812125/;;adrien-gaidon-63ab2358/", "or_profile": "~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Katherine_Liu1;~Adrien_Gaidon1", "aff": "Toyota Research Institute;Toyota Research Institute;Toyota Research Institute;Toyota Research Institute (TRI)", "aff_domain": "tri.global;tri.global;tri.global;tri.global", "position": "Researcher;Researcher;Researcher;Head of ML", "bibtex": "@inproceedings{\nzakharov2022road,\ntitle={{ROAD}: Learning an Implicit Recursive Octree Auto-Decoder to Efficiently Encode 3D Shapes},\nauthor={Sergey Zakharov and Rares Andrei Ambrus and Katherine Liu and Adrien Gaidon},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=EVFrjBgYsPZ}\n}", "github": "", "project": "", "reviewers": "XRB2;dXT1;SYPp;NizH", "site": "https://openreview.net/forum?id=EVFrjBgYsPZ", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17731440251451989593&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Toyota Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.tri.global", "aff_unique_abbr": "TRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Eal_lL08v_l", "title": "Transformers Are Adaptable Task Planners", "track": "main", "status": "Poster", "tldr": "Transformer Task Planner (TTP) learns high-level pick and place actions from dishwasher loading demonstrations and adapts to unseen preferences using single prompt. ", "abstract": "Every home is different, and every person likes things done in their particular way. Therefore, home robots of the future need to both reason about the sequential nature of day-to-day tasks and generalize to user's preferences. To this end, we propose a Transformer Task Planner (TTP) that learns high-level actions from demonstrations by leveraging object attribute-based representations. TTP can be pre-trained on multiple preferences and shows generalization to unseen preferences using a single demonstration as a prompt in a simulated dishwasher loading task. 
Further, we demonstrate real-world dish rearrangement using TTP with a Franka Panda robotic arm, prompted using a single human demonstration.", "keywords": "Task Planning;Prompt;Preferences;Object-centric Representation", "primary_area": "", "supplementary_material": "/attachment/29eba03a493c84a15e42977fd1f149bb62f4b846.zip", "author": "Vidhi Jain;Yixin Lin;Eric Undersander;Yonatan Bisk;Akshara Rai", "authorids": "~Vidhi_Jain2;~Yixin_Lin1;~Eric_Undersander2;~Yonatan_Bisk1;~Akshara_Rai1", "gender": "F;M;;M;", "homepage": "http://vidhijain.github.io;https://yixinlin.net;https://www.ericundersander.com/;http://www.YonatanBisk.com;https://ai.facebook.com/people/akshara-rai", "dblp": "199/2574;236/9891;209/9989;38/9282;", "google_scholar": ";;;bWoGh8UAAAAJ;", "orcid": ";;;0000-0002-2111-9081;", "linkedin": "vidhijain96/;;ericu;yonatanbisk/;", "or_profile": "~Vidhi_Jain2;~Yixin_Lin1;~Eric_Undersander2;~Yonatan_Bisk1;~Akshara_Rai1", "aff": "Meta Facebook;Facebook AI Research;Meta ;Meta;FAIR, Meta AI", "aff_domain": "fb.com;facebook.com;meta.com;meta.com;meta.com", "position": "AI resident;Research engineer;Research Engineer;Visiting Professor;Researcher", "bibtex": "@inproceedings{\njain2022transformers,\ntitle={Transformers Are Adaptable Task Planners},\nauthor={Vidhi Jain and Yixin Lin and Eric Undersander and Yonatan Bisk and Akshara Rai},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Eal_lL08v_l}\n}", "github": "", "project": "", "reviewers": "qr8n;jkUB;999X;3v7A", "site": "https://openreview.net/forum?id=Eal_lL08v_l", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14552185801254109161&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ef7xodOrgNW", "title": "Lyapunov Design for Robust and Efficient Robotic Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "We integrate control Lyapunov functions with model-free reinforcement learning to rapidly learn stable controllers on hardware. ", "abstract": "Recent advances in the reinforcement learning (RL) literature have enabled roboticists to automatically train complex policies in simulated environments. However, due to the poor sample complexity of these methods, solving RL problems using real-world data remains a challenging problem. This paper introduces a novel cost-shaping method which aims to reduce the number of samples needed to learn a stabilizing controller. The method adds a term involving a Control Lyapunov Function (CLF) -- an `energy-like' function from the model-based control literature -- to typical cost formulations. Theoretical results demonstrate the new costs lead to stabilizing controllers when smaller discount factors are used, which is well-known to reduce sample complexity. Moreover, the addition of the CLF term `robustifies' the search for a stabilizing controller by ensuring that even highly sub-optimal polices will stabilize the system. 
We demonstrate our approach with two hardware examples where we learn stabilizing controllers for a cartpole and an A1 quadruped with only seconds and a few minutes of fine-tuning data, respectively. Furthermore, simulation benchmark studies show that obtaining stabilizing policies by optimizing our proposed costs requires orders of magnitude less data compared to standard cost designs.", "keywords": "Reinforcement Learning;Control Lyapunov Functions;Stability;Robustness", "primary_area": "", "supplementary_material": "/attachment/ebabbd9eab74a09cda44ca72b9b7c80322bca79e.zip", "author": "Tyler Westenbroek;Fernando Castaneda;Ayush Agrawal;Shankar Sastry;Koushil Sreenath", "authorids": "~Tyler_Westenbroek1;fcastaneda@berkeley.edu;~Ayush_Agrawal1;~Shankar_Sastry1;~Koushil_Sreenath1", "gender": "M;;M;;M", "homepage": "https://scholar.google.com/citations?user=aqSKwDQAAAAJ&hl=en;;https://sites.google.com/view/ayushagrawal;;", "dblp": ";;;;", "google_scholar": ";;ieBE_0MAAAAJ;;o9aFV8cAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tyler_Westenbroek1;fcastaneda@berkeley.edu;~Ayush_Agrawal1;~Shankar_Sastry1;~Koushil_Sreenath1", "aff": "University of California, Berkeley;;University of California, Berkeley;;University of California, Berkeley", "aff_domain": "berkeley.edu;;berkeley.edu;;berkeley.edu", "position": "PhD student;;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nwestenbroek2022lyapunov,\ntitle={Lyapunov Design for Robust and Efficient Robotic Reinforcement Learning},\nauthor={Tyler Westenbroek and Fernando Castaneda and Ayush Agrawal and Shankar Sastry and Koushil Sreenath},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Ef7xodOrgNW}\n}", "github": "", "project": "", "reviewers": "9gjg;eFRG;ACEu;XRvu", "site": "https://openreview.net/forum?id=Ef7xodOrgNW", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13432330475128529913&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "F6iq9FU2xnc", "title": "Towards Long-Tailed 3D Detection", "track": "main", "status": "Poster", "tldr": "We explore the problem of long-tailed 3D detection (LT3D) and find that multi-modal cues are crucial for LT3D.", "abstract": "Contemporary autonomous vehicle (AV) benchmarks have advanced techniques for training 3D detectors, particularly on large-scale lidar data. Surprisingly, although semantic class labels naturally follow a long-tailed distribution, contemporary benchmarks focus on only a few common classes (e.g., pedestrian and car) and neglect many rare classes in-the-tail (e.g., debris and stroller).\nHowever, AVs must still detect rare classes to ensure safe operation. Moreover, semantic classes are often organized within a hierarchy, e.g., tail classes such as child and construction-worker are arguably subclasses of pedestrian. 
However, such hierarchical relationships are often ignored, which may lead to misleading estimates of performance and missed opportunities for algorithmic innovation. We address these challenges by formally studying the problem of Long-Tailed 3D Detection (LT3D), which evaluates on all classes, including those in-the-tail. We evaluate and innovate upon popular 3D detection codebases, such as CenterPoint and PointPillars, adapting them for LT3D.\nWe develop hierarchical losses that promote feature sharing across common-vs-rare classes, as well as improved detection metrics that award partial credit to \"reasonable\" mistakes respecting the hierarchy (e.g., mistaking a child for an adult). Finally, we point out that fine-grained tail class accuracy is particularly improved via multimodal fusion of RGB images with LiDAR; simply put, small fine-grained classes are challenging to identify from sparse (lidar) geometry alone, suggesting that multimodal cues are crucial to long-tailed 3D detection. Our modifications improve accuracy by 5% AP on average for all classes, and dramatically improve AP for rare classes (e.g., stroller AP improves from 3.6 to 31.6).", "keywords": "Autonomous Vehicles;Long-Tailed 3D Detection;Multimodal Fusion", "primary_area": "", "supplementary_material": "/attachment/bcbde97b9f917eee42d8eb6f1c8eccb254cbbbb2.zip", "author": "Neehar Peri;Achal Dave;Deva Ramanan;Shu Kong", "authorids": "~Neehar_Peri1;~Achal_Dave1;~Deva_Ramanan1;~Shu_Kong1", "gender": "M;M;M;M", "homepage": "http://neeharperi.com;http://www.achaldave.com/;https://www.cs.cmu.edu/~deva/;https://aimerykong.github.io/", "dblp": "241/5094;156/1161;49/488;26/11141", "google_scholar": "X3cGY7wAAAAJ;oQyYH9kAAAAJ;9B8PoXUAAAAJ;sm9FdLoAAAAJ", "orcid": ";;;0000-0002-1362-5937", "linkedin": "neeharperi/;;;aimerykong/", "or_profile": "~Neehar_Peri1;~Achal_Dave1;~Deva_Ramanan1;~Shu_Kong1", "aff": "Carnegie Mellon University;Amazon;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;amazon.com;cs.cmu.edu;cmu.edu", "position": "PhD student;Researcher;Full Professor;Postdoc Fellow", "bibtex": "@inproceedings{\nperi2022towards,\ntitle={Towards Long-Tailed 3D Detection},\nauthor={Neehar Peri and Achal Dave and Deva Ramanan and Shu Kong},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=F6iq9FU2xnc}\n}", "github": "", "project": "", "reviewers": "bNMv;4cQJ;LdEV;b6Vx", "site": "https://openreview.net/forum?id=F6iq9FU2xnc", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=359896664217839694&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.cmu.edu;https://www.amazon.com", "aff_unique_abbr": "CMU;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "GS8xD_elvgC", "title": "VIRDO++: Real-World, Visuo-tactile Dynamics and Perception of Deformable Objects", "track": "main", "status": "Poster", "tldr": "", "abstract": "Deformable objects manipulation can benefit from representations that seamlessly integrate vision and touch while handling occlusions. 
In this work, we present a novel approach for, and real-world demonstration of, multimodal visuo-tactile state-estimation and dynamics prediction for deformable objects. Our approach, VIRDO++, builds on recent progress in multimodal neural implicit representations for deformable object state-estimation (VIRDO) via a new formulation for deformation dynamics and a complementary state-estimation algorithm that (i) maintains a belief over deformations, and (ii) enables practical real-world application by removing the need for privileged contact information. In the context of two real-world robotic tasks, we show: (i) high-fidelity cross-modal state-estimation and prediction of deformable objects from partial visuo-tactile feedback, and (ii) generalization to unseen objects and contact formations. ", "keywords": "Deformable Object Manipulation;Multimodal Representation Learning", "primary_area": "", "supplementary_material": "/attachment/d380ffc21400c7c65bbf89ddb128b7a17d821278.zip", "author": "Youngsun Wi;Andy Zeng;Pete Florence;Nima Fazeli", "authorids": "~Youngsun_Wi1;~Andy_Zeng3;~Pete_Florence1;~Nima_Fazeli1", "gender": ";;;M", "homepage": "https://www.mmintlab.com/;http://www.peteflorence.com/;https://www.mmintlab.com;http://andyzeng.github.io/", "dblp": ";;;http://dblp.uni-trier.de/pers/hd/z/Zeng:Andy", "google_scholar": ";;;q7nFtUcAAAAJ", "orcid": ";;;", "linkedin": "youngsun-wi-1332761a0/;;;", "or_profile": "~Youngsun_Wi1;~Pete_Florence1;~Nima_Fazeli1;~Andy_Zeng1", "aff": "University of Michigan;Google;University of Michigan;Google", "aff_domain": "umich.edu;google.com;umich.edu;google.com", "position": "PhD student;Research Scientist;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nwi2022virdo,\ntitle={{VIRDO}++: Real-World, Visuo-tactile Dynamics and Perception of Deformable Objects},\nauthor={Youngsun Wi and Andy Zeng and Pete Florence and Nima Fazeli},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=GS8xD_elvgC}\n}", "github": "", "project": "", "reviewers": "NDvN;8Dpk;fX59;y9NZ", "site": "https://openreview.net/forum?id=GS8xD_elvgC", "pdf_size": 0, "rating": "1;4;6;6", "confidence": "", "rating_avg": 4.25, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3425502289312064441&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Michigan;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.umich.edu;https://www.google.com", "aff_unique_abbr": "UM;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "GTyBkq36tjx", "title": "Neural Geometric Fabrics: Efficiently Learning High-Dimensional Policies from Demonstration", "track": "main", "status": "Poster", "tldr": "We contribute a structured approach for sample-efficient learning of dexterous manipulation skills for a 23 DoF physical hand-arm system from demonstrations, by leveraging Geometric Fabrics, a recent theoretical framework for robot motion generation.", "abstract": "Learning dexterous manipulation policies for multi-fingered robots has been a long-standing challenge in robotics. 
Existing methods either limit themselves to highly constrained problems and smaller models to achieve extreme sample efficiency or sacrifice sample efficiency to gain capacity to solve more complex tasks with deep neural networks. In this work, we develop a structured approach to sample-efficient learning of dexterous manipulation skills from demonstrations by leveraging recent advances in robot motion generation and control. Specifically, our policy structure is induced by Geometric Fabrics - a recent framework that generalizes classical mechanical systems to allow for flexible design of expressive robot motions. To avoid the cumbersome manual design required by existing motion generators, we introduce Neural Geometric Fabric (NGF) - a framework that learns Geometric Fabric-based policies from data. NGF policies are provably stable and capable of encoding speed-invariant geometries of complex motions in multiple task spaces simultaneously. We demonstrate that NGFs can learn to perform a variety of dexterous manipulation tasks on a 23-DoF hand-arm physical robotic platform purely from demonstrations. Results from comprehensive comparative and ablative experiments show that NGF's structure and action spaces help learn acceleration-based policies that consistently outperform state-of-the-art baselines like Riemannian Motion Policies (RMPs), and other commonly used networks, such as feed-forward and recurrent neural networks. More importantly, we demonstrate that NGFs do not rely on often-used and expertly-designed operational-space controllers, promoting an advancement towards efficiently learning safe, stable, and high-dimensional controllers.", "keywords": "Imitation Learning;Dexterous Manipulation", "primary_area": "", "supplementary_material": "/attachment/79642ea401085f15588289ebff57cbddc5aded3a.zip", "author": "Mandy Xie;Ankur Handa;Stephen Tyree;Dieter Fox;Harish Ravichandar;Nathan D. Ratliff;Karl Van Wyk", "authorids": "~Mandy_Xie1;~Ankur_Handa1;~Stephen_Tyree1;~Dieter_Fox1;~Harish_Ravichandar1;~Nathan_D._Ratliff1;~Karl_Van_Wyk1", "gender": "F;M;M;M;;;", "homepage": "https://mandyxie.github.io/;http://ankurhanda.com;https://swtyree.github.io;https://homes.cs.washington.edu/~fox/;http://harishravichandar.com/;;", "dblp": ";32/8653;60/1032;f/DieterFox;237/9959;43/2704;", "google_scholar": ";sCTJI-0AAAAJ;;DqXsbPAAAAAJ;d2HP6SMAAAAJ;https://scholar.google.com/citations?hl=en;TCYAoF8AAAAJ", "orcid": ";;;;0000-0002-6635-2637;;", "linkedin": ";;;;;nathan-ratliff-b347018b/;", "or_profile": "~Mandy_Xie1;~Ankur_Handa1;~Stephen_Tyree1;~Dieter_Fox1;~Harish_Ravichandar1;~Nathan_D._Ratliff1;~Karl_Van_Wyk1", "aff": "Georgia Institute of Technology;Imperial College London;NVIDIA;Department of Computer Science;Georgia Institute of Technology;NVIDIA;", "aff_domain": "gatech.edu;imperial.ac.uk;nvidia.com;cs.washington.edu;gatech.edu;nvidia.com;", "position": "PhD student;Research Scientist;Research scientist;Full Professor;Assistant Professor;Researcher;", "bibtex": "@inproceedings{\nxie2022neural,\ntitle={Neural Geometric Fabrics: Efficiently Learning High-Dimensional Policies from Demonstration},\nauthor={Mandy Xie and Ankur Handa and Stephen Tyree and Dieter Fox and Harish Ravichandar and Nathan D. 
Ratliff and Karl Van Wyk},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=GTyBkq36tjx}\n}", "github": "", "project": "", "reviewers": "1QRC;WsPo;oe7Z;zkro", "site": "https://openreview.net/forum?id=GTyBkq36tjx", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16556952486396550589&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;0;2", "aff_unique_norm": "Georgia Institute of Technology;Imperial College London;NVIDIA;Unknown Institution", "aff_unique_dep": ";;NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www.gatech.edu;https://www.imperial.ac.uk;https://www.nvidia.com;", "aff_unique_abbr": "Georgia Tech;ICL;NVIDIA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom;" }, { "id": "GeM6VUwYinO", "title": "One-Shot Transfer of Affordance Regions? AffCorrs!", "track": "main", "status": "Poster", "tldr": "One-shot transfer of affordance masks to unseen objects, both belonging to the same class and other classes. ", "abstract": "In this work, we tackle one-shot visual search of object parts. Given a single reference image of an object with annotated affordance regions, we segment semantically corresponding parts within a target scene. We propose AffCorrs, an unsupervised model that combines the properties of pre-trained DINO-ViT's image descriptors and cyclic correspondences. We use AffCorrs to find corresponding affordances both for intra- and inter-class one-shot part segmentation. This task is more difficult than supervised alternatives, but enables future work such as learning affordances via imitation and assisted teleoperation.", "keywords": "One-shot;Affordance;Correspondence", "primary_area": "", "supplementary_material": "/attachment/d57a6b62bbf8ed2b06c45170ad88b9b093c4cda8.zip", "author": "Denis Hadjivelichkov;Sicelukwanda Zwane;Lourdes Agapito;Marc Peter Deisenroth;Dimitrios Kanoulas", "authorids": "~Denis_Hadjivelichkov1;sicelukwanda.zwane@gmail.com;~Lourdes_Agapito1;~Marc_Peter_Deisenroth1;~Dimitrios_Kanoulas1", "gender": "M;;;;M", "homepage": ";;;;https://dkanou.github.io", "dblp": "303/4527;;;;20/4287.html", "google_scholar": ";;;;cE8_5EsAAAAJ", "orcid": ";;;;0000-0002-3684-1472", "linkedin": "denishadjivelichkov/;;;;", "or_profile": "~Denis_Hadjivelichkov1;sicelukwanda.zwane@gmail.com;~Lourdes_Agapito1;~Marc_Peter_Deisenroth1;~Dimitrios_Kanoulas1", "aff": "University College London;;;;University College London", "aff_domain": "ucl.ac.uk;;;;ucl.ac.uk", "position": "PhD student;;;;Assistant Professor", "bibtex": "@inproceedings{\nhadjivelichkov2022oneshot,\ntitle={One-Shot Transfer of Affordance Regions? 
AffCorrs!},\nauthor={Denis Hadjivelichkov and Sicelukwanda Zwane and Lourdes Agapito and Marc Peter Deisenroth and Dimitrios Kanoulas},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=GeM6VUwYinO}\n}", "github": "https://github.com/RPL-CS-UCL/UCL-AffCorrs", "project": "", "reviewers": "LmPK;Zz3M;oETv;Mo7Z", "site": "https://openreview.net/forum?id=GeM6VUwYinO", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4850913002387383222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "Go64YOmGwxM", "title": "TRITON: Neural Neural Textures for Better Sim2Real", "track": "main", "status": "Poster", "tldr": "We use differentiable rendering and unsupervised image translation to enhance the photorealism of robotic simulations.", "abstract": "Unpaired image translation algorithms can be used for sim2real tasks, but many fail to generate temporally consistent results. We present a new approach that combines differentiable rendering with image translation to achieve temporal consistency over indefinite timescales, using surface consistency losses and neural neural textures. We call this algorithm TRITON (Texture Recovering Image Translation Network): an unsupervised, end-to-end, stateless sim2real algorithm that leverages the underlying 3D geometry of input scenes by generating realistic-looking learnable neural textures. By settling on a particular texture for the objects in a scene, we ensure consistency between frames statelessly. TRITON is not limited to camera movements \u2014 it can handle the movement and deformation of objects as well, making it useful for downstream tasks such as robotic manipulation. We demonstrate the superiority of our approach both qualitatively and quantitatively, using robotic experiments and comparisons to ground truth photographs. We show that TRITON generates more useful images than other algorithms do.
Please see our project website: tritonpaper.github.io", "keywords": "differentiable rendering;sim2real;image translation", "primary_area": "", "supplementary_material": "/attachment/35740d3635c1dd195d834c3c0cd7bbb8f2617895.zip", "author": "Ryan D Burgert;Jinghuan Shang;Xiang Li;Michael S Ryoo", "authorids": "rburgert@cs.stonybrook.edu;~Jinghuan_Shang1;~Xiang_Li27;~Michael_S_Ryoo1", "gender": ";M;;M", "homepage": ";https://www.cs.stonybrook.edu/~jishang;;http://michaelryoo.com/", "dblp": ";218/7364;;r/MichaelSRyoo", "google_scholar": ";gMvLIDUAAAAJ;;vcw0TJIAAAAJ", "orcid": ";0000-0001-7301-5981;;", "linkedin": ";;;", "or_profile": "rburgert@cs.stonybrook.edu;~Jinghuan_Shang1;~Xiang_Li27;~Michael_S_Ryoo1", "aff": ";Department of Computer Science, State University of New York, Stony Brook;;Google DeepMind", "aff_domain": ";cs.stonybrook.edu;;google.com", "position": ";PhD student;;Research Scientist", "bibtex": "@inproceedings{\nburgert2022triton,\ntitle={{TRITON}: Neural Neural Textures for Better Sim2Real},\nauthor={Ryan D Burgert and Jinghuan Shang and Xiang Li and Michael S Ryoo},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Go64YOmGwxM}\n}", "github": "https://github.com/TritonPaper/TRITON", "project": "", "reviewers": "agLh;uVwa;9MxW;z1oF", "site": "https://openreview.net/forum?id=Go64YOmGwxM", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6761207461183072771&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "State University of New York;Google", "aff_unique_dep": "Department of Computer Science;Google DeepMind", "aff_unique_url": "https://www.stonybrook.edu;https://deepmind.com", "aff_unique_abbr": "SUNY Stony Brook;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "H6rr_CGzV9y", "title": "A Dual Representation Framework for Robot Learning with Human Guidance", "track": "main", "status": "Poster", "tldr": "A robotic agent learns more efficiently from human guidance with two representations, one for learning low-level control policies, the other for representing internal states of humans.", "abstract": "The ability to interactively learn skills from human guidance and adjust behavior according to human preference is crucial to accelerating robot learning. But human guidance is an expensive resource, calling for methods that can learn efficiently. In this work, we argue that learning is more efficient if the agent is equipped with a high-level, symbolic representation. We propose a dual representation framework for robot learning from human guidance. The dual representation used by the robotic agent includes one for learning a sensorimotor control policy, and the other, in the form of a symbolic scene graph, for encoding the task-relevant information that motivates human input. We propose two novel learning algorithms based on this framework for learning from human evaluative feedback and from preference. In five continuous control tasks in simulation and in the real world, we demonstrate that our algorithms lead to significant improvement in task performance and learning speed. 
Additionally, these algorithms require less human effort and are qualitatively preferred by users.", "keywords": "Human Guidance;Evaluative Feedback;Preference Learning", "primary_area": "", "supplementary_material": "/attachment/5bc1d2931bf61cfbc3f9976d33ad4a4b3640e09b.zip", "author": "Ruohan Zhang;Dhruva Bansal;Yilun Hao;Ayano Hiranaka;Jialu Gao;Chen Wang;Roberto Mart\u00edn-Mart\u00edn;Li Fei-Fei;Jiajun Wu", "authorids": "~Ruohan_Zhang1;~Dhruva_Bansal1;~Yilun_Hao1;~Ayano_Hiranaka1;~Jialu_Gao1;~Chen_Wang16;~Roberto_Mart\u00edn-Mart\u00edn1;~Li_Fei-Fei1;~Jiajun_Wu1", "gender": "M;M;;;F;M;M;F;M", "homepage": "https://ai.stanford.edu/~zharu/;https://www.dhruvabansal.com;https://yih301.github.io;;https://gaojl19.github.io;http://www.chenwangjeremy.net/;https://robertomartinmartin.com/;https://profiles.stanford.edu/fei-fei-li;https://jiajunwu.com", "dblp": ";243/0144;285/4024;;256/3849;;153/7670;79/2528;117/4768", "google_scholar": "-bqvNWoAAAAJ;uUTLG2IAAAAJ;RjQF17YAAAAJ;;PokBJE0AAAAJ;lStkAzsAAAAJ;XOJE8OEAAAAJ;rDfyQnIAAAAJ;2efgcS0AAAAJ", "orcid": ";;;;;;0000-0002-9586-2759;;0000-0002-4176-343X", "linkedin": ";dhruvabansal2k/;yilun-hao-86554a178/;;danikagao/;;;fei-fei-li-4541247/;jiajunwu/", "or_profile": "~Ruohan_Zhang1;~Dhruva_Bansal1;~Yilun_Hao1;~Ayano_Hiranaka1;~Jialu_Gao1;~Chen_Wang16;~Roberto_Mart\u00edn-Mart\u00edn1;~Li_Fei-Fei1;~Jiajun_Wu1", "aff": "Stanford University;Stanford University;Stanford University;;Tsinghua University;Computer Science Department, Stanford University;SalesForce.com;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;;cs.tsinghua.edu.cn;cs.stanford.edu;salesforce.com;stanford.edu;stanford.edu", "position": "Postdoc;MS student;MS student;;Undergrad student;PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2022a,\ntitle={A Dual Representation Framework for Robot Learning with Human Guidance},\nauthor={Ruohan Zhang and Dhruva Bansal and Yilun Hao and Ayano Hiranaka and Jialu Gao and Chen Wang and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n and Li Fei-Fei and Jiajun Wu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=H6rr_CGzV9y}\n}", "github": "", "project": "", "reviewers": "M5fV;JYfb;8h9i;32aJ", "site": "https://openreview.net/forum?id=H6rr_CGzV9y", "pdf_size": 0, "rating": "1;6;6;6", "confidence": "", "rating_avg": 4.75, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5457238602305535682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1;0;2;0;0", "aff_unique_norm": "Stanford University;Tsinghua University;Salesforce", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.tsinghua.edu.cn;https://www.salesforce.com", "aff_unique_abbr": "Stanford;THU;Salesforce", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "HbGgF93Ppoy", "title": "Inferring Smooth Control: Monte Carlo Posterior Policy Iteration with Gaussian Processes", "track": "main", "status": "Oral", "tldr": "Sample-based MPC is a form for Bayesian inference. Use Gaussian processes for smooth actions and choose the temperature to have a healthy effective sample size. 
", "abstract": "Monte Carlo methods have become increasingly relevant for control of non-differentiable systems, approximate dynamics models, and learning from data.\nThese methods scale to high-dimensional spaces and are effective at the non-convex optimization often seen in robot learning. We look at sample-based methods from the perspective of inference-based control, specifically posterior policy iteration.\nFrom this perspective, we highlight how Gaussian noise priors produce rough control actions that are unsuitable for physical robot deployment.\nConsidering smoother Gaussian process priors, as used in episodic reinforcement learning and motion planning, we demonstrate how smoother model predictive control can be achieved using online sequential inference.\nThis inference is realized through an efficient factorization of the action distribution, and novel means of optimizing the likelihood temperature for to improve importance sampling accuracy.\nWe evaluate this approach on several high-dimensional robot control tasks, matching the sample efficiency of prior heuristic methods while also ensuring smoothness.\nSimulation results can be seen at monte-carlo-ppi.github.io.", "keywords": "model predictive control;policy search;reinforcement learning;approximate inference", "primary_area": "", "supplementary_material": "/attachment/a3b3e2bb9c57dc3d5caf2a1289628bd2fccb39f9.zip", "author": "Joe Watson;Jan Peters", "authorids": "~Joe_Watson1;~Jan_Peters3", "gender": "M;M", "homepage": "http://joemwatson.github.io/;https://www.jan-peters.net", "dblp": "143/2943;p/JanPeters1", "google_scholar": "https://scholar.google.co.uk/citations?user=xLtXIZAAAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ", "orcid": ";0000-0002-5266-8091", "linkedin": ";janrpeters/", "or_profile": "~Joe_Watson1;~Jan_Peters3", "aff": "TU Darmstadt;TU Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwatson2022inferring,\ntitle={Inferring Smooth Control: Monte Carlo Posterior Policy Iteration with Gaussian Processes},\nauthor={Joe Watson and Jan Peters},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=HbGgF93Ppoy}\n}", "github": "https://github.com/JoeMWatson/monte-carlo-posterior-policy-iteration", "project": "", "reviewers": "vNLR;qNLg;9hau;VNXb", "site": "https://openreview.net/forum?id=HbGgF93Ppoy", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1366828225640359379&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "IKC5TfXLuW0", "title": "Few-Shot Preference Learning for Human-in-the-Loop RL", "track": "main", "status": "Poster", "tldr": "We shift the focus of reward learning from preference to the multi-task setting, and introduce a novel few-shot preference-based RL algorithm that requires 20X fewer queries than previous methods, enabling data collection from real humans.", "abstract": "While reinforcement learning (RL) has become a more 
popular approach for robotics, designing sufficiently informative reward functions for complex tasks has proven to be extremely difficult due to their inability to capture human intent and policy exploitation. Preference-based RL algorithms seek to overcome these challenges by directly learning reward functions from human feedback. Unfortunately, prior work either requires an unreasonable number of queries implausible for any human to answer or overly restricts the class of reward functions to guarantee the elicitation of the most informative queries, resulting in models that are insufficiently expressive for realistic robotics tasks. Contrary to most works that focus on query selection to \\emph{minimize} the amount of data required for learning reward functions, we take an opposite approach: \\emph{expanding} the pool of available data by viewing human-in-the-loop RL through the more flexible lens of multi-task learning. Motivated by the success of meta-learning, we pre-train preference models on prior task data and quickly adapt them for new tasks using only a handful of queries. Empirically, we reduce the amount of online feedback needed to train manipulation policies in Meta-World by 20$\\times$, and demonstrate the effectiveness of our method on a real Franka Panda Robot. Moreover, this reduction in query-complexity allows us to train robot policies from actual human users. Videos of our results can be found at \\url{https://sites.google.com/view/few-shot-preference-rl/home}.", "keywords": "preference learning;interactive learning;multi-task learning;human-in-the-loop", "primary_area": "", "supplementary_material": "/attachment/fe39309ccb6b8378b8884c7d7c9e0aac1a0a02f2.zip", "author": "Donald Joseph Hejna III;Dorsa Sadigh", "authorids": "~Donald_Joseph_Hejna_III1;~Dorsa_Sadigh1", "gender": "M;F", "homepage": "https://joeyhejna.com;https://dorsa.fyi/", "dblp": "336/3297;117/3174", "google_scholar": "y_sLoXoAAAAJ;ZaJEZpYAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Donald_Joseph_Hejna_III1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\niii2022fewshot,\ntitle={Few-Shot Preference Learning for Human-in-the-Loop {RL}},\nauthor={Donald Joseph Hejna III and Dorsa Sadigh},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=IKC5TfXLuW0}\n}", "github": "https://github.com/jhejna/few-shot-preference-rl/", "project": "", "reviewers": "m37o;FhLm;pAzu;mqZu", "site": "https://openreview.net/forum?id=IKC5TfXLuW0", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 28, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18299322527024093974&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "JErNvd_lKHr", "title": "Verified Path Following Using Neural Control Lyapunov Functions", "track": "main", "status": "Poster", "tldr": "An expansion of neural Lyapunov control functions successfully tested on a real-world test platform", "abstract": "We present a
framework that uses control Lyapunov functions (CLFs) to implement provably stable path-following controllers for autonomous mobile platforms. Our approach is based on learning a guaranteed CLF for path following by using recent approaches --- combining machine learning with automated theorem proving --- to train a neural network feedback law along with a CLF that guarantees stabilization for driving along low-curvature reference paths. We discuss how key properties of the CLF can be exploited to extend the range of the curvatures for which the stability guarantees remain valid. We then demonstrate that our approach yields a controller that obeys theoretical guarantees in simulation, but also performs well in practice. We show our method is both a verified method of control and better than a common MPC implementation in computation time. Additionally, we implement the controller on-board on a $\\frac18$-scale autonomous vehicle testing platform and present results for various robust path following scenarios.", "keywords": "Path Following;Trajectory Tracking;Control Lyapunov Functions;Plan Execution;Verified Autonomy", "primary_area": "", "supplementary_material": "/attachment/82075033d0ee9f8fefb699b696db88f0b4e2e956.zip", "author": "Alec Reed;Guillaume O Berger;Sriram Sankaranarayanan;Chris Heckman", "authorids": "~Alec_Reed1;~Guillaume_O_Berger1;~Sriram_Sankaranarayanan1;~Chris_Heckman1", "gender": "M;M;M;M", "homepage": ";https://guberger.github.io/;http://www.cs.colorado.edu/~srirams;http://www.colorado.edu/cs/christoffer-heckman", "dblp": ";;82/1542.html;170/8568", "google_scholar": ";https://scholar.google.be/citations?user=z_q7fV0AAAAJ;V8RKLEsAAAAJ;-YOtPcIAAAAJ", "orcid": ";0000-0002-0633-8948;0000-0001-7315-4340;", "linkedin": "reeda3/;;;", "or_profile": "~Alec_Reed1;~Guillaume_O_Berger1;~Sriram_Sankaranarayanan1;~Chris_Heckman1", "aff": "University of Colorado at Boulder;University of Colorado at Boulder;University of Colorado at Boulder;Amazon", "aff_domain": "cs.colorado.edu;colorado.edu;colorado.edu;amazon.com", "position": "PhD student;Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\nreed2022verified,\ntitle={Verified Path Following Using Neural Control Lyapunov Functions},\nauthor={Alec Reed and Guillaume O Berger and Sriram Sankaranarayanan and Chris Heckman},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=JErNvd_lKHr}\n}", "github": "", "project": "", "reviewers": "VUEm;mRSR;wkL6;U6aE", "site": "https://openreview.net/forum?id=JErNvd_lKHr", "pdf_size": 0, "rating": "4;4;4;6", "confidence": "", "rating_avg": 4.5, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11029201135570135559&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Colorado;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.colorado.edu;https://www.amazon.com", "aff_unique_abbr": "CU;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Boulder;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "JWROnOf4w-K", "title": "MidasTouch: Monte-Carlo inference over distributions across sliding touch", "track": "main", "status": "Oral", "tldr": "Tracking the pose distribution of a robot finger on an object surface over time, using surface geometry captured by a tactile sensor", 
"abstract": "We present MidasTouch, a tactile perception system for online global localization of a vision-based touch sensor sliding on an object surface. This framework takes in posed tactile images over time, and outputs an evolving distribution of sensor pose on the object's surface, without the need for visual priors. Our key insight is to estimate local surface geometry with tactile sensing, learn a compact representation for it, and disambiguate these signals over a long time horizon. The backbone of MidasTouch is a Monte-Carlo particle filter, with a measurement model based on a tactile code network learned from tactile simulation. This network, inspired by LIDAR place recognition, compactly summarizes local surface geometries. These generated codes are efficiently compared against a precomputed tactile codebook per-object, to update the pose distribution. We further release the YCB-Slide dataset of real-world and simulated forceful sliding interactions between a vision-based tactile sensor and standard YCB objects. While single-touch localization can be inherently ambiguous, we can quickly localize our sensor by traversing salient surface geometries. Project page: https://suddhu.github.io/midastouch-tactile/", "keywords": "Tactile perception;Localization;3D deep learning", "primary_area": "", "supplementary_material": "/attachment/848ee7fc33b5b3dd1ec01f17646c6e3bc49d7dbb.zip", "author": "Sudharshan Suresh;Zilin Si;Stuart Anderson;Michael Kaess;Mustafa Mukadam", "authorids": "~Sudharshan_Suresh1;zsi@andrew.cmu.edu;~Stuart_Anderson1;~Michael_Kaess1;~Mustafa_Mukadam1", "gender": "M;;M;M;M", "homepage": "http://www.cs.cmu.edu/~sudhars1/;;;https://www.cs.cmu.edu/~kaess/;http://www.mustafamukadam.com", "dblp": ";;;26/6036;", "google_scholar": "xYC738YAAAAJ;;8orqBsYAAAAJ;27eupmsAAAAJ;yYpm9LoAAAAJ", "orcid": "0000-0001-9030-2800;;;0000-0002-7590-3357;", "linkedin": ";;stuartoanderson/;michaelkaess/;mhmukadam/", "or_profile": "~Sudharshan_Suresh1;zsi@andrew.cmu.edu;~Stuart_Anderson1;~Michael_Kaess1;~Mustafa_Mukadam1", "aff": "Carnegie Mellon University;;Meta;Carnegie Mellon University;Meta AI", "aff_domain": "cmu.edu;;meta.com;cmu.edu;meta.com", "position": "PhD student;;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nsuresh2022midastouch,\ntitle={MidasTouch: Monte-Carlo inference over distributions across sliding touch},\nauthor={Sudharshan Suresh and Zilin Si and Stuart Anderson and Michael Kaess and Mustafa Mukadam},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=JWROnOf4w-K}\n}", "github": "https://github.com/facebookresearch/MidasTouch", "project": "", "reviewers": "Cxtg;xg8x;aApi;tqe3", "site": "https://openreview.net/forum?id=JWROnOf4w-K", "pdf_size": 0, "rating": "1;6;10;10", "confidence": "", "rating_avg": 6.75, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12787149302575049798&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "JqqSTgdQ85F", "title": "Visuo-Tactile Transformers for Manipulation", "track": "main", 
"status": "Poster", "tldr": "VTT uses multimodal feedback together with self and cross-modal attention to build latent heatmap representations that seamlessly integrate vision and touch", "abstract": "Learning representations in the joint domain of vision and touch can improve manipulation dexterity, robustness, and sample-complexity by exploiting mutual information and complementary cues. Here, we present Visuo-Tactile Transformers (VTTs), a novel multimodal representation learning approach suited for model-based reinforcement learning and planning. Our approach extends the Visual Transformer to handle visuo-tactile feedback. Specifically, VTT uses tactile feedback together with self and cross-modal attention to build latent heatmap representations that focus attention on important task features in the visual domain. We demonstrate the efficacy of VTT for representation learning with a comparative evaluation against baselines on four simulated robot tasks and one real world block pushing task. We conduct an ablation study over the components of VTT to highlight the importance of cross-modality in representation learning for robotic manipulation. ", "keywords": "Multimodal Learning;Reinforcement Learning;Manipulation", "primary_area": "", "supplementary_material": "/attachment/515c150d11684fb42e1a4d21abb1dc7070da4029.zip", "author": "Yizhou Chen;Mark Van der Merwe;Andrea Sipos;Nima Fazeli", "authorids": "~Yizhou_Chen4;~Mark_Van_der_Merwe1;asipos@umich.edu;~Nima_Fazeli1", "gender": "M;M;;", "homepage": ";https://mvandermerwe.github.io/;;https://www.mmintlab.com", "dblp": ";249/5378;;", "google_scholar": ";cKmwbi0AAAAJ;;", "orcid": ";;;", "linkedin": "yizhou-chen-325819184/;;;", "or_profile": "~Yizhou_Chen4;~Mark_Van_der_Merwe1;asipos@umich.edu;~Nima_Fazeli1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;;University of Michigan", "aff_domain": "umich.edu;umich.edu;;umich.edu", "position": "MS student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nchen2022visuotactile,\ntitle={Visuo-Tactile Transformers for Manipulation},\nauthor={Yizhou Chen and Mark Van der Merwe and Andrea Sipos and Nima Fazeli},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=JqqSTgdQ85F}\n}", "github": "https://github.com/yich7045/Visuo-Tactile-Transformers-for-Manipulation", "project": "", "reviewers": "Y7uf;nBFB;2Auu;KWDi", "site": "https://openreview.net/forum?id=JqqSTgdQ85F", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7208396236291537310&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "JtK7F6D8t-3", "title": "Learning Semantics-Aware Locomotion Skills from Human Demonstration", "track": "main", "status": "Poster", "tldr": "", "abstract": "The semantics of the environment, such as the terrain type and property, reveals important information for legged robots to adjust their behaviors. 
In this work, we present a framework that learns semantics-aware locomotion skills from perception for quadrupedal robots, such that the robot can traverse through complex offroad terrains with appropriate speeds and gaits using perception information. Due to the lack of high-fidelity outdoor simulation, our framework needs to be trained directly in the real world, which brings unique challenges in data efficiency and safety. To ensure sample efficiency, we pre-train the perception model with an off-road driving dataset. To avoid the risks of real-world policy exploration, we leverage human demonstration to train a speed policy that selects a desired forward speed from camera image. For maximum traversability, we pair the speed policy with a gait selector, which selects a robust locomotion gait for each forward speed. Using only 40 minutes of human demonstration data, our framework learns to adjust the speed and gait of the robot based on perceived terrain semantics, and enables the robot to walk over 6km without failure at close-to-optimal speed", "keywords": "Legged Locomotion;Semantic Perception;Imitation Learning;Hierarchical Control", "primary_area": "", "supplementary_material": "/attachment/b52ffd6cedece9a69615503ff1a508f564915c95.zip", "author": "Yuxiang Yang;Xiangyun Meng;Wenhao Yu;Tingnan Zhang;Jie Tan;Byron Boots", "authorids": "~Yuxiang_Yang2;~Xiangyun_Meng1;~Wenhao_Yu1;~Tingnan_Zhang1;~Jie_Tan1;~Byron_Boots1", "gender": "M;;M;M;M;", "homepage": "https://yxyang.github.io;https://homes.cs.washington.edu/~xiangyun;https://wenhaoyu.weebly.com/;;http://www.jie-tan.net;", "dblp": ";169/3352;;https://dblp.uni-trier.de/pers/hd/z/Zhang:Tingnan;81/7419;", "google_scholar": "2NQKmzIAAAAJ;;1bF2s2kAAAAJ;RM2vMNcAAAAJ;neGbgzYAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;jie-tan/;", "or_profile": "~Yuxiang_Yang2;~Xiangyun_Meng1;~Wenhao_Yu1;~Tingnan_Zhang1;~Jie_Tan1;~Byron_Boots1", "aff": "Google;University of Washington;Google;Google;Google;", "aff_domain": "google.com;washington.edu;google.com;google.com;google.com;", "position": "Researcher;PhD student;Software Engineer;Software Engineer;Research Scientist;", "bibtex": "@inproceedings{\nyang2022learning,\ntitle={Learning Semantics-Aware Locomotion Skills from Human Demonstration},\nauthor={Yuxiang Yang and Xiangyun Meng and Wenhao Yu and Tingnan Zhang and Jie Tan and Byron Boots},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=JtK7F6D8t-3}\n}", "github": "", "project": "", "reviewers": "Rr19;oYTQ;ofNu;FzJu", "site": "https://openreview.net/forum?id=JtK7F6D8t-3", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10125560197409305200&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Google;University of Washington", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.washington.edu", "aff_unique_abbr": "Google;UW", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "K0gW0A6gi7G", "title": "Learning Visualization Policies of Augmented Reality for Human-Robot Collaboration", "track": "main", "status": "Poster", "tldr": "We present our framework, called VARIL, that for the first time introduces a 
learning-based Augmented Reality (AR) visualization strategy for human-multi-robot collaboration.", "abstract": "In human-robot collaboration domains, augmented reality (AR) technologies have enabled people to visualize the state of robots. Current AR-based visualization policies are designed manually, which requires a lot of human effort and domain knowledge. When too little information is visualized, human users find the AR interface not useful; when too much information is visualized, they find it difficult to process the visualized information. In this paper, we develop an intelligent AR agent that learns visualization policies (what to visualize, when, and how) from demonstrations. We created a Unity-based platform for simulating warehouse environments where human-robot teammates work on collaborative delivery tasks. We have collected a dataset that includes demonstrations of visualizing robots' current and planned behaviors. Our results from experiments with real human participants show that, compared with competitive baselines from the literature, our learned visualization strategies significantly increase the efficiency of human-robot teams in delivery tasks, while reducing the distraction level of human users.", "keywords": "Augmented Reality;Multi-robot systems;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/5b1707404aad0ba4a78054399d9660d89bf5b7c5.zip", "author": "Kishan Dhananjay Chandan;Jack Albertson;Shiqi Zhang", "authorids": "~Kishan_Dhananjay_Chandan1;jalbert5@binghamton.edu;~Shiqi_Zhang1", "gender": "M;;M", "homepage": "http://perceptobot.com;;http://www.cs.binghamton.edu/~szhang/", "dblp": ";;03/9964-1", "google_scholar": "dZWHPtwAAAAJ;;D0pzuNoAAAAJ", "orcid": ";;0000-0003-4110-8213", "linkedin": "kishan-chandan/;;", "or_profile": "~Kishan_Dhananjay_Chandan1;jalbert5@binghamton.edu;~Shiqi_Zhang1", "aff": "State University of New York at Binghamton;;State University of New York at Binghamton", "aff_domain": "binghamton.edu;;binghamton.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nchandan2022learning,\ntitle={Learning Visualization Policies of Augmented Reality for Human-Robot Collaboration},\nauthor={Kishan Dhananjay Chandan and Jack Albertson and Shiqi Zhang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=K0gW0A6gi7G}\n}", "github": "", "project": "", "reviewers": "XmHJ;TGsr;um9M", "site": "https://openreview.net/forum?id=K0gW0A6gi7G", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 4, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8670692558267151400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York at Binghamton", "aff_unique_dep": "", "aff_unique_url": "https://www.binghamton.edu", "aff_unique_abbr": "SUNY Binghamton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Binghamton", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "K8W6ObPZQyh", "title": "On-Robot Learning With Equivariant Models", "track": "main", "status": "Poster", "tldr": "This paper demonstrates sample-efficient on-robot learning in manipulation using equivariant models.", "abstract": "Recently, equivariant neural network models have been shown to improve sample efficiency for tasks in computer vision
and reinforcement learning. This paper explores this idea in the context of on-robot policy learning in which a policy must be learned entirely on a physical robotic system without reference to a model, a simulator, or an offline dataset. We focus on applications of Equivariant SAC to robotic manipulation and explore a number of variations of the algorithm. Ultimately, we demonstrate the ability to learn several non-trivial manipulation tasks completely through on-robot experiences in less than an hour or two of wall clock time. ", "keywords": "Manipulation;Reinforcement Learning;Equivariance", "primary_area": "", "supplementary_material": "/attachment/848ad8ca16697752c44793d77df0d8d083f3bcbf.zip", "author": "Dian Wang;Mingxi Jia;Xupeng Zhu;Robin Walters;Robert Platt", "authorids": "~Dian_Wang1;~Mingxi_Jia1;~Xupeng_Zhu1;~Robin_Walters1;~Robert_Platt1", "gender": "M;M;M;M;", "homepage": "https://pointw.github.io/;https://saulbatman.github.io/;https://zxp-s-works.github.io/;http://www.robinwalters.com;http://www.ccs.neu.edu/home/rplatt/", "dblp": "191/1369-1;315/4688;257/4426;258/3416;39/5434", "google_scholar": "CckjtfQAAAAJ;1iNSPQIAAAAJ;mwxz-8MAAAAJ;fnprJmUAAAAJ;Z4Y5S2oAAAAJ", "orcid": ";;;;", "linkedin": "dianwang1007;https://www.linkedin.com/mwlite/in/mingxi-jia-6997b9183;xupengzhu-skunk;;", "or_profile": "~Dian_Wang1;~Mingxi_Jia1;~Xupeng_Zhu1;~Robin_Walters1;~Robert_Platt1", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University ;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;northeastern.edu;northeastern.edu;neu.edu", "position": "PhD student;MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2022onrobot,\ntitle={On-Robot Learning With Equivariant Models},\nauthor={Dian Wang and Mingxi Jia and Xupeng Zhu and Robin Walters and Robert Platt},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=K8W6ObPZQyh}\n}", "github": "https://github.com/pointW/equi_rl", "project": "", "reviewers": "vBAF;MYEX;QgU1", "site": "https://openreview.net/forum?id=K8W6ObPZQyh", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=434025900837522558&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "KDuBFXyRuE-", "title": "Iterative Interactive Modeling for Knotting Plastic Bags", "track": "main", "status": "Poster", "tldr": "Knotting plastic bags randomly dropped from the air with a dual-arm robotic system and iterative interactive modeling.", "abstract": "Deformable object manipulation has great research significance for the robotic community and numerous applications in daily life. In this work, we study how to knot plastic bags that are randomly dropped from the air with a dual-arm robot based on image input. The complex initial configuration and terrible material properties of plastic bags pose challenges to reliable perception and planning. Directly knotting it from random initial states is difficult. 
To tackle this problem, we propose Iterative Interactive Modeling (IIM) to first adjust the plastic bag to a standing pose with imitation learning to establish a high-confidence keypoint skeleton model, then perform a set of learned motion primitives to knot it. We leverage spatial action maps to accomplish the iterative pick-and-place action and a graph convolutional network to evaluate the adjusted pose during the IIM process. In experiments, we achieve an 85.0% success rate in knotting 4 different plastic bags, including one with no demonstration.", "keywords": "Plastic Bag Manipulation;Learning from Demonstrations", "primary_area": "", "supplementary_material": "/attachment/26ffff98104b898ea61c0f1875b914d56247f7ff.zip", "author": "Chongkai Gao;Zekun Li;Haichuan Gao;Feng Chen", "authorids": "~Chongkai_Gao1;~Zekun_Li5;~Haichuan_Gao1;~Feng_Chen1", "gender": "M;M;M;M", "homepage": "https://chongkaigao.com/;;;", "dblp": "295/8658;;285/2988.html;21/3047-7", "google_scholar": "l_mOqY8AAAAJ;;;", "orcid": ";0000-0003-0743-1148;;0000-0003-4813-2494", "linkedin": ";;;", "or_profile": "~Chongkai_Gao1;~Zekun_Li5;~Haichuan_Gao1;~Feng_Chen1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\ngao2022iterative,\ntitle={Iterative Interactive Modeling for Knotting Plastic Bags},\nauthor={Chongkai Gao and Zekun Li and Haichuan Gao and Feng Chen},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=KDuBFXyRuE-}\n}", "github": "", "project": "", "reviewers": "eMeZ;NmgN;6XFW;5bQ2", "site": "https://openreview.net/forum?id=KDuBFXyRuE-", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3395168673360603180&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "KWCZfuqshd", "title": "Real-World Robot Learning with Masked Visual Pre-training", "track": "main", "status": "Oral", "tldr": "", "abstract": "In this work, we explore self-supervised visual pre-training on images from diverse, in-the-wild videos for real-world robotic tasks. Like prior work, our visual representations are pre-trained via a masked autoencoder (MAE), frozen, and then passed into a learnable control module. Unlike prior work, we show that the pre-trained representations are effective across a range of real-world robotic tasks and embodiments. We find that our encoder consistently outperforms CLIP (up to 75\\%), supervised ImageNet pre-training (up to 81\\%), and training from scratch (up to 81\\%). 
Finally, we train a 307M parameter vision transformer on a massive collection of 4.5M images from the Internet and egocentric videos, and demonstrate clearly the benefits of scaling visual pre-training for robot learning.", "keywords": "Self-Supervised Learning;Visual Representations;Robot Learning", "primary_area": "", "supplementary_material": "", "author": "Ilija Radosavovic;Tete Xiao;Stephen James;Pieter Abbeel;Jitendra Malik;Trevor Darrell", "authorids": "~Ilija_Radosavovic1;~Tete_Xiao1;~Stephen_James1;~Pieter_Abbeel2;~Jitendra_Malik2;~Trevor_Darrell2", "gender": "M;M;M;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~ilija;http://tetexiao.com;https://stepjam.github.io/;https://people.eecs.berkeley.edu/~pabbeel/;https://people.eecs.berkeley.edu/~malik/;https://people.eecs.berkeley.edu/~trevor/", "dblp": "211/6740;200/8130;163/5669;;58/2944;d/TrevorDarrell", "google_scholar": "UKpinl8AAAAJ;U4RqBdAAAAAJ;OXtG-isAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;oY9R5YQAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;0000-0003-3695-1580;", "linkedin": ";;;;;", "or_profile": "~Ilija_Radosavovic1;~Tete_Xiao1;~Stephen_James1;~Pieter_Abbeel2;~Jitendra_Malik2;~trevor_darrell1", "aff": "University of California, Berkeley;Facebook AI Research;University of California, Berkeley;Covariant;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;facebook.com;berkeley.edu;covariant.ai;berkeley.edu;eecs.berkeley.edu", "position": "PhD student;Researcher;Postdoc;Founder;Full Professor;Professor", "bibtex": "@inproceedings{\nradosavovic2022realworld,\ntitle={Real-World Robot Learning with Masked Visual Pre-training},\nauthor={Ilija Radosavovic and Tete Xiao and Stephen James and Pieter Abbeel and Jitendra Malik and Trevor Darrell},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=KWCZfuqshd}\n}", "github": "https://github.com/ir413/mvp", "project": "", "reviewers": "ZgtC;tTKM;yV1f;oQo2", "site": "https://openreview.net/forum?id=KWCZfuqshd", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 272, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13251290333056579360&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;0;3", "aff_unique_norm": "University of California, Berkeley;Meta;Covariant;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";Facebook AI Research;;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;https://research.facebook.com;;", "aff_unique_abbr": "UC Berkeley;FAIR;;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "KXkzplx6H2K", "title": "Socially-Attentive Policy Optimization in Multi-Agent Self-Driving System", "track": "main", "status": "Poster", "tldr": "We focus on how to improve the safety and efficiency of a multi-agent self-driving system, by navigating vehicles to learn socially-compatible behaviors.", "abstract": "As increasing numbers of autonomous vehicles (AVs) are being deployed, it is important to construct a multi-agent self-driving (MASD) system for navigating traffic flows of AVs. 
In an MASD system, AVs not only navigate themselves to pursue their own goals, but also interact with each other to prevent congestion or collision, especially in scenarios like intersection or lane merging. Multi-agent reinforcement learning (MARL) provides an appealing alternative to generate safe and efficient actions for multiple AVs. However, current MARL methods are limited to describing scenarios where agents interact in either a cooperative or competitive fashion within one episode. Ordinarily, the agents' objectives are defined with a global or team reward function, which fails to deal with the dynamic social preferences (SPs) and mixed motives like human drivers in traffic interactions. To this end, we propose a novel MARL method called Socially-Attentive Policy Optimization (SAPO), which incorporates: (a) a self-attention module to select the most interactive traffic participant for each AV, and (b) a social-aware integration mechanism to integrate objectives of interacting AVs by estimating the dynamic social preferences from their observations. SAPO solves the problem of how to improve the safety and efficiency of MASD systems, by enabling AVs to learn socially-compatible behaviors. Simulation experiments show that SAPO can successfully capture and utilize the variation of the SPs of AVs to achieve superior performance, compared with baselines in MASD scenarios.", "keywords": "self-driving;social preference;multi-agent;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/9ff9440b3aca18699b28217a26a2bf557105e17c.zip", "author": "Zipeng Dai;Tianze Zhou;Kun Shao;David Henry Mguni;Bin Wang;Jianye HAO", "authorids": "~Zipeng_Dai1;~Tianze_Zhou1;~Kun_Shao1;~David_Henry_Mguni1;~Bin_Wang12;~Jianye_HAO1", "gender": ";;;M;M;M", "homepage": "https://github.com/superboySB;;;;http://binwang.top;http://www.icdai.org/jianye.html", "dblp": "266/6184.html;;;217/2369;13/1898-34;21/7664.html", "google_scholar": "e2c7Kt0AAAAJ;;;K-_yzBsAAAAJ;KWZG_YsAAAAJ;", "orcid": ";;;;0000-0002-0267-3749;0000-0002-0422-8235", "linkedin": ";;;;;", "or_profile": "~Zipeng_Dai1;~Tianze_Zhou1;~Kun_Shao1;~David_Henry_Mguni1;~Bin_Wang12;~Jianye_HAO1", "aff": "Beijing Institute of Technology;;;Queen Mary University, London;Huawei Noah's Ark Lab;Tianjin University", "aff_domain": "bit.edu.cn;;;qmul.ac.uk;huawei.com;tju.edu.cn", "position": "PhD student;;;Lecturer;Senior Researcher;Associate Professor", "bibtex": "@inproceedings{\ndai2022sociallyattentive,\ntitle={Socially-Attentive Policy Optimization in Multi-Agent Self-Driving System},\nauthor={Zipeng Dai and Tianze Zhou and Kun Shao and David Henry Mguni and Bin Wang and Jianye HAO},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=KXkzplx6H2K}\n}", "github": "", "project": "", "reviewers": "6Syb;JLzw;zrSF;U4bM", "site": "https://openreview.net/forum?id=KXkzplx6H2K", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 27, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10363471755334087704&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Beijing Institute of Technology;Queen Mary University of London;Huawei;Tianjin University", "aff_unique_dep": ";;Noah's Ark Lab;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.qmul.ac.uk;https://www.huawei.com;http://www.tju.edu.cn",
"aff_unique_abbr": "BIT;QMUL;Huawei;TJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "KwdUWypQ5gC", "title": "Bayesian Reinforcement Learning for Single-Episode Missions in Partially Unknown Environments", "track": "main", "status": "Poster", "tldr": "We propose a unified Bayes-optimal framework for single-mission robot planning in unknown environments, and evaluate on two realistic simulated environments.", "abstract": "We consider planning for mobile robots conducting missions in real-world domains where a priori unknown dynamics affect the robot\u2019s costs and transitions. We study single-episode missions where it is crucial that the robot appropriately trades off exploration and exploitation, such that the learning of the environment dynamics is just enough to effectively complete the mission. Thus, we propose modelling unknown dynamics using Gaussian processes, which provide a principled Bayesian framework for incorporating online observations made by the robot, and using them to predict the dynamics in unexplored areas. We then formulate the problem of mission planning in Markov decision processes under Gaussian process predictions as Bayesian model-based reinforcement learning. This allows us to employ solution techniques that plan more efficiently than previous Gaussian process planning methods are able to. We empirically evaluate the benefits of our formulation in an underwater autonomous vehicle navigation task and robot mission planning in a realistic simulation of a nuclear environment.", "keywords": "Planning under Uncertainty;Gaussian Processes;Single-Episode Bayesian Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/b6dc1772b2b96bd3910955ed86f29d673da8598d.zip", "author": "Matthew Budd;Paul Duckworth;Nick Hawes;Bruno Lacerda", "authorids": "~Matthew_Budd1;~Paul_Duckworth1;~Nick_Hawes1;~Bruno_Lacerda1", "gender": ";M;M;M", "homepage": "https://matthewbudd.com;http://www.robots.ox.ac.uk/~scpd/;https://www.robots.ox.ac.uk/~nickh/;https://bfalacerda.github.io/", "dblp": "285/3113;179/2160;35/1190;87/10333", "google_scholar": "GEWpapIAAAAJ;I64MZDoAAAAJ;bRsi4zoAAAAJ;https://scholar.google.co.uk/citations?user=k9XjG_MAAAAJ", "orcid": "0000-0003-0520-403X;0000-0001-9052-6919;0000-0002-7556-6098;0000-0003-0862-331X", "linkedin": "https://linkedin.com/in/matthew-budd;;;", "or_profile": "~Matthew_Budd1;~Paul_Duckworth1;~Nick_Hawes1;~Bruno_Lacerda1", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;robots.ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;Postdoc;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\nbudd2022bayesian,\ntitle={Bayesian Reinforcement Learning for Single-Episode Missions in Partially Unknown Environments},\nauthor={Matthew Budd and Paul Duckworth and Nick Hawes and Bruno Lacerda},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=KwdUWypQ5gC}\n}", "github": "", "project": "", "reviewers": "95C2;8W33;T74h;n5QS", "site": "https://openreview.net/forum?id=KwdUWypQ5gC", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6497245073365175377&as_sdt=2005&sciodt=0,5&hl=en",
"gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "L8hCfhPbFho", "title": "VIOLA: Object-Centric Imitation Learning for Vision-Based Robot Manipulation", "track": "main", "status": "Poster", "tldr": "We introduce an object-centric imitation learning approach for robot manipulation that acquires robust, closed-loop visuomotor policies.", "abstract": "We introduce VIOLA, an object-centric imitation learning approach to learning closed-loop visuomotor policies for robot manipulation. Our approach constructs object-centric representations based on general object proposals from a pre-trained vision model. VIOLA uses a transformer-based policy to reason over these representations and attend to the task-relevant visual factors for action prediction. Such object-based structural priors improve deep imitation learning algorithm's robustness against object variations and environmental perturbations. We quantitatively evaluate VIOLA in simulation and on real robots. VIOLA outperforms the state-of-the-art imitation learning methods by $45.8\\%$ in success rate. It has also been deployed successfully on a physical robot to solve challenging long-horizon tasks, such as dining table arrangement and coffee making. More videos and model details can be found in supplementary material and the project website: https://ut-austin-rpl.github.io/VIOLA/.", "keywords": "Imitation Learning;Robot Manipulation;Object-Centric Representation", "primary_area": "", "supplementary_material": "/attachment/8449cb6564533715a8044aa32d39e7568b4ad6f5.zip", "author": "Yifeng Zhu;Abhishek Joshi;Peter Stone;Yuke Zhu", "authorids": "~Yifeng_Zhu2;~Abhishek_Joshi1;~Peter_Stone1;~Yuke_Zhu1", "gender": "M;M;M;M", "homepage": "https://cs.utexas.edu/~yifengz;https://abhihjoshi.github.io/;http://www.cs.utexas.edu/~pstone;https://cs.utexas.edu/~yukez/", "dblp": ";;s/PeterStone;133/1772", "google_scholar": ";M70ahaEAAAAJ;qnwjcfAAAAAJ;mWGyYMsAAAAJ", "orcid": ";;0000-0002-6795-420X;", "linkedin": ";abhishek-joshi-4ab469180/;;", "or_profile": "~Yifeng_Zhu2;~Abhishek_Joshi1;~Peter_Stone1;~Yuke_Zhu1", "aff": "The University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;Computer Science Department, University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;cs.utexas.edu", "position": "PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2022viola,\ntitle={{VIOLA}: Object-Centric Imitation Learning for Vision-Based Robot Manipulation},\nauthor={Yifeng Zhu and Abhishek Joshi and Peter Stone and Yuke Zhu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=L8hCfhPbFho}\n}", "github": "https://github.com/UT-Austin-RPL/VIOLA", "project": "", "reviewers": "77ch;Cdbg;9cZF;n5Tr", "site": "https://openreview.net/forum?id=L8hCfhPbFho", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4834220578977409194&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin",
"aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "LunGpKUNIR", "title": "SE(3)-Equivariant Relational Rearrangement with Neural Descriptor Fields", "track": "main", "status": "Poster", "tldr": "We present a method to execute relational rearrangement tasks on pairs of unseen objects using a few demonstrations.", "abstract": "We present a framework for specifying tasks involving spatial relations between objects using only 5-10 demonstrations and then executing such tasks given point cloud observations of a novel pair of objects in arbitrary initial poses. Our approach structures these rearrangement tasks by assigning a consistent local coordinate frame to the task-relevant object parts, localizing the corresponding coordinate frame on unseen object instances, and executing an action that brings these frames into alignment. We propose an optimization method that uses multiple Neural Descriptor Fields (NDFs) and a single annotated 3D keypoint to assign a set of consistent coordinate frames to the task-relevant object parts. We also propose an energy-based learning scheme to model the joint configuration of the objects that satisfies a desired relational task. We validate our pipeline on three multi-object rearrangement tasks in simulation and on a real robot. Results show that our method can infer relative transformations that satisfy the desired relation between novel objects in unseen initial poses using just a few demonstrations.", "keywords": "Neural Fields;Relations;Manipulation;Object Rearrangement", "primary_area": "", "supplementary_material": "/attachment/d995e047476283fe4ac1fd9c037408d16f136f7d.zip", "author": "Anthony Simeonov;Yilun Du;Yen-Chen Lin;Alberto Rodriguez Garcia;Leslie Pack Kaelbling;Tom\u00e1s Lozano-P\u00e9rez;Pulkit Agrawal", "authorids": "~Anthony_Simeonov1;~Yilun_Du1;~Yen-Chen_Lin1;~Alberto_Rodriguez_Garcia1;~Leslie_Pack_Kaelbling1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Pulkit_Agrawal1", "gender": ";;M;M;F;M;M", "homepage": "https://anthonysimeonov.github.io/;https://yilundu.github.io;http://yenchenlin.me/;http://mcube.mit.edu/;http://people.csail.mit.edu/lpk/;http://people.csail.mit.edu/tlp/;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": ";204/4379;180/0954;;k/LesliePackKaelbling;90/752;149/2672", "google_scholar": ";;RbCKRPcAAAAJ;AC93g9kAAAAJ;IcasIiwAAAAJ;gQOKAggAAAAJ;UpZmJI0AAAAJ", "orcid": ";;;;0000-0001-6054-7145;;", "linkedin": ";;;;;;", "or_profile": "~Anthony_Simeonov1;~Yilun_Du1;~Yen-Chen_Lin1;~Alberto_Rodriguez_Garcia1;~Leslie_Pack_Kaelbling1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Pulkit_Agrawal1", "aff": "NVIDIA;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "nvidia.com;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "Intern;PhD student;PhD student;Associate Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsimeonov2022seequivariant,\ntitle={{SE}(3)-Equivariant Relational Rearrangement with Neural Descriptor Fields},\nauthor={Anthony Simeonov and Yilun Du and Yen-Chen Lin and Alberto Rodriguez Garcia and Leslie Pack Kaelbling and Tom{\\'a}s Lozano-P{\\'e}rez and Pulkit Agrawal},\nbooktitle={6th Annual Conference 
on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=LunGpKUNIR}\n}", "github": "https://github.com/anthonysimeonov/relational_ndf", "project": "", "reviewers": "krtE;haL1;1zr8;gkTn", "site": "https://openreview.net/forum?id=LunGpKUNIR", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14250060034434220698&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "NVIDIA;Massachusetts Institute of Technology", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://web.mit.edu", "aff_unique_abbr": "NVIDIA;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "MUcBYHjzqp7", "title": "Transferring Hierarchical Structures with Dual Meta Imitation Learning", "track": "main", "status": "Poster", "tldr": "A dual meta imitation learning framework for fast adaptation of hierarchical imitation learning methods in new tasks.", "abstract": "Hierarchical Imitation Learning (HIL) is an effective way for robots to learn sub-skills from long-horizon unsegmented demonstrations. However, the learned hierarchical structure lacks a mechanism to transfer across multiple tasks or to new tasks, which forces these methods to learn from scratch when facing a new situation. Transferring and reorganizing modular sub-skills require fast adaptation of the whole hierarchical structure. In this work, we propose Dual Meta Imitation Learning (DMIL), a hierarchical meta imitation learning method where the high-level network and sub-skills are iteratively meta-learned with model-agnostic meta-learning. DMIL uses the likelihood of state-action pairs from each sub-skill as the supervision for the high-level network adaptation and uses the adapted high-level network to determine a different data set for each sub-skill adaptation. We theoretically prove the convergence of the iterative training process of DMIL and establish the connection between DMIL and the Expectation-Maximization algorithm.
Empirically, we achieve state-of-the-art few-shot imitation learning performance on the Meta-world benchmark and competitive results on long-horizon tasks in Kitchen environments.", "keywords": "Hierarchical Imitation Learning;Meta Learning", "primary_area": "", "supplementary_material": "/attachment/73b64395d215ba5df85b439a32cd992d287bc564.zip", "author": "Chongkai Gao;Yizhou Jiang;Feng Chen", "authorids": "~Chongkai_Gao1;~Yizhou_Jiang1;~Feng_Chen1", "gender": "M;M;M", "homepage": "https://chongkaigao.com/;;", "dblp": "295/8658;201/8247;21/3047-7", "google_scholar": "l_mOqY8AAAAJ;oM8ue_UAAAAJ;", "orcid": ";;0000-0003-4813-2494", "linkedin": ";;", "or_profile": "~Chongkai_Gao1;~Yizhou_Jiang1;~Feng_Chen1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\ngao2022transferring,\ntitle={Transferring Hierarchical Structures with Dual Meta Imitation Learning},\nauthor={Chongkai Gao and Yizhou Jiang and Feng Chen},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=MUcBYHjzqp7}\n}", "github": "", "project": "", "reviewers": "aty5;dgTJ;ZhJq;iytY", "site": "https://openreview.net/forum?id=MUcBYHjzqp7", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11619969074939056136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "MoSC0pziRd", "title": "Fleet-DAgger: Interactive Robot Fleet Learning with Scalable Human Supervision", "track": "main", "status": "Oral", "tldr": "We present a formalism, algorithms, and benchmarks for interactive fleet learning: interactive learning with multiple robots and multiple humans.", "abstract": "Commercial and industrial deployments of robot fleets at Amazon, Nimble, Plus One, Waymo, and Zoox query remote human teleoperators when robots are at risk or unable to make task progress. With continual learning, interventions from the remote pool of humans can also be used to improve the robot fleet control policy over time. A central question is how to effectively allocate limited human attention. Prior work addresses this in the single-robot, single-human setting; we formalize the Interactive Fleet Learning (IFL) setting, in which multiple robots interactively query and learn from multiple human supervisors. We propose Return on Human Effort (ROHE) as a new metric and Fleet-DAgger, a family of IFL algorithms. We present an open-source IFL benchmark suite of GPU-accelerated Isaac Gym environments for standardized evaluation and development of IFL algorithms. We compare a novel Fleet-DAgger algorithm to 4 baselines with 100 robots in simulation. We also perform a physical block-pushing experiment with 4 ABB YuMi robot arms and 2 remote humans. Experiments suggest that the allocation of humans to robots significantly affects the performance of the fleet, and that the novel Fleet-DAgger algorithm can achieve up to 8.8x higher ROHE than baselines. 
See https://tinyurl.com/fleet-dagger for supplemental material.", "keywords": "Fleet Learning;Interactive Learning;Human Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/91411d5cc9630d22edee5a328071aded1920baa5.zip", "author": "Ryan Hoque;Lawrence Yunliang Chen;Satvik Sharma;Karthik Dharmarajan;Brijen Thananjeyan;Pieter Abbeel;Ken Goldberg", "authorids": "~Ryan_Hoque1;yunliang.chen@berkeley.edu;satvik.sharma@berkeley.edu;~Karthik_Dharmarajan1;~Brijen_Thananjeyan1;~Pieter_Abbeel2;~Ken_Goldberg1", "gender": "M;;;;M;M;M", "homepage": "https://ryanhoque.github.io;;;;http://bthananjeyan.github.io/;https://people.eecs.berkeley.edu/~pabbeel/;http://goldberg.berkeley.edu/", "dblp": "250/9457;;;;203/5466;;g/KennethYGoldberg", "google_scholar": "ywv6tDUAAAAJ;;;;fftO_HsAAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ", "orcid": ";;;;;;0000-0001-6747-9499", "linkedin": "https://linkedin.com/in/ryanhoque;;;karthik-dharmarajan/;;;goldbergken/", "or_profile": "~Ryan_Hoque1;yunliang.chen@berkeley.edu;satvik.sharma@berkeley.edu;~Karthik_Dharmarajan1;~Brijen_Thananjeyan1;~Pieter_Abbeel2;~Ken_Goldberg1", "aff": "University of California, Berkeley;;;Electrical Engineering & Computer Science Department, University of California, Berkeley;University of California, Berkeley;Covariant;University of California, Berkeley", "aff_domain": "berkeley.edu;;;eecs.berkeley.edu;berkeley.edu;covariant.ai;berkeley.edu", "position": "PhD student;;;Undergrad student;PhD student;Founder;Full Professor", "bibtex": "@inproceedings{\nhoque2022fleetdagger,\ntitle={Fleet-{DA}gger: Interactive Robot Fleet Learning with Scalable Human Supervision},\nauthor={Ryan Hoque and Lawrence Yunliang Chen and Satvik Sharma and Karthik Dharmarajan and Brijen Thananjeyan and Pieter Abbeel and Ken Goldberg},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=MoSC0pziRd}\n}", "github": "https://github.com/BerkeleyAutomation/ifl_benchmark", "project": "", "reviewers": "3zng;45kd;tye5;oMQS", "site": "https://openreview.net/forum?id=MoSC0pziRd", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13691273048470653245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Covariant", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "Mp3Y5jd7rnW", "title": "CADSim: Robust and Scalable in-the-wild 3D Reconstruction for Controllable Sensor Simulation", "track": "main", "status": "Poster", "tldr": "We propose a new method to reconstruct objects from sensory observations that are of high fidelity, part-aware, geometry-aligned and compatible to graphics engine thus enable realistic and controllable simulation efficiently", "abstract": "Realistic simulation is key to enabling safe and scalable development of self-driving vehicles. A core component is simulating the sensors so that the entire autonomy system can be tested in simulation. 
Sensor simulation involves modeling traffic participants, such as vehicles, with high-quality appearance and articulated geometry, and rendering them in real-time. The self-driving industry has employed artists to build these assets. However, this is expensive, slow, and may not reflect reality. Instead, reconstructing assets automatically from sensor data collected in the wild would provide a better path to generating a diverse and large set that provides good real-world coverage. However, current reconstruction approaches struggle on in-the-wild sensor data, due to its sparsity and noise. To tackle these issues, we present CADSim which combines part-aware object-class priors via a small set of CAD models with differentiable rendering to automatically reconstruct vehicle geometry, including articulated wheels, with high-quality appearance. Our experiments show our approach recovers more accurate shape from sparse data compared to existing approaches. Importantly, it also trains and renders efficiently. We demonstrate our reconstructed vehicles in a wide range of applications, including accurate testing of autonomy perception systems.", "keywords": "3D Reconstruction;CAD models;Sensor Simulation;Self-Driving", "primary_area": "", "supplementary_material": "/attachment/1599f47abd99ec1eb83e3123948be69be27d11ff.zip", "author": "Jingkang Wang;Sivabalan Manivasagam;Yun Chen;Ze Yang;Ioan Andrei B\u00e2rsan;Anqi Joyce Yang;Wei-Chiu Ma;Raquel Urtasun", "authorids": "~Jingkang_Wang1;~Sivabalan_Manivasagam1;~Yun_Chen3;~Ze_Yang5;~Ioan_Andrei_B\u00e2rsan1;~Anqi_Joyce_Yang1;~Wei-Chiu_Ma1;~Raquel_Urtasun1", "gender": "M;;;;;F;M;F", "homepage": "http://www.cs.toronto.edu/~wangjk/;;;;;https://www.cs.toronto.edu/~ajyang/;https://www.cs.cornell.edu/~weichiu/;http://www.cs.toronto.edu/~urtasun/", "dblp": "223/9910;;;;;283/5790;151/4277;u/RaquelUrtasun", "google_scholar": "c0BTYC4AAAAJ;;;;;DxnwQqgAAAAJ;SVIdh6AAAAAJ;https://scholar.google.ca/citations?user=jyxO2akAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;ajyang99/;;", "or_profile": "~Jingkang_Wang1;~Sivabalan_Manivasagam1;~Yun_Chen3;~Ze_Yang5;~Ioan_Andrei_B\u00e2rsan1;~Anqi_Joyce_Yang1;~Wei-Chiu_Ma1;~Raquel_Urtasun1", "aff": "University of Toronto;;;;;Waabi Innovation Inc;Massachusetts Institute of Technology;Department of Computer Science, University of Toronto", "aff_domain": "toronto.edu;;;;;waabi.ai;mit.edu;cs.toronto.edu", "position": "PhD student;;;;;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2022cadsim,\ntitle={{CADS}im: Robust and Scalable in-the-wild 3D Reconstruction for Controllable Sensor Simulation},\nauthor={Jingkang Wang and Sivabalan Manivasagam and Yun Chen and Ze Yang and Ioan Andrei B{\\^a}rsan and Anqi Joyce Yang and Wei-Chiu Ma and Raquel Urtasun},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Mp3Y5jd7rnW}\n}", "github": "", "project": "", "reviewers": "NPXi;wSHy;iXu8;gqRW", "site": "https://openreview.net/forum?id=Mp3Y5jd7rnW", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13911398903704803222&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Toronto;Waabi Innovation Inc;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.utoronto.ca;https://www.waabi.ai;https://web.mit.edu", "aff_unique_abbr": "U of T;;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "Mzqn__AxyA6", "title": "Inferring Versatile Behavior from Demonstrations by Matching Geometric Descriptors", "track": "main", "status": "Poster", "tldr": "We combine skill learning with a geometric feature representation to generalize to novel task configurations from versatile human demonstrations.", "abstract": "Humans intuitively solve tasks in versatile ways, varying their behavior in terms of trajectory-based planning and for individual steps. Thus, they can easily generalize and adapt to new and changing environments. Current Imitation Learning algorithms often only consider unimodal expert demonstrations and act in a state-action-based setting, making it difficult for them to imitate human behavior in case of versatile demonstrations. Instead, we combine a mixture of movement primitives with a distribution matching objective to learn versatile behaviors that match the expert\u2019s behavior and versatility. To facilitate generalization to novel task configurations, we do not directly match the agent\u2019s and expert\u2019s trajectory distributions but rather work with concise geometric descriptors which generalize well to unseen task configurations. We empirically validate our method on various robot tasks using versatile human demonstrations and compare to imitation learning algorithms in a state-action setting as well as a trajectory-based setting. We find that the geometric descriptors greatly help in generalizing to new task configurations and that combining them with our distribution-matching objective is crucial for representing and reproducing versatile behavior.", "keywords": "Imitation Learning;Versatile Skill Learning;Distribution Matching", "primary_area": "", "supplementary_material": "/attachment/275030298dd1b21049ff1d972e54e54063a403a5.zip", "author": "Niklas Freymuth;Nicolas Schreiber;Aleksandar Taranovic;Philipp Becker;Gerhard Neumann", "authorids": "~Niklas_Freymuth1;nicolas.schreiber@kit.edu;~Aleksandar_Taranovic1;~Philipp_Becker1;~Gerhard_Neumann2", "gender": "M;;M;M;", "homepage": ";;;;", "dblp": "255/7209;;;66/1316;", "google_scholar": "FK1DbrcAAAAJ;;2IovJsIAAAAJ;https://scholar.google.de/citations?user=jXx-LuQAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Niklas_Freymuth1;nicolas.schreiber@kit.edu;~Aleksandar_Taranovic1;~Philipp_Becker1;~Gerhard_Neumann2", "aff": "Karlsruhe Institute of Technology;;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology;", "aff_domain": "kit.edu;;kit.edu;kit.edu;", "position": "PhD student;;PhD student;PhD student;", "bibtex": "@inproceedings{\nfreymuth2022inferring,\ntitle={Inferring Versatile Behavior from Demonstrations by Matching Geometric Descriptors},\nauthor={Niklas Freymuth and Nicolas Schreiber and Aleksandar Taranovic and Philipp Becker and Gerhard Neumann},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Mzqn__AxyA6}\n}", "github": "www.github.com/NiklasFreymuth/VIGOR.git", "project": "", "reviewers": "9xkZ;TvrN;24LK", "site": "https://openreview.net/forum?id=Mzqn__AxyA6", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 5, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=283263858990729433&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Karlsruhe Institute of Technology;Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.kit.edu", "aff_unique_abbr": "KIT;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "N-HtsQkRotI", "title": "Hypernetworks in Meta-Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "Hypernetworks can drastically improve performance in meta-RL, but initialization methods such as Bias-HyperInit are required.", "abstract": "Training a reinforcement learning (RL) agent on a real-world robotics task remains generally impractical due to sample inefficiency. Multi-task RL and meta-RL aim to improve sample efficiency by generalizing over a distribution of related tasks. However, doing so is difficult in practice: In multi-task RL, state of the art methods often fail to outperform a degenerate solution that simply learns each task separately. Hypernetworks are a promising path forward since they replicate the separate policies of the degenerate solution while also allowing for generalization across tasks, and are applicable to meta-RL. However, evidence from supervised learning suggests hypernetwork performance is highly sensitive to the initialization. In this paper, we 1) show that hypernetwork initialization is also a critical factor in meta-RL, and that naive initializations yield poor performance; 2) propose a novel hypernetwork initialization scheme that matches or exceeds the performance of a state-of-the-art approach proposed for supervised settings, as well as being simpler and more general; and 3) use this method to show that hypernetworks can improve performance in meta-RL by evaluating on multiple simulated robotics benchmarks.", "keywords": "Hypernetwork;Deep Learning;Reinforcement Learning;Reinforcement;RL;Meta-Learning;Meta;Meta-RL;Meta-World;Robotics", "primary_area": "", "supplementary_material": "/attachment/05296f1f780bc8ac851d989a0165ec610cee6db3.zip", "author": "Jacob Beck;Matthew Thomas Jackson;Risto Vuorio;Shimon Whiteson", "authorids": "~Jacob_Beck1;~Matthew_Thomas_Jackson1;~Risto_Vuorio1;~Shimon_Whiteson1", "gender": "M;;M;", "homepage": "https://matthewtjackson.com;;http://jakebeck.com;https://vuoristo.github.io/", "dblp": "331/5748;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;;222/2614", "google_scholar": "SdGawnwAAAAJ;;https://scholar.google.ca/citations?user=PrS_dHMAAAAJ;qCk3GFAAAAAJ", "orcid": ";;;", "linkedin": "matthew-t-jackson/;;;", "or_profile": "~Matthew_Thomas_Jackson1;~Shimon_Whiteson1;~Jacob_Austin_Beck1;~Risto_Ilkka_Antero_Vuorio1", "aff": "University of Oxford;University of Oxford;Department of Computer Science, University of Oxford;QualComm", "aff_domain": "oxford.ac.uk;ox.ac.uk;cs.ox.ac.uk;qualcomm.com", "position": "PhD student;Professor;PhD student;Intern", "bibtex": "@inproceedings{\nbeck2022hypernetworks,\ntitle={Hypernetworks in Meta-Reinforcement Learning},\nauthor={Jacob Beck and Matthew Thomas Jackson and Risto Vuorio and Shimon Whiteson},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=N-HtsQkRotI}\n}", "github": "", "project": "", "reviewers": "R1kq;jmaN;7NSX", "site": "https://openreview.net/forum?id=N-HtsQkRotI", "pdf_size": 0, "rating": 
"4;4;6", "confidence": "", "rating_avg": 4.666666666666667, "confidence_avg": 0, "replies_avg": 8, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14257967488436101476&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Oxford;Qualcomm Incorporated", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.qualcomm.com", "aff_unique_abbr": "Oxford;Qualcomm", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "N78I92JIqOJ", "title": "Generative Category-Level Shape and Pose Estimation with Semantic Primitives", "track": "main", "status": "Poster", "tldr": "We propose a novel framework for category-level object shape and pose estimation and achieve state-of-the-art results on real-scene dataset.", "abstract": "Empowering autonomous agents with 3D understanding for daily objects is a grand challenge in robotics applications. When exploring in an unknown environment, existing methods for object pose estimation are still not satisfactory due to the diversity of object shapes. In this paper, we propose a novel framework for category-level object shape and pose estimation from a single RGB-D image. To handle the intra-category variation, we adopt a semantic primitive representation that encodes diverse shapes into a unified latent space, which is the key to establish reliable correspondences between observed point clouds and estimated shapes. Then, by using a SIM(3)-invariant shape descriptor, we gracefully decouple the shape and pose of an object, thus supporting latent shape optimization of target objects in arbitrary poses. Extensive experiments show that the proposed method achieves SOTA pose estimation performance and better generalization in the real-world dataset. 
Code and video are available at \\url{https://zju3dv.github.io/gCasp}.", "keywords": "Category-level Pose Estimation;Shape Estimation", "primary_area": "", "supplementary_material": "/attachment/d098b28598cd0a98ebadafbabab8519258fe8a90.zip", "author": "Guanglin Li;Yifeng Li;Zhichao Ye;Qihang Zhang;Tao Kong;Zhaopeng Cui;Guofeng Zhang", "authorids": "~Guanglin_Li1;~Yifeng_Li3;~Zhichao_Ye1;~Qihang_Zhang1;~Tao_Kong3;~Zhaopeng_Cui1;~Guofeng_Zhang3", "gender": "M;;M;M;M;M;M", "homepage": ";https://ailab.bytedance.com/;https://github.com/oneLOH;https://zqh0253.github.io;http://www.taokong.org;https://zhpcui.github.io/;http://www.cad.zju.edu.cn/home/gfzhang", "dblp": ";;;282/1036;01/2492;28/7484;78/5389-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;sWjUHkEAAAAJ;kSUXLPkAAAAJ;https://scholar.google.ca/citations?user=vwIRwDUAAAAJ;F0xfpXAAAAAJ", "orcid": ";;;;;0000-0002-7130-439X;0000-0001-5661-8430", "linkedin": "%E5%B9%BF%E6%9E%97-%E6%9D%8E-199889218/;;;zhang-qihang-39aa09160/;;;", "or_profile": "~Guanglin_Li1;~Yifeng_Li3;~Zhichao_Ye1;~Qihang_Zhang1;~Tao_Kong3;~Zhaopeng_Cui1;~Guofeng_Zhang3", "aff": "Zhejiang University;;Zhejiang University;The Chinese University of Hong Kong;Bytedance;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;cuhk.edu.hk;bytedance.com;zju.edu.cn;zju.edu.cn", "position": "MS student;;PhD student;Postgraduate student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2022generative,\ntitle={Generative Category-Level Shape and Pose Estimation with Semantic Primitives},\nauthor={Guanglin Li and Yifeng Li and Zhichao Ye and Qihang Zhang and Tao Kong and Zhaopeng Cui and Guofeng Zhang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=N78I92JIqOJ}\n}", "github": "https://github.com/zju3dv/gCasp", "project": "", "reviewers": "imVj;jxdo;4hYs", "site": "https://openreview.net/forum?id=N78I92JIqOJ", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7617462889271899942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Zhejiang University;Chinese University of Hong Kong;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.cuhk.edu.hk;https://www.bytedance.com", "aff_unique_abbr": "ZJU;CUHK;Bytedance", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NEGjAH7p0fm", "title": "In-Hand Gravitational Pivoting Using Tactile Sensing", "track": "main", "status": "Poster", "tldr": "We propose a learning-based system for in-hand manipulation using gravitational pivoting", "abstract": "We study gravitational pivoting, a constrained version of in-hand manipulation, where we aim to control the rotation of an object around the grip point of a parallel gripper. To achieve this, instead of controlling the gripper to avoid slip, we \\emph{embrace slip} to allow the object to rotate in-hand. We collect two real-world datasets, a static tracking dataset and a controller-in-the-loop dataset, both annotated with object angle and angular velocity labels. Both datasets contain force-based tactile information on ten different household objects. 
We train an LSTM model to predict the angular position and velocity of the held object from purely tactile data. We integrate this model with a controller that opens and closes the gripper, allowing the object to rotate to desired relative angles. We conduct real-world experiments where the robot is tasked to achieve a relative target angle. We show that our approach outperforms a sliding-window based MLP in a zero-shot generalization setting with unseen objects. Furthermore, we show a 16.6\\% improvement in performance when the LSTM model is fine-tuned on a small set of data collected with both the LSTM model and the controller in-the-loop. Code and videos are available at https://rhys-newbury.github.io/projects/pivoting/.", "keywords": "Tactile Pose Sensing;In Hand Manipulation", "primary_area": "", "supplementary_material": "/attachment/f1087619b5997d36ebeb90962d6d2418e16b9374.zip", "author": "Jason Toskov;Rhys Newbury;Mustafa Mukadam;Dana Kulic;Akansel Cosgun", "authorids": "jtos0003@student.monash.edu;~Rhys_Newbury1;~Mustafa_Mukadam1;~Dana_Kulic1;~Akansel_Cosgun1", "gender": ";M;M;F;", "homepage": ";https://rhys-newbury.github.io/;http://www.mustafamukadam.com;https://www.monash.edu/engineering/danakulic;", "dblp": ";;;;", "google_scholar": ";gBZiDfkAAAAJ;yYpm9LoAAAAJ;https://scholar.google.com.au/citations?user=sL0KJlQAAAAJ;", "orcid": ";;;;", "linkedin": ";;mhmukadam/;;", "or_profile": "jtos0003@student.monash.edu;~Rhys_Newbury1;~Mustafa_Mukadam1;~Dana_Kulic1;~Akansel_Cosgun1", "aff": ";Monash University;Meta AI;Monash University;", "aff_domain": ";monash.edu;meta.com;monash.edu;", "position": ";PhD student;Researcher;Full Professor;", "bibtex": "@inproceedings{\ntoskov2022inhand,\ntitle={In-Hand Gravitational Pivoting Using Tactile Sensing},\nauthor={Jason Toskov and Rhys Newbury and Mustafa Mukadam and Dana Kulic and Akansel Cosgun},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=NEGjAH7p0fm}\n}", "github": "https://github.com/Jason-Toskov/In-Hand-Gravitational-Pivoting-Using-Tactile-Sensing", "project": "", "reviewers": "Yu3a;ynxK;Wnm1;R21W", "site": "https://openreview.net/forum?id=NEGjAH7p0fm", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12927898245614882414&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "Monash University;Meta", "aff_unique_dep": ";Meta AI", "aff_unique_url": "https://www.monash.edu;https://meta.com", "aff_unique_abbr": "Monash;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;United States" }, { "id": "NanNxv92Uih", "title": "Reinforcement learning with Demonstrations from Mismatched Task under Sparse Reward", "track": "main", "status": "Poster", "tldr": "We propose the CRSfD method to aid online reinforcement learning with demonstrations from a mismatched task in sparse reward environments.", "abstract": "Reinforcement learning often suffers from the sparse reward issue in real-world robotics problems. Learning from demonstration (LfD), which leverages collected expert data to aid online learning, is an effective way to eliminate this problem.
Prior works often assume that the learning agent and the expert aim to accomplish the same task, which requires collecting new data for every new task. In this paper, we consider the case where the target task is mismatched from but similar with that of the expert. Such setting can be challenging and we found existing LfD methods may encounter a phenomenon called reward signal backward propagation blockages so that the agent cannot be effectively guided by the demonstrations from mismatched task. We propose conservative reward shaping from demonstration (CRSfD), which shapes the sparse rewards using estimated expert value function. To accelerate learning processes, CRSfD guides the agent to conservatively explore around demonstrations. Experimental results of robot manipulation tasks show that our approach outperforms baseline LfD methods when transferring demonstrations collected in a single task to other different but similar tasks.", "keywords": "Sparse Reward Reinforcement Learning;Learn from Demonstration;Task Mismatch", "primary_area": "", "supplementary_material": "/attachment/678be1785e922c425251cd3ad98f20cd1f7d580b.zip", "author": "Yanjiang Guo;Jingyue Gao;Zheng Wu;Chengming Shi;Jianyu Chen", "authorids": "~Yanjiang_Guo1;~Jingyue_Gao2;~Zheng_Wu2;~Chengming_Shi1;~Jianyu_Chen1", "gender": "M;M;M;M;M", "homepage": "https://robert-gyj.github.io/;;https://zhengwu.us/;;http://people.iiis.tsinghua.edu.cn/~jychen/", "dblp": ";;;;", "google_scholar": "rBeZZPMAAAAJ;;Lx_cK2YAAAAJ;;", "orcid": ";0009-0005-6825-2049;;;", "linkedin": ";;;http://www.linkedin.com/in/chengming-ruby-shi;", "or_profile": "~Yanjiang_Guo1;~Jingyue_Gao2;~Zheng_Wu2;~Chengming_Shi1;~Jianyu_Chen1", "aff": "Tsinghua University;Tsinghua University;University of California, Berkeley;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;berkeley.edu;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nguo2022reinforcement,\ntitle={Reinforcement learning with Demonstrations from Mismatched Task under Sparse Reward},\nauthor={Yanjiang Guo and Jingyue Gao and Zheng Wu and Chengming Shi and Jianyu Chen},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=NanNxv92Uih}\n}", "github": "", "project": "", "reviewers": "CiFM;Mmxm;hEn1;rKEH", "site": "https://openreview.net/forum?id=NanNxv92Uih", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9727699006087553915&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tsinghua University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "THU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "No3mbanRlZJ", "title": "Touching a NeRF: Leveraging Neural Radiance Fields for Tactile Sensory Data Generation", "track": "main", "status": "Poster", "tldr": "We propose a novel framework for the generation of tactile data using a combination of NeRF and conditional GAN models. 
Results demonstrate potential to augment existing tactile datasets for downstream task and capability to transfer to a new sensor.", "abstract": "Tactile perception is key for robotics applications such as manipulation. However, tactile data collection is time-consuming, especially when compared to vision. This limits the use of the tactile modality in machine learning solutions in robotics. In this paper, we propose a generative model to simulate realistic tactile sensory data for use in downstream tasks. Starting with easily-obtained camera images, we train Neural Radiance Fields (NeRF) for objects of interest. We then use NeRF-rendered RGB-D images as inputs to a conditional Generative Adversarial Network model (cGAN) to generate tactile images from desired orientations. We evaluate the generated data quantitatively using the Structural Similarity Index and Mean Squared Error metrics, and also using a tactile classification task both in simulation and in the real world. Results show that by augmenting a manually collected dataset, the generated data is able to increase classification accuracy by around 10\\%. In addition, we demonstrate that our model is able to transfer from one tactile sensor to another with a small fine-tuning dataset.", "keywords": "Camera-based tactile sensing;cross-modal tactile data generation", "primary_area": "", "supplementary_material": "/attachment/d71da1310e6d82be37a6260a530244507b1a3054.zip", "author": "Shaohong Zhong;Alessandro Albini;Oiwi Parker Jones;Perla Maiolino;Ingmar Posner", "authorids": "~Shaohong_Zhong1;~Alessandro_Albini1;~Oiwi_Parker_Jones1;~Perla_Maiolino1;~Ingmar_Posner1", "gender": ";M;M;F;", "homepage": "https://www.linkedin.com/in/shaohong-z-982435a2/;;;;", "dblp": ";;;;59/542", "google_scholar": ";kuv7a8EAAAAJ;https://scholar.google.com/citations?hl=en;PbtP5MoAAAAJ;dPk-iwsAAAAJ", "orcid": ";;;;0000-0001-6270-700X", "linkedin": ";;;;ingmar-posner-20b49a", "or_profile": "~Shaohong_Zhong1;~Alessandro_Albini1;~Oiwi_Parker_Jones1;~Perla_Maiolino1;~Ingmar_Posner1", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk;oxford.ac.uk;ox.ac.uk", "position": "PhD student;Postdoc;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhong2022touching,\ntitle={Touching a Ne{RF}: Leveraging Neural Radiance Fields for Tactile Sensory Data Generation},\nauthor={Shaohong Zhong and Alessandro Albini and Oiwi Parker Jones and Perla Maiolino and Ingmar Posner},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=No3mbanRlZJ}\n}", "github": "", "project": "", "reviewers": "gjCP;Vqs5;Fy6z;ozxa", "site": "https://openreview.net/forum?id=No3mbanRlZJ", "pdf_size": 0, "rating": "1;6;6;10", "confidence": "", "rating_avg": 5.75, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9632425482885448242&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "OIaJRUo5UXy", "title": "Learning Neuro-Symbolic Skills for Bilevel Planning", "track": "main", "status": "Poster", 
"tldr": "We learn neuro-symbolic skills from demonstrations and plan with them using search-then-sample TAMP techniques.", "abstract": "Decision-making is challenging in robotics environments with continuous object-centric states, continuous actions, long horizons, and sparse feedback. Hierarchical approaches, such as task and motion planning (TAMP), address these challenges by decomposing decision-making into two or more levels of abstraction. In a setting where demonstrations and symbolic predicates are given, prior work has shown how to learn symbolic operators and neural samplers for TAMP with manually designed parameterized policies. Our main contribution is a method for learning parameterized polices in combination with operators and samplers. These components are packaged into modular neuro-symbolic skills and sequenced together with search-then-sample TAMP to solve new tasks. In experiments in four robotics domains, we show that our approach --- bilevel planning with neuro-symbolic skills --- can solve a wide range of tasks with varying initial states, goals, and objects, outperforming six baselines and ablations.", "keywords": "Skill Learning;Neuro-Symbolic;Task and Motion Planning", "primary_area": "", "supplementary_material": "/attachment/a7e6d6bf2e236e87f750a56261ba9a816ce7d8d8.zip", "author": "Tom Silver;Ashay Athalye;Joshua B. Tenenbaum;Tom\u00e1s Lozano-P\u00e9rez;Leslie Pack Kaelbling", "authorids": "~Tom_Silver1;~Ashay_Athalye1;~Joshua_B._Tenenbaum1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Leslie_Pack_Kaelbling1", "gender": "M;M;;M;F", "homepage": "https://web.mit.edu/tslvr/www/;http://www.ashay.io;;http://people.csail.mit.edu/tlp/;http://people.csail.mit.edu/lpk/", "dblp": "202/1778;;t/JoshuaBTenenbaum;90/752;k/LesliePackKaelbling", "google_scholar": "CMcsygMAAAAJ;;;gQOKAggAAAAJ;IcasIiwAAAAJ", "orcid": ";;;;0000-0001-6054-7145", "linkedin": ";;;;", "or_profile": "~Tom_Silver1;~Ashay_Athalye1;~Joshua_B._Tenenbaum1;~Tom\u00e1s_Lozano-P\u00e9rez1;~Leslie_Pack_Kaelbling1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;MS student;Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsilver2022learning,\ntitle={Learning Neuro-Symbolic Skills for Bilevel Planning},\nauthor={Tom Silver and Ashay Athalye and Joshua B. 
Tenenbaum and Tom{\\'a}s Lozano-P{\\'e}rez and Leslie Pack Kaelbling},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=OIaJRUo5UXy}\n}", "github": "https://tinyurl.com/skill-learning", "project": "", "reviewers": "ZRBu;MQ6V;Y3Ww;P5b1", "site": "https://openreview.net/forum?id=OIaJRUo5UXy", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 25, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4387233661916679099&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OzP68WT3UA", "title": "NeuralGrasps: Learning Implicit Representations for Grasps of Multiple Robotic Hands", "track": "main", "status": "Poster", "tldr": "Learning implicit representations for multiple robot hands in a common encoding space for flexible grasp transfer.", "abstract": "We introduce a neural implicit representation for grasps of objects from multiple robotic hands. Different grasps across multiple robotic hands are encoded into a shared latent space. Each latent vector is learned to decode to the 3D shape of an object and the 3D shape of a robotic hand in a grasping pose in terms of the signed distance functions of the two 3D shapes. In addition, the distance metric in the latent space is learned to preserve the similarity between grasps across different robotic hands, where the similarity of grasps is defined according to contact regions of the robotic hands. This property enables our method to transfer grasps between different grippers including a human hand, and grasp transfer has the potential to share grasping skills between robots and enable robots to learn grasping skills from humans. 
Furthermore, the encoded signed distance functions of objects and grasps in our implicit representation can be used for 6D object pose estimation with grasping contact optimization from partial point clouds, which enables robotic grasping in the real world.", "keywords": "Robot Grasping;Neural Implicit Representations;Grasp Transfer;Grasping Contact Modeling;6D Object Pose Estimation", "primary_area": "", "supplementary_material": "/attachment/28f10a64ec7baad0cd4424891238dbdb1ee47f24.zip", "author": "Ninad Khargonkar;Neil Song;Zesheng Xu;B Prabhakaran;Yu Xiang", "authorids": "~Ninad_Khargonkar1;~Neil_Song1;~Zesheng_Xu1;~B_Prabhakaran1;~Yu_Xiang3", "gender": ";M;;M;M", "homepage": "https://kninad.github.io;https://github.com/neilsong;;http://www.utdallas.edu/~praba/;https://yuxng.github.io/", "dblp": "255/0143;;;p/BPrabhakaran;00/6716-1", "google_scholar": "5eFmqkAAAAAJ;;jMfgMuIAAAAJ;https://scholar.google.com.tw/citations?user=BENEK78AAAAJ;", "orcid": "0000-0001-9191-0250;;;;0000-0001-9431-5131", "linkedin": ";neil-song/;zesheng-xu/;;", "or_profile": "~Ninad_Khargonkar1;~Neil_Song1;~Zesheng_Xu1;~B_Prabhakaran1;~Yu_Xiang1", "aff": "University of Texas at Dallas;University of Texas at Dallas;University of Texas at Dallas;University of Texas - Dallas;University of Texas, Dallas", "aff_domain": "utdallas.edu;utdallas.edu;utdallas.edu;;utdallas.edu", "position": "PhD student;Researcher;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkhargonkar2022neuralgrasps,\ntitle={NeuralGrasps: Learning Implicit Representations for Grasps of Multiple Robotic Hands},\nauthor={Ninad Khargonkar and Neil Song and Zesheng Xu and B Prabhakaran and Yu Xiang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=OzP68WT3UA}\n}", "github": "https://irvlutd.github.io/NeuralGrasps", "project": "", "reviewers": "bHvy;hW79;z2Qy", "site": "https://openreview.net/forum?id=OzP68WT3UA", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 9, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14845793865999518159&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Texas at Dallas", "aff_unique_dep": "", "aff_unique_url": "https://www.utdallas.edu", "aff_unique_abbr": "UT Dallas", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Dallas", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PAFEQQtDf8s", "title": "CoBEVT: Cooperative Bird\u2019s Eye View Semantic Segmentation with Sparse Transformers", "track": "main", "status": "Poster", "tldr": "We propose CoBEVT, the first generic multi-agent multi-camera perception framework that can cooperatively generate BEV map predictions via sparse Transformers.", "abstract": "Bird\u2019s eye view (BEV) semantic segmentation plays a crucial role in spatial sensing for autonomous driving. Although recent literature has made significant progress on BEV map understanding, they are all based on single-agent camera-based systems. These solutions sometimes have difficulty handling occlusions or detecting distant objects in complex traffic scenes. 
Vehicle-to-Vehicle (V2V) communication technologies have enabled autonomous vehicles to share sensing information, dramatically improving the perception performance and range compared to single-agent systems. In this paper, we propose CoBEVT, the first generic multi-agent multi-camera perception framework that can cooperatively generate BEV map predictions. To efficiently fuse camera features from multi-view and multi-agent data in an underlying Transformer architecture, we design a fused axial attention module (FAX), which captures sparsely local and global spatial interactions across views and agents. The extensive experiments on the V2V perception dataset, OPV2V, demonstrate that CoBEVT achieves state-of-the-art performance for cooperative BEV semantic segmentation. Moreover, CoBEVT is shown to be generalizable to other tasks, including 1) BEV segmentation with single-agent multi-camera and 2) 3D object detection with multi-agent LiDAR systems, achieving state-of-the-art performance with real-time inference speed. The code is available at https://github.com/DerrickXuNu/CoBEVT.", "keywords": "Autonomous driving;BEV map understanding;Vehicle-to-Vehicle (V2V) application", "primary_area": "", "supplementary_material": "/attachment/9c600200abeb6bf6867b78cb30241cf78110dc84.zip", "author": "Runsheng Xu;Zhengzhong Tu;Hao Xiang;Wei Shao;Bolei Zhou;Jiaqi Ma", "authorids": "~Runsheng_Xu3;~Zhengzhong_Tu1;~Hao_Xiang1;~Wei_Shao4;~Bolei_Zhou5;jiaqima@ucla.edu", "gender": "M;M;M;M;M;", "homepage": "https://derrickxunu.github.io/;https://vztu.github.io;https://xhwind.github.io/;https://swsamleo.github.io/wei_shao.github.io//;https://boleizhou.github.io/;", "dblp": "214/1446;218/1473.html;;24/803-6;46/8066;", "google_scholar": "QW6Ro8IAAAAJ;9ajdZaEAAAAJ;04j4RzkAAAAJ;https://scholar.google.com.au/citations?user=sdthjnoAAAAJ;9D4aG8AAAAAJ;", "orcid": ";0000-0002-7594-2292;;0000-0002-9873-8331;;", "linkedin": ";zhengzhongtu/;;;;", "or_profile": "~Runsheng_Xu3;~Zhengzhong_Tu1;~Hao_Xiang1;~Wei_Shao4;~Bolei_Zhou5;jiaqima@ucla.edu", "aff": "University of California, Los Angeles;University of Texas at Austin;University of California, Los Angeles;Arizona State University;University of California, Los Angeles;", "aff_domain": "ucla.edu;utexas.edu;ucla.edu;asu.edu;ucla.edu;", "position": "PhD student;PhD student;PhD student;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\nxu2022cobevt,\ntitle={Co{BEVT}: Cooperative Bird{\\textquoteright}s Eye View Semantic Segmentation with Sparse Transformers},\nauthor={Runsheng Xu and Zhengzhong Tu and Hao Xiang and Wei Shao and Bolei Zhou and Jiaqi Ma},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=PAFEQQtDf8s}\n}", "github": "https://github.com/DerrickXuNu/CoBEVT", "project": "", "reviewers": "KVMN;dUwG;gxHq;Kibb", "site": "https://openreview.net/forum?id=PAFEQQtDf8s", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 273, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2000389979125404276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of California, Los Angeles;University of Texas at Austin;Arizona State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.utexas.edu;https://www.asu.edu", "aff_unique_abbr": "UCLA;UT Austin;ASU", "aff_campus_unique_index": "0;1;0;0", 
"aff_campus_unique": "Los Angeles;Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PS_eCS_WCvD", "title": "Perceiver-Actor: A Multi-Task Transformer for Robotic Manipulation", "track": "main", "status": "Poster", "tldr": "A transformer trained to 'detect actions' can imitate a wide variety of tabletop manipulation tasks. ", "abstract": "Transformers have revolutionized vision and natural language processing with their ability to scale with large datasets. But in robotic manipulation, data is both limited and expensive. Can manipulation still benefit from Transformers with the right problem formulation? We investigate this question with PerAct, a language-conditioned behavior-cloning agent for multi-task 6-DoF manipulation. PerAct encodes language goals and RGB-D voxel observations with a Perceiver Transformer, and outputs discretized actions by \u201cdetecting the next best voxel action\u201d. Unlike frameworks that operate on 2D images, the voxelized 3D observation and action space provides a strong structural prior for efficiently learning 6-DoF actions. With this formulation, we train a single multi-task Transformer for 18 RLBench tasks (with 249 variations) and 7 real-world tasks (with 18 variations) from just a few demonstrations per task. Our results show that PerAct significantly outperforms unstructured image-to-action agents and 3D ConvNet baselines for a wide range of tabletop tasks.", "keywords": "Transformers;Language Grounding;Manipulation;Behavior Cloning", "primary_area": "", "supplementary_material": "/attachment/3373f8df58d5252d12b680d4c08fdcc85404f4c4.zip", "author": "Mohit Shridhar;Lucas Manuelli;Dieter Fox", "authorids": "~Mohit_Shridhar1;~Lucas_Manuelli1;~Dieter_Fox1", "gender": "M;M;M", "homepage": "http://mohitshridhar.com/;http://lucasmanuelli.com;https://homes.cs.washington.edu/~fox/", "dblp": "203/8577.html;;f/DieterFox", "google_scholar": "CrfsfFSiS0kC;0pxg5ssAAAAJ;DqXsbPAAAAAJ", "orcid": "0000-0001-7382-763X;;", "linkedin": ";;", "or_profile": "~Mohit_Shridhar1;~Lucas_Manuelli1;~Dieter_Fox1", "aff": "NVIDIA;NVIDIA;Department of Computer Science", "aff_domain": "nvidia.com;nvidia.com;cs.washington.edu", "position": "NVIDIA;Researcher;Full Professor", "bibtex": "@inproceedings{\nshridhar2022perceiveractor,\ntitle={Perceiver-Actor: A Multi-Task Transformer for Robotic Manipulation},\nauthor={Mohit Shridhar and Lucas Manuelli and Dieter Fox},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=PS_eCS_WCvD}\n}", "github": "https://github.com/peract/peract", "project": "", "reviewers": "YHWS;hXJH;vWwj;xg2V", "site": "https://openreview.net/forum?id=PS_eCS_WCvD", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 23, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 530, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1936270468943672305&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "NVIDIA;Unknown Institution", "aff_unique_dep": "NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;", "aff_unique_abbr": "NVIDIA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "id": "PZiKO7mjC43", "title": "Towards Capturing the Temporal Dynamics for Trajectory Prediction: a Coarse-to-Fine Approach", "track": 
"main", "status": "Poster", "tldr": "We explore ways of modeling the temporal corelation of predicted time-steps in the trajectory prediction task.", "abstract": " Trajectory prediction is one of the basic tasks in the autonomous driving field, which aims to predict the future position of other agents around the ego vehicle so that a safe yet efficient driving plan could be generated in the downstream module. Recently, deep learning based methods dominate the field. State-of-the-art (SOTA) methods usually follow an encoder-decoder paradigm. Specifically, the encoder is responsible for extracting information from agents' history states and HD-Map and providing a representation vector for each agent. Taking these vectors as input, the decoder predicts multi-step future positions for each agent, which is usually accomplished by a single multi-layer perceptron (MLP) to directly output a Tx2 tensor. Though models with adoptation of MLP decoder have dominated the leaderboard of multiple datasets, `the elephant in the room is that the temporal correlation among future time-steps is ignored since there is no direct relation among output neurons of a MLP. In this work, we examine this design choice and investigate several ways to apply the temporal inductive bias into the generation of future trajectories on top of a SOTA encoder. We find that simply using autoregressive RNN to generate future positions would lead to significant performance drop even with techniques such as history highway and teacher forcing. Instead, taking scratch trajectories generated by MLP as input, an additional refinement module based on structures with temporal prior such as RNN or 1D-CNN could remarkably boost the accuracy. Furthermore, we examine several objective functions to emphasize the temporal priors. 
By the combination of aforementioned techniques to introduce the temporal prior, we improve the top-ranked method's performance by a large margin and achieve SOTA result on the Waymo Open Motion Challenge.", "keywords": "autonomous prediction;trajectory prediction", "primary_area": "", "supplementary_material": "/attachment/465affd1a3c6df2fb6a445192e0c12a9c24b4a08.zip", "author": "Xiaosong Jia;Li Chen;Penghao Wu;Jia Zeng;Junchi Yan;Hongyang Li;Yu Qiao", "authorids": "~Xiaosong_Jia1;~Li_Chen15;~Penghao_Wu1;~Jia_Zeng2;~Junchi_Yan2;~Hongyang_Li1;~Yu_Qiao1", "gender": "M;M;;M;;M;", "homepage": "https://jiaxiaosong1002.github.io/;https://ilnehc.github.io/;;;;https://datascience.hku.hk/people/hongyang-li/;", "dblp": "274/6360;181/2847;320/7785;;;95/8433-1;", "google_scholar": "JeFQwxUAAAAJ;ulZxvY0AAAAJ;9mssd5EAAAAJ;;;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ;", "orcid": ";;;0000-0003-0682-4898;;0000-0001-9110-5534;", "linkedin": ";;;;;hongyangli2020/;", "or_profile": "~Xiaosong_Jia1;~Li_Chen15;~Penghao_Wu1;~Jia_Zeng2;~Junchi_Yan2;~Hongyang_Li1;~Yu_Qiao1", "aff": "Shanghai Jiaotong University;Shanghai AI Laboratory;Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai AI Lab;", "aff_domain": "sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;;pjlab.org.cn;", "position": "PhD student;Researcher;Undergrad student;PhD student;;Researcher;", "bibtex": "@inproceedings{\njia2022towards,\ntitle={Towards Capturing the Temporal Dynamics for Trajectory Prediction: a Coarse-to-Fine Approach},\nauthor={Xiaosong Jia and Li Chen and Penghao Wu and Jia Zeng and Junchi Yan and Hongyang Li and Yu Qiao},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=PZiKO7mjC43}\n}", "github": "", "project": "", "reviewers": "QD7m;z8DX;YWfE;G9vs", "site": "https://openreview.net/forum?id=PZiKO7mjC43", "pdf_size": 0, "rating": "1;4;6;10", "confidence": "", "rating_avg": 5.25, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9036353824608640959&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai AI Laboratory;Shanghai AI Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.shanghai-ai-lab.com;https://www.shanghaiailab.com", "aff_unique_abbr": "SJTU;SAIL;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "QFYq0VwBCLE", "title": "Online Inverse Reinforcement Learning with Learned Observation Model", "track": "main", "status": "Poster", "tldr": "", "abstract": "With the motivation of extending incremental inverse reinforcement learning (I2RL) to real-world robotics applications with noisy observations as well as an unknown observation model, we introduce a new method (RIMEO) that approximates the observation model in order to best estimate the noise-free ground truth underlying the observations. It learns a maximum entropy distribution over the observation features governing the perception process, and then uses the inferred observation model to learn the reward function. Experimental evaluation is performed in two robotics tasks: (1) post-harvest vegetable sorting with a Sawyer arm based on human demonstration, and (2) breaching a perimeter patrol by two Turtlebots. 
Our experiments reveal that RIMEO learns a more accurate policy compared to (a) a state-of-the-art IRL method that does not directly learn an observation model, and (b) a custom baseline that learns a less sophisticated observation model. Furthermore, we show that RIMEO admits formal guarantees of monotonic convergence and a sample complexity bound.", "keywords": "Observation model;Inverse reinforcement learning;Maximum entropy", "primary_area": "", "supplementary_material": "/attachment/02865b1e122c3dc4b47a7cca29d62ff945656de6.zip", "author": "Saurabh Arora;Prashant Doshi;Bikramjit Banerjee", "authorids": "sa08751@uga.edu;~Prashant_Doshi1;~Bikramjit_Banerjee1", "gender": ";M;", "homepage": ";http://thinc.cs.uga.edu;https://sites.usm.edu/banerjee", "dblp": ";d/PrashantDoshi;18/6694", "google_scholar": ";3PkyzawAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "sa08751@uga.edu;~Prashant_Doshi1;~Bikramjit_Banerjee1", "aff": ";University of Georgia;University of Southern Mississippi", "aff_domain": ";cs.uga.edu;usm.edu", "position": ";Full Professor;Full Professor", "bibtex": "@inproceedings{\narora2022online,\ntitle={Online Inverse Reinforcement Learning with Learned Observation Model},\nauthor={Saurabh Arora and Prashant Doshi and Bikramjit Banerjee},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=QFYq0VwBCLE}\n}", "github": "", "project": "", "reviewers": "MqYS;1CPf;Kim8;F2Vv", "site": "https://openreview.net/forum?id=QFYq0VwBCLE", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 9, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10232566072592085275&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of Georgia;University of Southern Mississippi", "aff_unique_dep": ";", "aff_unique_url": "https://www.uga.edu;https://www.usm.edu", "aff_unique_abbr": "UGA;USM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "QSUsBMuw0uV", "title": "Bayesian Object Models for Robotic Interaction with Differentiable Probabilistic Programming", "track": "main", "status": "Poster", "tldr": "We present a differentiable probabilistic program that helps robots build mental representations of complex everyday objects.", "abstract": "A hallmark of human intelligence is the ability to build rich mental models of previously unseen objects from very few interactions. To achieve true, continuous autonomy, robots too must possess this ability. Importantly, to integrate with the probabilistic robotics software stack, such models must encapsulate the uncertainty (resulting from noisy dynamics and observation models) in a prescriptive manner. We present Bayesian Object Models (BOMs): generative (probabilistic) models that encode both the structural and kinodynamic attributes of an object. BOMs are implemented in the form of a differentiable probabilistic program that models latent scene structure, object dynamics, and observation models. This allows for efficient and automated Bayesian inference -- samples (object trajectories) drawn from the BOM are compared with a small set of real-world observations and used to compute a likelihood function. 
Our model comprises a differentiable tree structure sampler and a differentiable physics engine, enabling gradient computation through this likelihood function. This enables gradient-based Bayesian inference to efficiently update the distributional parameters of our model. BOMs outperform several recent approaches, including differentiable physics-based, gradient-free, and neural inference schemes. Further information at: https://bayesianobjects.github.io/", "keywords": "Simulation;Probabilistic programming;Differentiable physics;Bayesian inference;Causal inference", "primary_area": "", "supplementary_material": "/attachment/f5717254a6b44388e5213a92ab8d995867664e2f.zip", "author": "Krishna Murthy Jatavallabhula;Miles Macklin;Dieter Fox;Animesh Garg;Fabio Ramos", "authorids": "~Krishna_Murthy_Jatavallabhula1;~Miles_Macklin1;~Dieter_Fox1;~Animesh_Garg1;~Fabio_Ramos1", "gender": "M;M;M;M;M", "homepage": "https://mmacklin.com;https://homes.cs.washington.edu/~fox/;http://animesh.garg.tech;https://fabioramos.github.io/;http://krrish94.github.io/", "dblp": ";f/DieterFox;123/5728;22/2488;192/7394", "google_scholar": ";DqXsbPAAAAAJ;zp8V7ZMAAAAJ;https://scholar.google.com.au/citations?user=T_mJiHoAAAAJ;https://scholar.google.co.in/citations?user=kcr8134AAAAJ", "orcid": ";;0000-0003-0482-4296;;", "linkedin": ";;animeshgarg/;fabio-ramos-3256b421/;krrish94/", "or_profile": "~Miles_Macklin1;~Dieter_Fox1;~Animesh_Garg1;~Fabio_Ramos1;~J._Krishna_Murthy1", "aff": "NVIDIA;Department of Computer Science;University of Toronto;NVIDIA;University of Montreal", "aff_domain": "nvidia.com;cs.washington.edu;toronto.edu;nvidia.com;umontreal.ca", "position": "Principal Engineer;Full Professor;Assistant Professor;Principal Research Scientist;PhD student", "bibtex": "@inproceedings{\njatavallabhula2022bayesian,\ntitle={Bayesian Object Models for Robotic Interaction with Differentiable Probabilistic Programming},\nauthor={Krishna Murthy Jatavallabhula and Miles Macklin and Dieter Fox and Animesh Garg and Fabio Ramos},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=QSUsBMuw0uV}\n}", "github": "", "project": "", "reviewers": "b3K8;JsgS;rwAv;8ymB", "site": "https://openreview.net/forum?id=QSUsBMuw0uV", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 6, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1405846526247546816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "NVIDIA;Unknown Institution;University of Toronto;University of Montreal", "aff_unique_dep": "NVIDIA Corporation;Department of Computer Science;;", "aff_unique_url": "https://www.nvidia.com;;https://www.utoronto.ca;https://wwwumontreal.ca", "aff_unique_abbr": "NVIDIA;;U of T;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0;2", "aff_country_unique": "United States;;Canada" }, { "id": "QgXArq7RIh", "title": "Learning and Retrieval from Prior Data for Skill-based Imitation Learning", "track": "main", "status": "Poster", "tldr": "We introduces a retrieval-augmented skill-based imitation learning method that leverages large prior robotic datasets to learn new tasks efficiently using a small number of human demonstrations", "abstract": "Imitation learning offers a promising path for robots to learn general-purpose tasks, but traditionally has enjoyed limited 
scalability due to high data supervision requirements and brittle generalization. Inspired by recent work on skill-based imitation learning, we investigate whether leveraging prior data from previous related tasks can enable learning novel tasks in a more robust, data-efficient manner. To make effective use of the prior data, the agent must internalize knowledge from the prior data and contextualize this knowledge in novel tasks. To that end we propose a skill-based imitation learning framework that extracts temporally-extended sensorimotor skills from prior data and subsequently learns a policy for the target task with respect to these learned skills. We find a number of modeling choices significantly improve performance on novel tasks, namely representation learning objectives to enable more predictable and consistent skill representations and a retrieval-based data augmentation procedure to increase the scope of supervision for the policy. On a number of multi-task manipulation domains, we demonstrate that our method significantly outperforms existing imitation learning and offline reinforcement learning approaches. Videos and code are available at https://ut-austin-rpl.github.io/sailor", "keywords": "Imitation Learning;Skill Learning;Robot Manipulation", "primary_area": "", "supplementary_material": "/attachment/6e1f658fbb52100be7e9c0b74bd2a522bbf5cc32.zip", "author": "Soroush Nasiriany;Tian Gao;Ajay Mandlekar;Yuke Zhu", "authorids": "~Soroush_Nasiriany1;gaot19@utexas.edu;~Ajay_Mandlekar1;~Yuke_Zhu1", "gender": ";;M;M", "homepage": "http://snasiriany.me/;;https://ai.stanford.edu/~amandlek/;https://cs.utexas.edu/~yukez/", "dblp": "185/5645;;https://dblp.uni-trier.de/pers/hd/m/Mandlekar:Ajay;133/1772", "google_scholar": "bBLqsgkAAAAJ;;MEz23joAAAAJ;mWGyYMsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Soroush_Nasiriany1;gaot19@utexas.edu;~Ajay_Mandlekar1;~Yuke_Zhu1", "aff": "University of Texas, Austin;;Stanford University;Computer Science Department, University of Texas, Austin", "aff_domain": "utexas.edu;;stanford.edu;cs.utexas.edu", "position": "PhD student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnasiriany2022learning,\ntitle={Learning and Retrieval from Prior Data for Skill-based Imitation Learning},\nauthor={Soroush Nasiriany and Tian Gao and Ajay Mandlekar and Yuke Zhu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=QgXArq7RIh}\n}", "github": "", "project": "", "reviewers": "XCRj;Qe4M;m8W5;bYiA", "site": "https://openreview.net/forum?id=QgXArq7RIh", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 25, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5283823341693306238&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.stanford.edu", "aff_unique_abbr": "UT Austin;Stanford", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Austin;Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Re3NjSwf0WF", "title": "Legged Locomotion in Challenging Terrains using Egocentric Vision", "track": "main", "status": "Oral", "tldr": "We show how to use egocentric vision to walk across challenging terrain without explicit terrain mapping on a 
small A1 robot", "abstract": "Animals are capable of precise and agile locomotion using vision. Replicating this ability has been a long-standing goal in robotics. The traditional approach has been to decompose this problem into elevation mapping and foothold planning phases. The elevation mapping, however, is susceptible to failure and large noise artifacts, requires specialized hardware, and is biologically implausible. In this paper, we present the first end-to-end locomotion system capable of traversing stairs, curbs, stepping stones, and gaps. We show this result on a medium-sized quadruped robot using a single front-facing depth camera. The small size of the robot necessitates discovering specialized gait patterns not seen elsewhere. The egocentric camera requires the policy to remember past information to estimate the terrain under its hind feet. We train our policy in simulation. Training has two phases - first, we train a policy using reinforcement learning with a cheap-to-compute variant of depth image and then in phase 2 distill it into the final policy that uses depth using supervised learning. The resulting policy transfers to the real world and is able to run in real-time on the limited compute of the robot. It can traverse a large variety of terrain while being robust to perturbations like pushes, slippery surfaces, and rocky terrain. Videos are at https://vision-locomotion.github.io", "keywords": "legged robots;reinforcement learning;vision;locomotion;walking", "primary_area": "", "supplementary_material": "/attachment/a4339209f2978fa31e940f46df6f1f5907cdfc6b.zip", "author": "Ananye Agarwal;Ashish Kumar;Jitendra Malik;Deepak Pathak", "authorids": "~Ananye_Agarwal1;~Ashish_Kumar1;~Jitendra_Malik2;~Deepak_Pathak1", "gender": "M;M;M;M", "homepage": "https://anag.me/;https://ashish-kmr.github.io/;https://people.eecs.berkeley.edu/~malik/;https://www.cs.cmu.edu/~dpathak/", "dblp": "294/4812;34/5378;58/2944;155/9860", "google_scholar": "https://scholar.google.com/citations?hl=en;Oj-2ZNEAAAAJ;oY9R5YQAAAAJ;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ", "orcid": ";;0000-0003-3695-1580;", "linkedin": ";;;pathak22/", "or_profile": "~Ananye_Agarwal1;~Ashish_Kumar1;~Jitendra_Malik2;~Deepak_Pathak1", "aff": "Carnegie Mellon University;University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University", "aff_domain": "cmu.edu;berkeley.edu;berkeley.edu;cmu.edu", "position": "PhD student;Graduate Student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nagarwal2022legged,\ntitle={Legged Locomotion in Challenging Terrains using Egocentric Vision},\nauthor={Ananye Agarwal and Ashish Kumar and Jitendra Malik and Deepak Pathak},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Re3NjSwf0WF}\n}", "github": "", "project": "", "reviewers": "YYJj;bvG9;evpZ;9Sia", "site": "https://openreview.net/forum?id=Re3NjSwf0WF", "pdf_size": 0, "rating": "10;10;10;10", "confidence": "", "rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 242, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4033918213630570580&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu", "aff_unique_abbr": "CMU;UC Berkeley", 
"aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "RgJwDQwW82y", "title": "Last-Mile Embodied Visual Navigation", "track": "main", "status": "Poster", "tldr": "A last-mile navigation module that connects to prior policies, leading to improved image-goal navigation results in simulation and real-robot experiments.", "abstract": "Realistic long-horizon tasks like image-goal navigation involve exploratory and exploitative phases. Assigned with an image of the goal, an embodied agent must explore to discover the goal, i.e., search efficiently using learned priors. Once the goal is discovered, the agent must accurately calibrate the last-mile of navigation to the goal. As with any robust system, switches between exploratory goal discovery and exploitative last-mile navigation enable better recovery from errors. Following these intuitive guide rails, we propose SLING to improve the performance of existing image-goal navigation systems. Entirely complementing prior methods, we focus on last-mile navigation and leverage the underlying geometric structure of the problem with neural descriptors. With simple but effective switches, we can easily connect SLING with heuristic, reinforcement learning, and neural modular policies. On a standardized image-goal navigation benchmark (Hahn et al. 2021), we improve performance across policies, scenes, and episode complexity, raising the state-of-the-art from 45% to 55% success rate. Beyond photorealistic simulation, we conduct real-robot experiments in three physical scenes and find these improvements to transfer well to real environments.", "keywords": "Visual Navigation;Embodied AI;Image-Goal Navigation;Perspective-n-Point;AI Habitat;Sim-to-Real", "primary_area": "", "supplementary_material": "/attachment/0d7a25b5aa5e827beb290d12438194474899d9c4.zip", "author": "Justin Wasserman;Karmesh Yadav;Girish Chowdhary;Abhinav Gupta;Unnat Jain", "authorids": "~Justin_Wasserman1;~Karmesh_Yadav1;~Girish_Chowdhary1;~Abhinav_Gupta1;~Unnat_Jain1", "gender": ";M;M;M;", "homepage": "https://jbwasse2.github.io/;https://www.karmeshyadav.com;http://www.daslab.illinois.edu;http://www.cs.cmu.edu/~abhinavg;", "dblp": ";264/3702;09/5775;36/7024-1;", "google_scholar": "6OP2QCkAAAAJ;VsTyEcQAAAAJ;pf2zAXkAAAAJ;https://scholar.google.com.tw/citations?user=bqL73OkAAAAJ;", "orcid": ";;;;", "linkedin": ";karmesh-yadav/;girishchowdhary/;;", "or_profile": "~Justin_Wasserman1;~Karmesh_Yadav1;~Girish_Chowdhary1;~Abhinav_Gupta1;~Unnat_Jain1", "aff": "University of Illinois, Urbana Champaign;Meta AI;University of Illinois, Urbana Champaign;Meta Facebook;", "aff_domain": "illinois.edu;meta.com;illinois.edu;fb.com;", "position": "PhD student;Researcher;Associate Professor;Researcher;", "bibtex": "@inproceedings{\nwasserman2022lastmile,\ntitle={Last-Mile Embodied Visual Navigation},\nauthor={Justin Wasserman and Karmesh Yadav and Girish Chowdhary and Abhinav Gupta and Unnat Jain},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=RgJwDQwW82y}\n}", "github": "https://github.com/Jbwasse2/SLING", "project": "", "reviewers": "wKSA;FqFC;D7Jh", "site": "https://openreview.net/forum?id=RgJwDQwW82y", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 45, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12756293700381382368&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Meta", "aff_unique_dep": ";Meta AI", "aff_unique_url": "https://illinois.edu;https://meta.com", "aff_unique_abbr": "UIUC;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "RzhhFh4rkWu", "title": "Discriminator-Guided Model-Based Offline Imitation Learning", "track": "main", "status": "Poster", "tldr": "Model-based offline imitation learning for small datasets", "abstract": "Offline imitation learning (IL) is a powerful method to solve decision-making problems from expert demonstrations without reward labels. Existing offline IL methods suffer from severe performance degeneration under limited expert data. Including a learned dynamics model can potentially improve the state-action space coverage of expert data, however, it also faces challenging issues like model approximation/generalization errors and suboptimality of rollout data. In this paper, we propose the Discriminator-guided Model-based offline Imitation Learning (DMIL) framework, which introduces a discriminator to simultaneously distinguish the dynamics correctness and sub-optimality of model rollout data against real expert demonstrations. DMIL adopts a novel cooperative-yet-adversarial learning strategy, which uses the discriminator to guide and couple the learning process of the policy and dynamics model, resulting in improved model performance and robustness. Our framework can also be extended to the case when demonstrations contain a large proportion of suboptimal data. 
Experimental results show that DMIL and its extension achieve superior performance and robustness compared to state-of-the-art offline IL methods under small datasets.", "keywords": "Offline Imitation Learning;Model-based Learning", "primary_area": "", "supplementary_material": "/attachment/bb81c5f59246dfbc20271bbd9c0a8bb8364a93e9.zip", "author": "Wenjia Zhang;Haoran Xu;Haoyi Niu;Peng Cheng;Ming Li;Heming Zhang;Guyue Zhou;Xianyuan Zhan", "authorids": "~Wenjia_Zhang2;~Haoran_Xu4;~Haoyi_Niu1;~Peng_Cheng1;~Ming_Li22;~Heming_Zhang2;~Guyue_Zhou2;~Xianyuan_Zhan1", "gender": ";M;M;M;M;;M;M", "homepage": ";https://ryanxhr.github.io/;https://t6-thu.github.io;;https://lmhmx.github.io;https://www.au.tsinghua.edu.cn/info/1180/2109.htm;https://air.tsinghua.edu.cn/en/info/1046/1196.htm;http://zhanxianyuan.xyz/", "dblp": ";;;76/185-13;;;133/4199;181/5081", "google_scholar": ";iX8AJI0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;;;pDMnGloAAAAJ", "orcid": "0000-0001-5212-6500;;0000-0002-7072-3787;;;;;0000-0002-3683-0554", "linkedin": ";;;;;;;", "or_profile": "~Wenjia_Zhang2;~Haoran_Xu4;~Haoyi_Niu1;~Peng_Cheng1;~Ming_Li22;~Heming_Zhang2;~Guyue_Zhou2;~Xianyuan_Zhan1", "aff": "Tsinghua University;JD.com;Department of Automation, Tsinghua University;Beijing Jiaotong University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;jd.com;tsinghua.edu.cn;bjtu.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Researcher;Undergrad student;PhD student;Undergrad student;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2022discriminatorguided,\ntitle={Discriminator-Guided Model-Based Offline Imitation Learning},\nauthor={Wenjia Zhang and Haoran Xu and Haoyi Niu and Peng Cheng and Ming Li and Heming Zhang and Guyue Zhou and Xianyuan Zhan},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=RzhhFh4rkWu}\n}", "github": "", "project": "", "reviewers": "8kmH;ihkf;wiFv;F1vv", "site": "https://openreview.net/forum?id=RzhhFh4rkWu", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17525410821595066278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;0;0;0;0", "aff_unique_norm": "Tsinghua University;JD.com;Beijing Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.jd.com;http://www.njtu.edu.cn/en", "aff_unique_abbr": "THU;JD;BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SM70KHTBG-0", "title": "Vision-based Uneven BEV Representation Learning with Polar Rasterization and Surface Estimation", "track": "main", "status": "Poster", "tldr": "We propose PolarBEV for vision-based uneven BEV representation learning.", "abstract": "In this work, we propose PolarBEV for vision-based uneven BEV representation learning. To adapt to the foreshortening effect of camera imaging, we rasterize the BEV space both angularly and radially, and introduce polar embedding decomposition to model the associations among polar grids. Polar grids are rearranged to an array-like regular representation for efficient processing. 
Besides, to determine the 2D-to-3D correspondence, we iteratively update the BEV surface based on a hypothetical plane, and adopt height-based feature transformation. PolarBEV keeps real-time inference speed on a single 2080Ti GPU, and outperforms other methods for both BEV semantic segmentation and BEV instance segmentation. Thorough ablations are presented to validate the design. The code will be released for facilitating further research.", "keywords": "Polar Rasterization;Surface Estimation;BEV Segmentation", "primary_area": "", "supplementary_material": "/attachment/17672a7dd7ddc3d63983ef9773d15139733f990b.zip", "author": "Zhi Liu;Shaoyu Chen;Xiaojie Guo;Xinggang Wang;Tianheng Cheng;Hongmei Zhu;Qian Zhang;Wenyu Liu;Yi Zhang", "authorids": "~Zhi_Liu8;~Shaoyu_Chen1;~Xiaojie_Guo2;~Xinggang_Wang1;~Tianheng_Cheng1;~Hongmei_Zhu2;~Qian_Zhang7;~Wenyu_Liu3;~Yi_Zhang39", "gender": "M;M;M;M;F;M;M;F;M", "homepage": "https://github.com/outsidercsy;https://sites.google.com/view/xjguo;https://xwcv.github.io/index.htm;https://github.com/wondervictor;https://www.researchgate.net/scientific-contributions/Hongmei-Zhu-2114637757;;http://eic.hust.edu.cn/professor/liuwenyu/;http://cic.tju.edu.cn/info/1072/1251.htm;https://zhiliu.com", "dblp": "25/1277;43/8066-1;95/3056;230/4157;55/5412.html;04/2024-9;42/4110-1.html;;", "google_scholar": "PIeNN2gAAAAJ;RL7jPuQAAAAJ;qNCTLV0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;pCY-bikAAAAJ;D7jDk7gAAAAJ;;", "orcid": ";;0000-0001-6732-7823;;;;0000-0002-4582-7488;;", "linkedin": ";;;tianheng-cheng-a74362112/;;;;;", "or_profile": "~Shaoyu_Chen1;~Xiaojie_Guo2;~Xinggang_Wang1;~Tianheng_Cheng1;~Hongmei_Zhu2;~Qian_Zhang7;~Wenyu_Liu3;~Yi_Zhang39;~Liu_Zhi1", "aff": "Horizon Robotics;;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Horizon Robotics;Horizon Robotics;Huazhong University of Science and Technology;Tianjin University;Tianjin University", "aff_domain": "horizon.ai;;hust.edu.cn;hust.edu.cn;horizon.ai;horizon.cc;hust.edu.cn;tju.edu.cn;tju.edu", "position": "Researcher;;Full Professor;PhD student;Engineer;Researcher;Full Professor;Associate Professor;MS student", "bibtex": "@inproceedings{\nliu2022visionbased,\ntitle={Vision-based Uneven {BEV} Representation Learning with Polar Rasterization and Surface Estimation},\nauthor={Zhi Liu and Shaoyu Chen and Xiaojie Guo and Xinggang Wang and Tianheng Cheng and Hongmei Zhu and Qian Zhang and Wenyu Liu and Yi Zhang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=SM70KHTBG-0}\n}", "github": "https://github.com/SuperZ-Liu/PolarBEV", "project": "", "reviewers": "SUoV;Zrsk;iJAs", "site": "https://openreview.net/forum?id=SM70KHTBG-0", "pdf_size": 0, "rating": "4;4;6", "confidence": "", "rating_avg": 4.666666666666667, "confidence_avg": 0, "replies_avg": 8, "authors#_avg": 9, "corr_rating_confidence": 0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17071391729332487176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0;0;1;2;2", "aff_unique_norm": "Horizon Robotics;Huazhong University of Science and Technology;Tianjin University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.horizon-robotics.com/;http://www.hust.edu.cn;http://www.tju.edu.cn", "aff_unique_abbr": "Horizon Robotics;HUST;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SrSCqW4dq9", 
"title": "Volumetric-based Contact Point Detection for 7-DoF Grasping", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this paper, we propose a novel grasp pipeline based on contact point detection on the truncated signed distance function (TSDF) volume to achieve closed-loop 7-degree-of-freedom (7-DoF) grasping on cluttered environments. The key aspects of our method are that 1) the proposed pipeline exploits the TSDF volume in terms of multi-view fusion, contact-point sampling and evaluation, and collision checking, which provides reliable and collision-free 7-DoF gripper poses with real-time performance; 2) the contact-based pose representation effectively eliminates the ambiguity introduced by the normal-based methods, which provides a more precise and flexible solution. Extensive simulated and real-robot experiments demonstrate that the proposed pipeline can select more antipodal and stable grasp poses and outperforms normal-based baselines in terms of the grasp success rate in both simulated and physical scenarios. Code and data are available at https://github.com/caijunhao/vcpd", "keywords": "Contact point detection;7-DoF grasping;General object grasping", "primary_area": "", "supplementary_material": "/attachment/e551ebc53177f45a848b8cade023c0830480fcd9.zip", "author": "Junhao Cai;Jingcheng Su;Zida Zhou;Hui Cheng;Qifeng Chen;Michael Y Wang", "authorids": "~Junhao_Cai1;~Jingcheng_Su1;~Zida_Zhou1;~Hui_Cheng5;~Qifeng_Chen1;~Michael_Y_Wang1", "gender": "M;M;M;;M;", "homepage": ";https://scott-su.github.io/;http://lab.sysu-robotics.com/?page_id=2;;http://cqf.io/;", "dblp": "226/6472;;;;117/4819;", "google_scholar": "GeSCNR4AAAAJ;;;;lLMX9hcAAAAJ;", "orcid": "0000-0002-1440-0406;;;;;", "linkedin": ";;;;;", "or_profile": "~Junhao_Cai1;~Jingcheng_Su1;~Zida_Zhou1;~Hui_Cheng5;~Qifeng_Chen1;~Michael_Y_Wang1", "aff": "Hong Kong University of Science and Technology;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;;Hong Kong University of Science and Technology;", "aff_domain": "ust.hk;sysu.edu.cn;sysu.edu.cn;;hkust.edu;", "position": "PhD student;MS student;Researcher;;Assistant Professor;", "bibtex": "@inproceedings{\ncai2022volumetricbased,\ntitle={Volumetric-based Contact Point Detection for 7-DoF Grasping},\nauthor={Junhao Cai and Jingcheng Su and Zida Zhou and Hui Cheng and Qifeng Chen and Michael Y Wang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=SrSCqW4dq9}\n}", "github": "", "project": "", "reviewers": "EzWb;cFdk;AfSj;eHii", "site": "https://openreview.net/forum?id=SrSCqW4dq9", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13941507163105176864&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;http://www.sysu.edu.cn", "aff_unique_abbr": "HKUST;SYSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "TAgVKiF2O8p", "title": "Concept Learning for Interpretable Multi-Agent Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "We introduce a method for incorporating interpretable concepts from a domain expert into 
models trained through multi-agent reinforcement learning by requiring the model to first predict such concepts then utilize them for decision making.", "abstract": "Multi-agent robotic systems are increasingly operating in real-world environments in close proximity to humans, yet are largely controlled by policy models with inscrutable deep neural network representations. We introduce a method for incorporating interpretable concepts from a domain expert into models trained through multi-agent reinforcement learning, by requiring the model to first predict such concepts then utilize them for decision making. This allows an expert to both reason about the resulting concept policy models in terms of these high-level concepts at run-time, as well as intervene and correct mispredictions to improve performance. We show that this yields improved interpretability and training stability, with benefits to policy performance and sample efficiency in a simulated and real-world cooperative-competitive multi-agent game.", "keywords": "Multi-Agent Reinforcement Learning;Interpretable Machine Learning", "primary_area": "", "supplementary_material": "/attachment/f0c4f54e1fdbf31f0c26602fe8e61e346f6700fe.zip", "author": "Renos Zabounidis;Joseph Campbell;Simon Stepputtis;Dana Hughes;Katia P. Sycara", "authorids": "~Renos_Zabounidis1;~Joseph_Campbell1;~Simon_Stepputtis1;~Dana_Hughes1;~Katia_P._Sycara1", "gender": ";;;M;F", "homepage": ";;https://simonstepputtis.com/;http://danathughes.com;", "dblp": ";179/2732;192/7092;;s/KatiaPSycara", "google_scholar": "Yt9SHQUAAAAJ;1NmM6OUAAAAJ;WUQgzsAAAAAJ;mv_fbkkAAAAJ;VWv6a9kAAAAJ", "orcid": ";;0009-0003-0519-3454;;", "linkedin": ";;simon-stepputtis/;;", "or_profile": "~Renos_Zabounidis1;~Joseph_Campbell1;~Simon_Stepputtis1;~Dana_Hughes1;~Katia_P._Sycara1", "aff": "University of Massachusetts, Amherst;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "umass.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "Undergrad student;Postdoc;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzabounidis2022concept,\ntitle={Concept Learning for Interpretable Multi-Agent Reinforcement Learning},\nauthor={Renos Zabounidis and Joseph Campbell and Simon Stepputtis and Dana Hughes and Katia P. 
Sycara},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=TAgVKiF2O8p}\n}", "github": "", "project": "", "reviewers": "p3Pf;EPqJ;dgWp;avMX", "site": "https://openreview.net/forum?id=TAgVKiF2O8p", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 23, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15280167043253082817&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Massachusetts Amherst;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.cmu.edu", "aff_unique_abbr": "UMass Amherst;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "TGUp8EaCGj9", "title": "Offline Reinforcement Learning at Multiple Frequencies", "track": "main", "status": "Poster", "tldr": "Offline reinforcement learning over multiple discretizations creates training instabilities that can be solved with a simple adaptation of N-step returns", "abstract": "To leverage many sources of offline robot data, robots must grapple with the heterogeneity of such data. In this paper, we focus on one particular aspect of this challenge: learning from offline data collected at different control frequencies. Across labs, the discretization of controllers, sampling rates of sensors, and demands of a task of interest may differ, giving rise to a mixture of frequencies in an aggregated dataset. We study how well offline reinforcement learning (RL) algorithms can accommodate data with a mixture of frequencies during training. We observe that the $Q$-value propagates at different rates for different discretizations, leading to a number of learning challenges for off-the-shelf offline RL algorithms. We present a simple yet effective solution that enforces consistency in the rate of $Q$-value updates to stabilize learning. By scaling the value of $N$ in $N$-step returns with the discretization size, we effectively balance $Q$-value propagation, leading to more stable convergence. 
On three simulated robotic control problems, we empirically find that this simple approach significantly outperforms na\\\"ive mixing both in terms of absolute performance and training stability, while also improving over using only the data from a single control frequency.", "keywords": "offline reinforcement learning;robotics", "primary_area": "", "supplementary_material": "/attachment/1317974b0165f3bbcda1eb4e0ed95224c5e68390.zip", "author": "Kaylee Burns;Tianhe Yu;Chelsea Finn;Karol Hausman", "authorids": "~Kaylee_Burns2;~Tianhe_Yu1;~Chelsea_Finn1;~Karol_Hausman2", "gender": "F;M;F;M", "homepage": "https://kayburns.github.io;https://cs.stanford.edu/~tianheyu/;https://ai.stanford.edu/~cbfinn/;https://karolhausman.github.io/", "dblp": "217/3002;192/1797;131/1783;135/8164", "google_scholar": "N_rVVG8AAAAJ;;vfPE6hgAAAAJ;yy0UFOwAAAAJ", "orcid": ";;;", "linkedin": ";;;karolhausman/", "or_profile": "~Kaylee_Burns2;~Tianhe_Yu1;~Chelsea_Finn1;~Karol_Hausman1", "aff": "Stanford University;Stanford University;Google;Google Brain", "aff_domain": "stanford.edu;stanford.edu;google.com;google.com", "position": "PhD student;PhD student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nburns2022offline,\ntitle={Offline Reinforcement Learning at Multiple Frequencies},\nauthor={Kaylee Burns and Tianhe Yu and Chelsea Finn and Karol Hausman},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=TGUp8EaCGj9}\n}", "github": "https://github.com/stanford-iris-lab/offline_rl_at_multiple_freqs", "project": "", "reviewers": "W1tF;Avb9;Z97e;1JmA", "site": "https://openreview.net/forum?id=TGUp8EaCGj9", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1936092916315740117&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "UAA5bNospA0", "title": "PLATO: Predicting Latent Affordances Through Object-Centric Play", "track": "main", "status": "Poster", "tldr": "We learn to represent object affordances from diverse human play data and demonstrate that we can learn more generalizable imitation policies by conditioning on these discovered latent affordances.", "abstract": "Constructing a diverse repertoire of manipulation skills in a scalable fashion remains an unsolved challenge in robotics. One way to address this challenge is with unstructured human play, where humans operate freely in an environment to reach unspecified goals. Play is a simple and cheap method for collecting diverse user demonstrations with broad state and goal coverage over an environment. Due to this diverse coverage, existing approaches for learning from play are more robust to online policy deviations from the offline data distribution. However, these methods often struggle to learn under scene variation and on challenging manipulation primitives, due in part to improperly associating complex behaviors with the scene changes they induce.
Our insight is that an object-centric view of play data can help link human behaviors and the resulting changes in the environment, and thus improve multi-task policy learning. In this work, we construct a latent space to model object \\textit{affordances} -- properties of an object that define its uses -- in the environment, and then learn a policy to achieve the desired affordances. By modeling and predicting the desired affordance across variable horizon tasks, our method, Predicting Latent Affordances Through Object-Centric Play (PLATO), outperforms existing methods on complex manipulation tasks in both 2D and 3D object manipulation simulation and real world environments for diverse types of interactions. Videos can be found on our \n website: https://sites.google.com/view/plato-corl22/home.", "keywords": "Human Play Data;Object Affordance Learning;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/e11fbc003ab425adc59addcc7ba38d72c6028d7d.zip", "author": "Suneel Belkhale;Dorsa Sadigh", "authorids": "~Suneel_Belkhale1;~Dorsa_Sadigh1", "gender": "M;F", "homepage": "https://github.com/suneelbelkhale;https://dorsa.fyi/", "dblp": "236/5069;117/3174", "google_scholar": ";ZaJEZpYAAAAJ", "orcid": "0000-0002-3963-7987;", "linkedin": "suneel-b-032b1a101/;", "or_profile": "~Suneel_Belkhale1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbelkhale2022plato,\ntitle={{PLATO}: Predicting Latent Affordances Through Object-Centric Play},\nauthor={Suneel Belkhale and Dorsa Sadigh},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=UAA5bNospA0}\n}", "github": "", "project": "", "reviewers": "Qcyf;KtMz;Z8qY;7Bgr", "site": "https://openreview.net/forum?id=UAA5bNospA0", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7963411702820315382&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "UW5A3SweAH", "title": "LM-Nav: Robotic Navigation with Large Pre-Trained Models of Language, Vision, and Action", "track": "main", "status": "Poster", "tldr": "We can utilize pre-trained models of images and language to provide a textual interface to visual navigation models, enabling zero-shot instruction following robots in the real world!", "abstract": "Goal-conditioned policies for robotic navigation can be trained on large, unannotated datasets, providing for good generalization to real-world settings. However, particularly in vision-based settings where specifying goals requires an image, this makes for an unnatural interface. Language provides a more convenient modality for communication with robots, but contemporary methods typically require expensive supervision, in the form of trajectories annotated with language descriptions. 
We present a system, LM-Nav, for robotic navigation that enjoys the benefits of training on unannotated large datasets of trajectories, while still providing a high-level interface to the user. Instead of utilizing a labeled instruction following dataset, we show that such a system can be constructed entirely out of pre-trained models for navigation (ViNG), image-language association (CLIP), and language modeling (GPT-3), without requiring any fine-tuning or language-annotated robot data. LM-Nav extracts landmarks names from an instruction, grounds them in the world via the image-language model, and then reaches them via the (vision-only) navigation model. We instantiate LM-Nav on a real-world mobile robot and demonstrate long-horizon navigation through complex, outdoor environments from natural language instructions.", "keywords": "instruction following;language models;vision-based navigation", "primary_area": "", "supplementary_material": "/attachment/2ac471255df6deecb6f8b4b633e8113050f7d1c1.zip", "author": "Dhruv Shah;B\u0142a\u017cej Osi\u0144ski;brian ichter;Sergey Levine", "authorids": "~Dhruv_Shah1;~B\u0142a\u017cej_Osi\u0144ski1;~brian_ichter1;~Sergey_Levine1", "gender": "M;;;M", "homepage": "http://cs.berkeley.edu/~shah;;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";218/5547;;80/7594", "google_scholar": ";WuWWdKcAAAAJ;-w5DuHgAAAAJ;8R35rCwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Dhruv_Shah1;~B\u0142a\u017cej_Osi\u0144ski1;~brian_ichter1;~Sergey_Levine1", "aff": "UC Berkeley;University of Warsaw;Google;Google", "aff_domain": "berkeley.edu;mimuw.edu.pl;google.com;google.com", "position": "PhD student;PhD student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nshah2022lmnav,\ntitle={{LM}-Nav: Robotic Navigation with Large Pre-Trained Models of Language, Vision, and Action},\nauthor={Dhruv Shah and B{\\l}a{\\.z}ej Osi{\\'n}ski and brian ichter and Sergey Levine},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=UW5A3SweAH}\n}", "github": "https://github.com/blazejosinski/lm_nav", "project": "", "reviewers": "Mudn;2UZt;FpxS;eeqY", "site": "https://openreview.net/forum?id=UW5A3SweAH", "pdf_size": 0, "rating": "1;6;6;6", "confidence": "", "rating_avg": 4.75, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 519, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15240855498055344101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of California, Berkeley;University of Warsaw;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.uw.edu.pl;https://www.google.com", "aff_unique_abbr": "UC Berkeley;UW;Google", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Poland" }, { "id": "UWTP_JvSug", "title": "Proactive slip control by learned slip model and trajectory adaptation", "track": "main", "status": "Poster", "tldr": "", "abstract": "This paper presents a novel control approach to dealing with a slip during robotic manipulative movements. Slip is a major cause of failure in many robotic grasping and manipulation tasks. Existing works use increased gripping forces to avoid/control slip. 
However, this may not be feasible, e.g., because (i) the robot cannot increase the gripping force, as the maximum gripping force has already been applied, or (ii) an increased force would damage the grasped object, such as a soft fruit. Moreover, the robot fixes the gripping force when it forms a stable grasp on the surface of an object, and changing the gripping force during manipulative movements in real time may not be feasible, e.g., with the Franka robot. Hence, controlling the slip by changing gripping forces is not an effective control policy in many settings. We propose a novel control approach to slip avoidance, including a learned action-conditioned slip predictor and a constrained optimizer that avoids an expected slip given the desired robot actions. We show the effectiveness of this receding horizon controller in a series of test cases in real robot experimentation. Our experimental results show that our proposed data-driven predictive controller can control slip for objects unseen in training. \n", "keywords": "Manipulation;Motion planning;Slip control;Motion control", "primary_area": "", "supplementary_material": "/attachment/2b0e834b8504e3b79c68bc5df3148f6966128add.zip", "author": "Kiyanoush Nazari;Willow Mandil;Amir Masoud Ghalamzan Esfahani", "authorids": "~Kiyanoush_Nazari1;~Willow_Mandil1;~Amir_Masoud_Ghalamzan_Esfahani1", "gender": "M;;", "homepage": ";;https://staff.lincoln.ac.uk/0c09165e-bd6d-473d-93ce-4fceb583d722", "dblp": "276/2004;;164/8192", "google_scholar": "yU2nzuoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=CyMunvwAAAAJ", "orcid": ";;0000-0003-4589-0185", "linkedin": "kiyanoush-nazari-7b2039192/;;amir-ghalamzan/", "or_profile": "~Kiyanoush_Nazari1;~Willow_Mandil1;~Amir_Masoud_Ghalamzan_Esfahani1", "aff": "University of Lincoln;;University of Lincoln", "aff_domain": "lincoln.ac.uk;;lincoln.ac.uk", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nnazari2022proactive,\ntitle={Proactive slip control by learned slip model and trajectory adaptation},\nauthor={Kiyanoush Nazari and Willow Mandil and Amir Masoud Ghalamzan Esfahani},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=UWTP_JvSug}\n}", "github": "https://github.com/imanlab/proactive-slip-control", "project": "", "reviewers": "b9H8;xX6y;PezY;FJ4L", "site": "https://openreview.net/forum?id=UWTP_JvSug", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13597429798337446153&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "University of Lincoln", "aff_unique_dep": "", "aff_unique_url": "https://www.lincoln.ac.uk", "aff_unique_abbr": "UoL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "V3Mjpi4kzdn", "title": "ARC - Actor Residual Critic for Adversarial Imitation Learning", "track": "main", "status": "Poster", "tldr": "A new class of RL algorithms that improves Adversarial Imitation Learning (AIL) by leveraging differentiability of shaped AIL reward.", "abstract": "Adversarial Imitation Learning (AIL) is a class of popular state-of-the-art Imitation Learning algorithms commonly used in robotics.
In AIL, an artificial adversary's misclassification is used as a reward signal that is optimized by any standard Reinforcement Learning (RL) algorithm. Unlike most RL settings, the reward in AIL is $differentiable$, but current model-free RL algorithms do not make use of this property to train a policy. The reward in AIL is also $shaped$, since it comes from an adversary. We leverage the differentiability property of the shaped AIL reward function and formulate a class of Actor Residual Critic (ARC) RL algorithms. ARC algorithms draw a parallel to the standard Actor-Critic (AC) algorithms in the RL literature and use a residual critic, the $C$ function (instead of the standard $Q$ function), to approximate only the discounted future return (excluding the immediate reward). ARC algorithms have similar convergence properties to the standard AC algorithms, with the additional advantage that the gradient through the immediate reward is exact. For the discrete (tabular) case with finite states, actions, and known dynamics, we prove that policy iteration with the $C$ function converges to an optimal policy. In the continuous case with function approximation and unknown dynamics, we experimentally show that ARC-aided AIL outperforms standard AIL in simulated continuous-control and real robotic manipulation tasks. ARC algorithms are simple to implement and can be incorporated into any existing AIL implementation with an AC algorithm.", "keywords": "Adversarial Imitation Learning (AIL);Actor-Critic (AC);Actor Residual Critic (ARC)", "primary_area": "", "supplementary_material": "/attachment/36ed6fe1027d51f1d2da3c99f077b349904386ef.zip", "author": "Ankur Deka;Changliu Liu;Katia P. Sycara", "authorids": "~Ankur_Deka1;~Changliu_Liu1;~Katia_P._Sycara1", "gender": "M;F;F", "homepage": "https://ankur-deka.github.io/;http://www.cs.cmu.edu/~cliu6/index.html;", "dblp": ";166/3563;s/KatiaPSycara", "google_scholar": "CaBIO8cAAAAJ;;VWv6a9kAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ankur_Deka1;~Changliu_Liu1;~Katia_P._Sycara1", "aff": ";Carnegie Mellon University;Carnegie Mellon University", "aff_domain": ";cmu.edu;cmu.edu", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndeka2022arc,\ntitle={{ARC} - Actor Residual Critic for Adversarial Imitation Learning},\nauthor={Ankur Deka and Changliu Liu and Katia P.
Sycara},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=V3Mjpi4kzdn}\n}", "github": "https://github.com/Ankur-Deka/Actor-Residual-Critic", "project": "", "reviewers": "XgGH;AjRj;iG9x;FH78", "site": "https://openreview.net/forum?id=V3Mjpi4kzdn", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 63, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11979992888801828026&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "VD0nXUG5Qk", "title": "RoboTube: Learning Household Manipulation from Human Videos with Simulated Twin Environments", "track": "main", "status": "Oral", "tldr": "", "abstract": "We aim to build a useful, reproducible, democratized benchmark for learning household robotic manipulation from human videos. To realize this goal, a diverse, high-quality human video dataset curated specifically for robots is desired. To evaluate the learning progress, a simulated twin environment that resembles the appearance and the dynamics of the physical world would help roboticists and AI researchers validate their algorithms convincingly and efficiently before testing on a real robot. Hence, we present RoboTube, a human video dataset, and its digital twins for learning various robotic manipulation tasks. RoboTube video dataset contains 5{,}000 video demonstrations recorded with multi-view RGB-D cameras of human-performing everyday household tasks including manipulation of rigid objects, articulated objects, deformable objects, and bimanual manipulation. RT-sim, as the simulated twin environments, consists of 3D scanned, photo-realistic objects, minimizing the visual domain gap between the physical world and the simulated environment. After extensively benchmarking existing methods in the field of robot learning from videos, the empirical results suggest that knowledge and models learned from the RoboTube video dataset can be deployed, benchmarked, and reproduced in RT-sim and be transferred to a real robot. We hope RoboTube can lower the barrier to robotics research for beginners while facilitating reproducible research in the community. 
More experiments and videos can be found in the supplementary materials and on the website: https://sites.google.com/view/robotube", "keywords": "Learning from Videos;Video Demonstration Dataset;Real2Sim;Self-supervised Reward Learning;Robotic Simulation Benchmark", "primary_area": "", "supplementary_material": "", "author": "Haoyu Xiong;Haoyuan Fu;Jieyi Zhang;Chen Bao;Qiang Zhang;Yongxi Huang;Wenqiang Xu;Animesh Garg;Cewu Lu", "authorids": "~Haoyu_Xiong3;~Haoyuan_Fu1;~Jieyi_Zhang1;~Chen_Bao2;~Qiang_Zhang10;~Yongxi_Huang1;~Wenqiang_Xu2;~Animesh_Garg1;~Cewu_Lu3", "gender": "M;;M;M;M;M;M;M;", "homepage": "https://haoyu-x.github.io/;https://simon-fuhaoyuan.github.io/;https://github.com/SJTUzjy;https://chenbao.tech;https://github.com/Riften;;http://animesh.garg.tech;https://www.mvig.org/;https://github.com/jonyzhang2023", "dblp": ";292/4369;;;;;123/5728;;72/3527-29", "google_scholar": ";https://scholar.google.com/citations?hl=zh-TW;;HOngPZAAAAAJ;;PdzO-4YAAAAJ;zp8V7ZMAAAAJ;https://scholar.google.com.tw/citations?user=QZVQEWAAAAAJ;", "orcid": ";;;0009-0007-0042-0821;;0000-0002-8648-5576;0000-0003-0482-4296;;", "linkedin": ";;;;;;animeshgarg/;;", "or_profile": "~Haoyu_Xiong3;~Haoyuan_Fu1;~Jieyi_Zhang1;~Chen_Bao2;~Yongxi_Huang1;~Wenqiang_Xu2;~Animesh_Garg1;~Cewu_Lu3;~QIANG_ZHANG9", "aff": "Carnegie Mellon University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;University of Toronto;Shanghai Jiaotong University;", "aff_domain": "andrew.cmu.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;toronto.edu;sjtu.edu.cn;", "position": "MS student;MS student;Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nxiong2022robotube,\ntitle={RoboTube: Learning Household Manipulation from Human Videos with Simulated Twin Environments},\nauthor={Haoyu Xiong and Haoyuan Fu and Jieyi Zhang and Chen Bao and Qiang Zhang and Yongxi Huang and Wenqiang Xu and Animesh Garg and Cewu Lu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=VD0nXUG5Qk}\n}", "github": "", "project": "", "reviewers": "wYij;fnSc;ZLYq", "site": "https://openreview.net/forum?id=VD0nXUG5Qk", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 4, "authors#_avg": 9, "corr_rating_confidence": 0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=461892447459001271&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1;1;1;1;2;1", "aff_unique_norm": "Carnegie Mellon University;Shanghai Jiao Tong University;University of Toronto", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.sjtu.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "CMU;SJTU;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;2;1", "aff_country_unique": "United States;China;Canada" }, { "id": "VHia4Cint7", "title": "HERD: Continuous Human-to-Robot Evolution for Learning from Human Demonstration", "track": "main", "status": "Poster", "tldr": "A method for learning from human demonstration using continuous robot evolution.", "abstract": "The ability to learn from human demonstration endows robots with the ability to automate various tasks. 
However, directly learning from human demonstration is challenging since the structure of the human hand can be very different from the desired robot gripper. In this work, we show that manipulation skills can be transferred from a human to a robot through the use of micro-evolutionary reinforcement learning, where a five-finger human dexterous hand robot gradually evolves into a commercial robot, while repeatedly interacting in a physics simulator to continuously update the policy that is first learned from human demonstration. To deal with the high dimensions of robot parameters, we propose an algorithm for multi-dimensional evolution path searching that allows joint optimization of both the robot evolution path and the policy. Through experiments on human object manipulation datasets, we show that our framework can efficiently transfer the expert human agent policy trained from human demonstrations in diverse modalities to target commercial robots.", "keywords": "learning from human demonstration;imitation learning;curriculum learning;transfer learning;robotic manipulation", "primary_area": "", "supplementary_material": "/attachment/3cbd05aeb18d9766877512fe96f606d3a6dd6eb9.zip", "author": "Xingyu Liu;Deepak Pathak;Kris M. Kitani", "authorids": "~Xingyu_Liu1;~Deepak_Pathak1;~Kris_M._Kitani1", "gender": "M;M;M", "homepage": "https://xingyul.github.io;https://www.cs.cmu.edu/~dpathak/;http://www.cs.cmu.edu/~kkitani/", "dblp": ";155/9860;42/163", "google_scholar": "ZVABLi8AAAAJ;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ;yv3sH74AAAAJ", "orcid": ";;0000-0002-9389-4060", "linkedin": "xing-yu-liu/;pathak22/;", "or_profile": "~Xingyu_Liu1;~Deepak_Pathak1;~Kris_M._Kitani1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu", "position": "Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2022herd,\ntitle={{HERD}: Continuous Human-to-Robot Evolution for Learning from Human Demonstration},\nauthor={Xingyu Liu and Deepak Pathak and Kris M. Kitani},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=VHia4Cint7}\n}", "github": "https://github.com/xingyul/herd", "project": "", "reviewers": "mLRs;Pofh;bvcA;WPK2", "site": "https://openreview.net/forum?id=VHia4Cint7", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15850436772541710232&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ViYLaruFwN3", "title": "Latent Plans for Task-Agnostic Offline Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "The primary contribution of this work is an offline, self-supervised approach to solving goal-reaching tasks by combining model-free RL methods with imitation learning methods.", "abstract": "Everyday tasks that are long-horizon and comprise a sequence of multiple implicit subtasks still pose a major challenge in offline robot control. 
While a number of prior methods aimed to address this setting with variants of imitation and offline reinforcement learning, the learned behavior is typically narrow and often struggles to reach configurable long-horizon goals. As both paradigms have complementary strengths and weaknesses, we propose a novel hierarchical approach that combines the strengths of both methods to learn task-agnostic long-horizon policies from high-dimensional camera observations. Concretely, we combine a low-level policy that learns latent skills via imitation learning and a high-level policy learned from offline reinforcement learning for skill-chaining the latent behavior priors. Experiments in various simulated and real robot control tasks show that our formulation enables producing previously unseen combinations of skills to reach temporally extended goals by ``stitching'' together latent skills through goal chaining with an order-of-magnitude improvement in performance upon state-of-the-art baselines. We even learn one multi-task visuomotor policy for 25 distinct manipulation tasks in the real world which outperforms both imitation learning and offline reinforcement learning techniques.", "keywords": "Offline Reinforcement Learning;Imitation Learning;Robot Learning", "primary_area": "", "supplementary_material": "/attachment/52d00ee8267a13d3facc91abb9fe0514adc38e1b.zip", "author": "Erick Rosete-Beas;Oier Mees;Gabriel Kalweit;Joschka Boedecker;Wolfram Burgard", "authorids": "~Erick_Rosete-Beas1;~Oier_Mees1;~Gabriel_Kalweit1;~Joschka_Boedecker1;~Wolfram_Burgard2", "gender": "M;M;;M;M", "homepage": "https://erickrosete.com/;https://www.oiermees.com/;https://nr.informatik.uni-freiburg.de/people/gabriel-kalweit;https://nr.informatik.uni-freiburg.de;https://www.utn.de/person/wolfram-burgard/", "dblp": ";190/8659;208/0991;84/5457;b/WolframBurgard", "google_scholar": "https://scholar.google.de/citations?user=5g-WFooAAAAJ;https://scholar.google.de/citations?user=sgsLkM0AAAAJ;xLNPWK8AAAAJ;https://scholar.google.de/citations?user=2mv2dDkAAAAJ;zj6FavAAAAAJ", "orcid": ";;;;0000-0002-5680-6500", "linkedin": "erickrosete/;oier-mees-a3069488;;;burgard/?originalSubdomain=de", "or_profile": "~Erick_Rosete-Beas1;~Oier_Mees1;~Gabriel_Kalweit1;~Joschka_B\u00f6decker1;~Wolfram_Burgard3", "aff": "Albert-Ludwigs-Universit\u00e4t Freiburg;CS Department, University of Freiburg, Germany, Albert-Ludwigs-Universit\u00e4t Freiburg;Universit\u00e4t Freiburg;Universit\u00e4t Freiburg;CS Department, University of Freiburg, Germany, Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;informatik.uni-freiburg.de;uni-freiburg.de;uni-freiburg.de;informatik.uni-freiburg.de", "position": "MS student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nrosete-beas2022latent,\ntitle={Latent Plans for Task-Agnostic Offline Reinforcement Learning},\nauthor={Erick Rosete-Beas and Oier Mees and Gabriel Kalweit and Joschka Boedecker and Wolfram Burgard},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ViYLaruFwN3}\n}", "github": "", "project": "", "reviewers": "3aE6;mMCb;jnvs;nPLz", "site": "https://openreview.net/forum?id=ViYLaruFwN3", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4087469931010367361&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 6, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Albert-Ludwigs-Universit\u00e4t Freiburg;University of Freiburg", "aff_unique_dep": ";CS Department", "aff_unique_url": "https://www.uni-freiburg.de;https://www.uni-freiburg.de", "aff_unique_abbr": "Albert-Ludwigs-Universit\u00e4t;UoF", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Freiburg;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "WJbw_C-pCox", "title": "Synthesizing Adversarial Visual Scenarios for Model-Based Robotic Control", "track": "main", "status": "Poster", "tldr": "", "abstract": "Today\u2019s robots often interface data-driven perception and planning models with classical model-predictive controllers (MPC). Often, such learned perception/planning models produce erroneous waypoint predictions on out-of-distribution (OoD) or even adversarial visual inputs, which increase control cost. However, today\u2019s methods to train robust perception models are largely task-agnostic \u2013 they augment a dataset using random image transformations or adversarial examples targeted at the vision model in isolation. As such, they often introduce pixel perturbations that are ultimately benign for control. In contrast to prior work that synthesizes adversarial examples for single-step vision tasks, our key contribution is to synthesize adversarial scenarios tailored to multi-step, model-based control. To do so, we use differentiable MPC methods to calculate the sensitivity of a model-based controller to errors in state estimation. We show that re-training vision models on these adversarial datasets improves control performance on OoD test scenarios by up to 36.2% compared to standard task-agnostic data augmentation. We demonstrate our method on examples of robotic navigation, manipulation in RoboSuite, and control of an autonomous air vehicle.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/57942a5f5e9b9e0ea83af365bc3933d7ba4560f9.zip", "author": "Shubhankar Agarwal;Sandeep P. Chinchali", "authorids": "~Shubhankar_Agarwal1;~Sandeep_P._Chinchali1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Shubhankar_Agarwal1;~Sandeep_P._Chinchali1", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nagarwal2022synthesizing,\ntitle={Synthesizing Adversarial Visual Scenarios for Model-Based Robotic Control},\nauthor={Shubhankar Agarwal and Sandeep P. 
Chinchali},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=WJbw_C-pCox}\n}", "github": "", "project": "", "reviewers": "2DMp;bmgX;rbua;bY11", "site": "https://openreview.net/forum?id=WJbw_C-pCox", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1168873474719145054&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5 }, { "id": "WbdaYyDkNZL", "title": "Don\u2019t Start From Scratch: Leveraging Prior Data to Automate Robotic Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "Incorporating prior data into robotic reinforcement learning enables autonomous learning, substantially improves sample-efficiency of learning, and enables better generalization.", "abstract": "Reinforcement learning (RL) algorithms hold the promise of enabling autonomous skill acquisition for robotic systems. However, in practice, real-world robotic RL typically requires time consuming data collection and frequent human intervention to reset the environment. Moreover, robotic policies learned with RL often fail when deployed beyond the carefully controlled setting in which they were learned.\nIn this work, we study how these challenges of real-world robotic learning can all be tackled by effective utilization of diverse offline datasets collected from previously seen tasks. When faced with a new task, our system adapts previously learned skills to quickly learn to both perform the new task and return the environment to an initial state, effectively performing its own environment reset. \nOur empirical results demonstrate that incorporating prior data into robotic reinforcement learning enables autonomous learning, substantially improves sample-efficiency of learning, and enables better generalization.", "keywords": "autonomous RL;offline RL;reset-free manipulation", "primary_area": "", "supplementary_material": "/attachment/b496bc8c4620fea2fe29b576d9c2c32b73246a20.zip", "author": "Homer Rich Walke;Jonathan Heewon Yang;Albert Yu;Aviral Kumar;J\u0119drzej Orbik;Avi Singh;Sergey Levine", "authorids": "~Homer_Rich_Walke1;~Jonathan_Heewon_Yang1;~Albert_Yu1;~Aviral_Kumar2;~J\u0119drzej_Orbik1;~Avi_Singh1;~Sergey_Levine1", "gender": "M;M;;M;M;;M", "homepage": "https://homerwalke.com;;;https://aviralkumar2907.github.io/;;https://www.avisingh.org/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "279/6795;;61/5253.html;202/7961;;https://dblp.org/pers/s/Singh:Avi.html;80/7594", "google_scholar": "ZWH5jCwAAAAJ;;ZzURcb4AAAAJ;;;C2_ZXdcAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;;;", "linkedin": ";jonathan-yang-7b5542124/;alberty101/;;jedrzejorbik/;;", "or_profile": "~Homer_Rich_Walke1;~Jonathan_Heewon_Yang1;~Albert_Yu1;~Aviral_Kumar2;~J\u0119drzej_Orbik1;~Avi_Singh1;~Sergey_Levine1", "aff": "University of California, Berkeley;Electrical Engineering & Computer Science Department, University of California, Berkeley;University of Texas at Austin;University of California, Berkeley;;Google;Google", "aff_domain": "berkeley.edu;eecs.berkeley.edu;utexas.edu;berkeley.edu;;google.com;google.com", "position": "PhD student;Undergrad student;PhD student;PhD student;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nwalke2022dont,\ntitle={Don{\\textquoteright}t Start From Scratch: Leveraging Prior Data to Automate Robotic Reinforcement Learning},\nauthor={Homer Rich Walke 
and Jonathan Heewon Yang and Albert Yu and Aviral Kumar and J{\\k{e}}drzej Orbik and Avi Singh and Sergey Levine},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=WbdaYyDkNZL}\n}", "github": "", "project": "", "reviewers": "CCLn;YCu7;BwFL;rVQq", "site": "https://openreview.net/forum?id=WbdaYyDkNZL", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16819805877850340128&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;2;2", "aff_unique_norm": "University of California, Berkeley;University of Texas at Austin;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;UT Austin;Google", "aff_campus_unique_index": "0;0;1;0;2;2", "aff_campus_unique": "Berkeley;Austin;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "WkbvHMQL6uJ", "title": "Where To Start? Transferring Simple Skills to Complex Environments", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robot learning provides a number of ways to teach robots simple skills, such as grasping. However, these skills are usually trained in open, clutter-free environments, and therefore would likely cause undesirable collisions in more complex, cluttered environments. In this work, we introduce an affordance model based on a graph representation of an environment, which is optimised during deployment to find suitable robot configurations to start a skill from, such that the skill can be executed without any collisions. We demonstrate that our method can generalise a priori acquired skills to previously unseen cluttered and constrained environments, in simulation and in the real world, for both a grasping and a placing task. ", "keywords": "Robot manipulation;planning;obstacle avoidance", "primary_area": "", "supplementary_material": "/attachment/6630ee5e4b29a8fa18f934d83f535d0a42763f80.zip", "author": "Vitalis Vosylius;Edward Johns", "authorids": "~Vitalis_Vosylius1;~Edward_Johns1", "gender": "M;M", "homepage": "https://www.google.com/;https://www.robot-learning.uk", "dblp": "272/8659;68/9968", "google_scholar": "nktafp8AAAAJ;https://scholar.google.co.uk/citations?user=sMIUkiQAAAAJ", "orcid": ";0000-0002-8914-8786", "linkedin": ";https://uk.linkedin.com/in/edward-johns-1b24845a", "or_profile": "~Vitalis_Vosylius1;~Edward_Johns1", "aff": "Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;imperial.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nvosylius2022where,\ntitle={Where To Start? 
Transferring Simple Skills to Complex Environments},\nauthor={Vitalis Vosylius and Edward Johns},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=WkbvHMQL6uJ}\n}", "github": "", "project": "", "reviewers": "oZZf;dTVn;rXLB;jW3a", "site": "https://openreview.net/forum?id=WkbvHMQL6uJ", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16613969934695816466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "X4228W0QpvN", "title": "Safe Robot Learning in Assistive Devices through Neural Network Repair", "track": "main", "status": "Poster", "tldr": "In this paper, we derive an algorithm for training neural networks controllers to satisfy given safety specifications, in addition to fitting the given training/test data.", "abstract": "Assistive robotic devices are a particularly promising field of application for neural networks (NN) due to the need for personalization and hard-to-model human-machine interaction dynamics. However, NN based estimators and controllers may produce potentially unsafe outputs over previously unseen data points. In this paper, we introduce an algorithm for updating NN control policies to satisfy a given set of formal safety constraints, while also optimizing the original loss function. Given a set of mixed-integer linear constraints, we define the NN repair problem as a Mixed Integer Quadratic Program (MIQP). 
In extensive experiments, we demonstrate the efficacy of our repair method in generating safe policies for a lower-leg prosthesis.", "keywords": "Imitation Learning;Assistive Robotics;Safety;Prosthesis", "primary_area": "", "supplementary_material": "/attachment/dfdaf7d7731c5eab1bb6bd3e4862f0e61873bfd8.zip", "author": "Keyvan Majd;Geoffrey Mitchell Clark;Tanmay Khandait;Siyu Zhou;Sriram Sankaranarayanan;Georgios Fainekos;Heni Amor", "authorids": "~Keyvan_Majd1;~Geoffrey_Mitchell_Clark1;tkhandai@asu.edu;szhou22@asu.edu;~Sriram_Sankaranarayanan1;~Georgios_Fainekos1;~Heni_Amor1", "gender": "M;M;;;M;M;M", "homepage": ";https://www.linkedin.com/in/geoffrey-m-clark/;;;http://www.cs.colorado.edu/~srirams;https://www.fainekos.net/;https://cidse.engineering.asu.edu/directory/ben-amor-heni/", "dblp": ";;;;82/1542.html;34/4314;18/3990", "google_scholar": "NeFHXVAAAAAJ;r99xpsAAAAAJ;;;V8RKLEsAAAAJ;https://scholar.google.com.tw/citations?user=WGRYgBEAAAAJ;https://scholar.google.com.tw/citations?user=u_7S7VYAAAAJ", "orcid": ";;;;0000-0001-7315-4340;0000-0002-0456-2129;", "linkedin": ";;;;;fainekos/;", "or_profile": "~Keyvan_Majd1;~Geoffrey_Mitchell_Clark1;tkhandai@asu.edu;szhou22@asu.edu;~Sriram_Sankaranarayanan1;~Georgios_Fainekos1;~Heni_Amor1", "aff": ";Arizona State University;;;University of Colorado at Boulder;Arizona State University;Arizona State University", "aff_domain": ";asu.edu;;;colorado.edu;asu.edu;asu.edu", "position": ";PhD student;;;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nmajd2022safe,\ntitle={Safe Robot Learning in Assistive Devices through Neural Network Repair},\nauthor={Keyvan Majd and Geoffrey Mitchell Clark and Tanmay Khandait and Siyu Zhou and Sriram Sankaranarayanan and Georgios Fainekos and Heni Amor},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=X4228W0QpvN}\n}", "github": "https://github.com/k1majd/NNRepLayer.git", "project": "", "reviewers": "pwYF;4TJw;qM6j;TbCe", "site": "https://openreview.net/forum?id=X4228W0QpvN", "pdf_size": 0, "rating": "1;6;6;6", "confidence": "", "rating_avg": 4.75, "confidence_avg": 0, "replies_avg": 25, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1261932134600028954&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Arizona State University;University of Colorado", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.colorado.edu", "aff_unique_abbr": "ASU;CU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boulder", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "X6CjiTWVRVr", "title": "Out-of-Dynamics Imitation Learning from Multimodal Demonstrations", "track": "main", "status": "Poster", "tldr": "We develop a transferability measurement learned by a sequence-based contrastive clustering algorithm and an adversarial-learning based algorithm to address out-of-dynamics imitation learning.", "abstract": "Existing imitation learning works mainly assume that the demonstrator who collects demonstrations shares the same dynamics as the imitator. However, the assumption limits the usage of imitation learning, especially when collecting demonstrations for the imitator is difficult. 
In this paper, we study out-of-dynamics imitation learning (OOD-IL), which relaxes the assumption to that the demonstrator and the imitator have the same state spaces but could have different action spaces and dynamics. OOD-IL enables imitation learning to utilize demonstrations from a wide range of demonstrators but introduces a new challenge: some demonstrations cannot be achieved by the imitator due to the different dynamics. Prior works try to filter out such demonstrations by feasibility measurements, but ignore the fact that the demonstrations exhibit a multimodal distribution since the different demonstrators may take different policies in different dynamics.\nWe develop a better transferability measurement to tackle this newly-emerged challenge. We firstly design a novel sequence-based contrastive clustering algorithm to cluster demonstrations from the same mode to avoid the mutual interference of demonstrations from different modes, and then learn the transferability of each demonstration with an adversarial-learning based algorithm in each cluster. Experiment results on several MuJoCo environments, a driving environment, and a simulated robot environment show that the proposed transferability measurement more accurately finds and down-weights non-transferable demonstrations and outperforms prior works on the final imitation learning performance. We show the videos of our experiment results on our website.", "keywords": "Imitation Learning;Out-of-Dynamics Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/aa03bf8309f03835cb8cc4940fc69deaa05e0cf9.zip", "author": "Yiwen Qiu;Jialong Wu;Zhangjie Cao;Mingsheng Long", "authorids": "~Yiwen_Qiu1;~Jialong_Wu1;~Zhangjie_Cao1;~Mingsheng_Long5", "gender": "F;M;M;M", "homepage": "https://evieq01.github.io/evieqiu.github.io/;https://manchery.github.io/;https://caozhangjie.github.io/;http://ise.thss.tsinghua.edu.cn/~mlong", "dblp": "159/9832;73/498-1.html;https://dblp.org/pers/hd/c/Cao:Zhangjie;74/9023", "google_scholar": "tumZYG0AAAAJ;FfTZ66gAAAAJ;pA-TqMEAAAAJ;_MjXpXkAAAAJ", "orcid": ";0009-0008-7846-053X;;0000-0002-5412-9120", "linkedin": ";;;", "or_profile": "~Yiwen_Qiu1;~Jialong_Wu1;~Zhangjie_Cao1;~Mingsheng_Long2", "aff": "Tsinghua University;Tsinghua University;Stanford University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;stanford.edu;tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nqiu2022outofdynamics,\ntitle={Out-of-Dynamics Imitation Learning from Multimodal Demonstrations},\nauthor={Yiwen Qiu and Jialong Wu and Zhangjie Cao and Mingsheng Long},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=X6CjiTWVRVr}\n}", "github": "https://github.com/EvieQ01/OODIL", "project": "", "reviewers": "M2Dq;Zz2p;sqom;UJgv", "site": "https://openreview.net/forum?id=X6CjiTWVRVr", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10899829086978679230&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tsinghua University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.stanford.edu", "aff_unique_abbr": "THU;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": 
";Stanford", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "X_qYPtJLaX8", "title": "PRISM: Probabilistic Real-Time Inference in Spatial World Models", "track": "main", "status": "Oral", "tldr": "We propose a probabilistic real-time filtering inference for a generative spatial model that features rendering and dynamics.", "abstract": "We introduce PRISM, a method for real-time filtering in a probabilistic generative model of agent motion and visual perception. Previous approaches either lack uncertainty estimates for the map and agent state, do not run in real-time, do not have a dense scene representation or do not model agent dynamics. Our solution reconciles all of these aspects. We start from a predefined state-space model which combines differentiable rendering and 6-DoF dynamics. Probabilistic inference in this model amounts to simultaneous localisation and mapping (SLAM) and is intractable. We use a series of approximations to Bayesian inference to arrive at probabilistic map and state estimates. We take advantage of well-established methods and closed-form updates, preserving accuracy and enabling real-time capability. The proposed solution runs at 10Hz real-time and is similarly accurate to state-of-the-art SLAM in small to medium-sized indoor environments, with high-speed UAV and handheld camera agents (Blackbird, EuRoC and TUM-RGBD).\n", "keywords": "generative model;SLAM;Bayes filter;uncertainty;differentiable rendering", "primary_area": "", "supplementary_material": "/attachment/c804b523a89e5c32ca3e1f480378c86f6584b8fb.zip", "author": "Atanas Mirchev;Baris Kayalibay;Ahmed Agha;Patrick van der Smagt;Daniel Cremers;Justin Bayer", "authorids": "~Atanas_Mirchev1;~Baris_Kayalibay1;~Ahmed_Agha1;~Patrick_van_der_Smagt1;~Daniel_Cremers1;~Justin_Bayer1", "gender": "M;;M;M;M;M", "homepage": ";;;https://argmax.org;https://vision.in.tum.de/members/cremers;", "dblp": "171/4448.html;194/2562;;24/6573.html;c/DanielCremers;", "google_scholar": ";;It8RcRIAAAAJ;https://scholar.google.de/citations?user=5ybzvbsAAAAJ;cXQciMEAAAAJ;https://scholar.google.de/citations?user=kczEEFAAAAAJ", "orcid": "0000-0003-2890-5015;;;0000-0003-4418-4916;;", "linkedin": ";;ahmed-agha-5862b0195/?originalSubdomain=de;smagt/;;", "or_profile": "~Atanas_Mirchev1;~Baris_Kayalibay1;~Ahmed_Agha1;~Patrick_van_der_Smagt1;~Daniel_Cremers1;~Justin_Bayer1", "aff": "Machine Learning Research Lab, Volkswagen Group;Data Lab, Volkswagen Group;Volkswagen Group;Machine Learning Research Lab; Volkswagen Group;Technical University Munich;VW Group", "aff_domain": "argmax.ai;volkswagen.de;volkswagen.de;volkswagen.de;tum.de;volkswagen.de", "position": "PhD student;PhD student;Research Assistant;Full Professor;Full Professor;research scientist", "bibtex": "@inproceedings{\nmirchev2022prism,\ntitle={{PRISM}: Probabilistic Real-Time Inference in Spatial World Models},\nauthor={Atanas Mirchev and Baris Kayalibay and Ahmed Agha and Patrick van der Smagt and Daniel Cremers and Justin Bayer},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=X_qYPtJLaX8}\n}", "github": "", "project": "", "reviewers": "6pXK;ZggF;uCb5", "site": "https://openreview.net/forum?id=X_qYPtJLaX8", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 4, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 2, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=5530325309982077464&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "Volkswagen Group;Machine Learning Research Lab;Technical University of Munich", "aff_unique_dep": "Machine Learning Research Lab;Machine Learning Research;", "aff_unique_url": "https://www.volkswagenag.com;;https://www.tum.de", "aff_unique_abbr": "Volkswagen;;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany;" }, { "id": "Xo3eOibXCQ8", "title": "Learning with Muscles: Benefits for Data-Efficiency and Robustness in Anthropomorphic Tasks", "track": "main", "status": "Poster", "tldr": "We showcase the benefits of muscle-actuation for learning efficiency and robustness in a multitude of anthropomorphic tasks.", "abstract": "Humans are able to outperform robots in terms of robustness, versatility, and learning of new tasks in a wide variety of movements. We hypothesize that highly nonlinear muscle dynamics play a large role in providing inherent stability, which is favorable to learning. While recent advances have been made in applying modern learning techniques to muscle-actuated systems both in simulation as well as in robotics, so far, no detailed analysis has been performed to show the benefits of muscles in this setting. Our study closes this gap by investigating core robotics challenges and comparing the performance of different actuator morphologies in terms of data-efficiency, hyperparameter sensitivity, and robustness.", "keywords": "reinforcement learning;model predictive control;actuator morphology;morphological computation", "primary_area": "", "supplementary_material": "/attachment/f3f465a79cdb74ca682e817fbe41bf3d448c3573.zip", "author": "Isabell Wochner;Pierre Schumacher;Georg Martius;Dieter B\u00fcchler;Syn Schmitt;Daniel Haeufle", "authorids": "~Isabell_Wochner1;~Pierre_Schumacher1;~Georg_Martius1;~Dieter_B\u00fcchler1;~Syn_Schmitt1;~Daniel_Haeufle1", "gender": "F;M;M;M;M;", "homepage": "https://www.imsb.uni-stuttgart.de/team/Wochner/;https://al.is.mpg.de/person/pschumacher;https://uni-tuebingen.de/de/264672;http://embodied.ml/;https://www.imsb.uni-stuttgart.de/research/cbb/;", "dblp": ";;47/2706;181/4076.html;;", "google_scholar": ";;https://scholar.google.de/citations?user=b-JF-UIAAAAJ;https://scholar.google.de/citations?user=8HYQ1tgAAAAJ;;", "orcid": "0000-0002-2820-5791;;;;;", "linkedin": ";;;;;", "or_profile": "~Isabell_Wochner1;~Pierre_Schumacher1;~Georg_Martius1;~Dieter_B\u00fcchler1;~Syn_Schmitt1;~Daniel_Haeufle1", "aff": "Universit\u00e4t Stuttgart;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Universit\u00e4t Stuttgart;", "aff_domain": "uni-stuttgart.de;tuebingen.mpg.de;tuebingen.mpg.de;tuebingen.mpg.de;uni-stuttgart.de;", "position": "PhD student;PhD Student;Assistant Professor;Group Leader;Full Professor;", "bibtex": "@inproceedings{\nwochner2022learning,\ntitle={Learning with Muscles: Benefits for Data-Efficiency and Robustness in Anthropomorphic Tasks},\nauthor={Isabell Wochner and Pierre Schumacher and Georg Martius and Dieter B{\\\"u}chler and Syn Schmitt and Daniel Haeufle},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Xo3eOibXCQ8}\n}", "github": "https://sites.google.com/view/learning-with-muscles", "project": "", 
"reviewers": "KLzq;SnhT;vcXi;XEHZ", "site": "https://openreview.net/forum?id=Xo3eOibXCQ8", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1099918299519152833&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Stuttgart;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.uni-stuttgart.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Uni Stuttgart;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "Xux9gSS7WE0", "title": "In-Hand Object Rotation via Rapid Motor Adaptation", "track": "main", "status": "Poster", "tldr": "We present an approach for general in-hand rotation for a diverse set of objects.", "abstract": "Generalized in-hand manipulation has long been an unsolved challenge of robotics. As a small step towards this grand goal, we demonstrate how to design and learn a simple adaptive controller to achieve in-hand object rotation using only fingertips. The controller is trained entirely in simulation on only cylindrical objects, which then \u2013 without any fine-tuning \u2013 can be directly deployed to a real robot hand to rotate dozens of objects with diverse sizes, shapes, and weights over the z-axis. This is achieved via rapid online adaptation of the robot\u2019s controller to the object properties using only proprioception history. Furthermore, natural and stable finger gaits automatically emerge from training the control policy via reinforcement learning. 
Code and more videos are available at https://github.com/HaozhiQi/Hora .", "keywords": "Dexterous In-Hand Manipulation;Object Rotation;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/3ec890ea03e9a73d9c61812f602702aa27e2a3bd.zip", "author": "Haozhi Qi;Ashish Kumar;Roberto Calandra;Yi Ma;Jitendra Malik", "authorids": "~Haozhi_Qi1;~Ashish_Kumar1;~Roberto_Calandra1;~Yi_Ma4;~Jitendra_Malik2", "gender": "M;M;M;M;M", "homepage": "https://haozhi.io/;https://ashish-kmr.github.io/;https://www.robertocalandra.com;http://people.eecs.berkeley.edu/~yima/;https://people.eecs.berkeley.edu/~malik/", "dblp": "190/7802;34/5378;118/8239;;58/2944", "google_scholar": "https://scholar.google.com.hk/citations?user=iyVHKkcAAAAJ;Oj-2ZNEAAAAJ;FdE3LOEAAAAJ;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ;oY9R5YQAAAAJ", "orcid": ";;0000-0001-9430-8433;;0000-0003-3695-1580", "linkedin": ";;rcalandra;;", "or_profile": "~Haozhi_Qi1;~Ashish_Kumar1;~Roberto_Calandra1;~Yi_Ma4;~Jitendra_Malik2", "aff": "University of California, Berkeley;University of California, Berkeley;Meta Facebook;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;fb.com;berkeley.edu;berkeley.edu", "position": "PhD student;Graduate Student;Research Scientist;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqi2022inhand,\ntitle={In-Hand Object Rotation via Rapid Motor Adaptation},\nauthor={Haozhi Qi and Ashish Kumar and Roberto Calandra and Yi Ma and Jitendra Malik},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Xux9gSS7WE0}\n}", "github": "https://github.com/HaozhiQi/Hora", "project": "", "reviewers": "W5ch;p2uD;5UyE;arnN", "site": "https://openreview.net/forum?id=Xux9gSS7WE0", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13498256009534931601&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.berkeley.edu;https://meta.com", "aff_unique_abbr": "UC Berkeley;Meta", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Y42uoIekm5b", "title": "JFP: Joint Future Prediction with Interactive Multi-Agent Modeling for Autonomous Driving", "track": "main", "status": "Poster", "tldr": "Using learned interaction among agents for multi-agent motion forecasting achieves sota results on WOMD interactive split.", "abstract": "We propose \\textit{JFP}, a Joint Future Prediction model that can learn to generate accurate and consistent multi-agent future trajectories. For this task, many different methods have been proposed to capture social interactions in the encoding part of the model, however, considerably less focus has been placed on representing interactions in the decoder and output stages. As a result, the predicted trajectories are not necessarily consistent with each other, and often result in unrealistic trajectory overlaps. 
In contrast, we propose an end-to-end trainable model that directly learns the interaction between pairs of agents in a structured, graphical model formulation in order to generate consistent future trajectories. It sets new state-of-the-art results on the Waymo Open Motion Dataset (WOMD) for the interactive setting. We also investigate a more complex multi-agent setting for both WOMD and a larger internal dataset, where our approach improves significantly on the trajectory overlap metrics while obtaining on-par or better performance on single-agent trajectory metrics.", "keywords": "Self-driving;motion forecasting;interactive prediction", "primary_area": "", "supplementary_material": "/attachment/e8e540150ea82dc0f3a38f2f2dcf52cb8d162784.zip", "author": "Wenjie Luo;Cheol Park;Andre Cornman;Benjamin Sapp;Dragomir Anguelov", "authorids": "~Wenjie_Luo1;cheolhop@waymo.com;~Andre_Cornman1;~Benjamin_Sapp3;~Dragomir_Anguelov1", "gender": ";;;M;M", "homepage": ";;;;", "dblp": ";;;54/5582;a/DragomirAnguelov", "google_scholar": ";;;aPqcyU4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;andre-cornman-65304589/;;dragomiranguelov/", "or_profile": "~Wenjie_Luo1;cheolhop@waymo.com;~Andre_Cornman1;~Benjamin_Sapp3;~Dragomir_Anguelov1", "aff": ";;;Waymo;Waymo", "aff_domain": ";;;waymo.com;waymo.com", "position": ";;;Researcher;Researcher", "bibtex": "@inproceedings{\nluo2022jfp,\ntitle={{JFP}: Joint Future Prediction with Interactive Multi-Agent Modeling for Autonomous Driving},\nauthor={Wenjie Luo and Cheol Park and Andre Cornman and Benjamin Sapp and Dragomir Anguelov},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Y42uoIekm5b}\n}", "github": "", "project": "", "reviewers": "2W5p;DT7Z;jdv8;CkUV", "site": "https://openreview.net/forum?id=Y42uoIekm5b", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9571190497350186237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Waymo", "aff_unique_dep": "", "aff_unique_url": "https://www.waymo.com", "aff_unique_abbr": "Waymo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Y7xQsyGEQ0C", "title": "Adapting Neural Models with Sequential Monte Carlo Dropout", "track": "main", "status": "Poster", "tldr": "We infer dropout masks at run time to adapt a neural model to changing conditions. Masks also capture context-dependent information.", "abstract": "The ability to adapt to changing environments and settings is essential for robots acting in dynamic and unstructured environments or working alongside humans with varied abilities or preferences. This work introduces an extremely simple and effective approach to adapting neural models in response to changing settings, without requiring any specialised meta-learning strategies. We first train a standard network using dropout, which is analogous to learning an ensemble of predictive models or distribution over predictions. At run-time, we use a particle filter to maintain a distribution over dropout masks to adapt the neural model to changing settings in an online manner. 
Experimental results show improved performance in control problems requiring both online and look-ahead prediction, and showcase the interpretability of the inferred masks in a human behaviour modelling task for drone tele-operation. ", "keywords": "Model Adaptation;Meta-Learning;Online Robot Control and Prediction", "primary_area": "", "supplementary_material": "/attachment/bb1c02ec0e781a269fc70d0467e21781f441b182.zip", "author": "Pamela Carreno;Dana Kulic;Michael Burke", "authorids": "~Pamela_Carreno1;~Dana_Kulic1;~Michael_Burke1", "gender": "F;F;", "homepage": "https://research.monash.edu/en/persons/pamela-carreno-medrano;https://www.monash.edu/engineering/danakulic;http://michaelburke.co.za", "dblp": ";;70/4353", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=sL0KJlQAAAAJ;https://scholar.google.co.uk/citations?user=Abz56f4AAAAJ", "orcid": ";;0000-0001-7426-1498", "linkedin": ";;", "or_profile": "~Pamela_Carreno1;~Dana_Kulic1;~Michael_Burke1", "aff": "Monash University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;monash.edu", "position": "Lecturer;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ncarreno2022adapting,\ntitle={Adapting Neural Models with Sequential Monte Carlo Dropout},\nauthor={Pamela Carreno and Dana Kulic and Michael Burke},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Y7xQsyGEQ0C}\n}", "github": "", "project": "", "reviewers": "iKY7;PBaL;YMaq;J9eA", "site": "https://openreview.net/forum?id=Y7xQsyGEQ0C", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3246772912109346308&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "Y_YUEEQMjQK", "title": "Rethinking Optimization with Differentiable Simulation from a Global Perspective", "track": "main", "status": "Oral", "tldr": "We show that differentiable simulations present difficult optimization landscapes and address this with a method that combines global and local optimization.", "abstract": "Differentiable simulation is a promising toolkit for fast gradient-based policy optimization and system identification. However, existing approaches to differentiable simulation have largely tackled scenarios where obtaining smooth gradients has been relatively easy, such as systems with mostly smooth dynamics. In this work, we study the challenges that differentiable simulation presents when it is not feasible to expect that a single descent reaches a global optimum, which is often a problem in contact-rich scenarios. We analyze the optimization landscapes of diverse scenarios that contain both rigid bodies and deformable objects. In dynamic environments with highly deformable objects and fluids, differentiable simulators produce rugged landscapes with nonetheless useful gradients in some parts of the space. 
We propose a method that combines Bayesian optimization with semi-local 'leaps' to obtain a global search method that can use gradients effectively, while also maintaining robust performance in regions with noisy gradients. We show that our approach outperforms several gradient-based and gradient-free baselines on an extensive set of experiments in simulation, and also validate the method using experiments with a real robot and deformables.", "keywords": "Differentiable simulation;Global optimization;Deformable Objects", "primary_area": "", "supplementary_material": "/attachment/79af4fdbf98cf866d1c4df25768717187774b32c.zip", "author": "Rika Antonova;Jingyun Yang;Krishna Murthy Jatavallabhula;Jeannette Bohg", "authorids": "~Rika_Antonova1;~Jingyun_Yang1;~Krishna_Murthy_Jatavallabhula1;~Jeannette_Bohg1", "gender": ";M;;", "homepage": ";https://yjy0625.github.io;;https://web.stanford.edu/~bohg/", "dblp": ";;;52/7377", "google_scholar": ";7XBAa2QAAAAJ;;rjnJnEkAAAAJ", "orcid": ";;;0000-0002-4921-7193", "linkedin": ";;;", "or_profile": "~Rika_Antonova1;~Jingyun_Yang1;~Krishna_Murthy_Jatavallabhula1;~Jeannette_Bohg1", "aff": ";Stanford University;;Stanford University", "aff_domain": ";stanford.edu;;stanford.edu", "position": ";PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nantonova2022rethinking,\ntitle={Rethinking Optimization with Differentiable Simulation from a Global Perspective},\nauthor={Rika Antonova and Jingyun Yang and Krishna Murthy Jatavallabhula and Jeannette Bohg},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Y_YUEEQMjQK}\n}", "github": "", "project": "", "reviewers": "LcC1;jekJ;SNTh;LRvk", "site": "https://openreview.net/forum?id=Y_YUEEQMjQK", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1784131642464785779&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "YmJi0bTfeNX", "title": "TAX-Pose: Task-Specific Cross-Pose Estimation for Robot Manipulation", "track": "main", "status": "Poster", "tldr": "Using dense residual correspondences, we estimate task-specific object relationships that generalize to novel objects.", "abstract": "How do we imbue robots with the ability to efficiently manipulate unseen objects and transfer relevant skills based on demonstrations? End-to-end learning methods often fail to generalize to novel objects or unseen configurations. Instead, we focus on the task-specific pose relationship between relevant parts of interacting objects. We conjecture that this relationship is a generalizable notion of a manipulation task that can transfer to new objects in the same category; examples include the relationship between the pose of a pan relative to an oven or the pose of a mug relative to a mug rack. We call this task-specific pose relationship \u201ccross-pose\u201d and provide a mathematical definition of this concept. 
We propose a vision-based system that learns to estimate the cross-pose between two objects for a given manipulation task using learned cross-object correspondences. The estimated cross-pose is then used to guide a downstream motion planner to manipulate the objects into the desired pose relationship (placing a pan into the oven or the mug onto the mug rack). We demonstrate our method\u2019s capability to generalize to unseen objects, in some cases after training on only 10 demonstrations in the real world. Results show that our system achieves state-of-the-art performance in both simulated and real-world experiments across a number of tasks. Supplementary information and videos can be found at https://sites.google.com/view/tax-pose/home.", "keywords": "Learning from Demonstration;Manipulation;3D Learning", "primary_area": "", "supplementary_material": "/attachment/f197e8fb27d3df9d34132bdd1b696039ea7ac452.zip", "author": "Chuer Pan;Brian Okorn;Harry Zhang;Ben Eisner;David Held", "authorids": "~Chuer_Pan1;~Brian_Okorn1;~Harry_Zhang2;~Ben_Eisner1;~David_Held1", "gender": "F;M;M;M;M", "homepage": "http://chuerpan.com/;https://brianokorn.com;https://harryzhangog.github.io/;;http://davheld.github.io/", "dblp": ";91/9610;;;22/11147", "google_scholar": "HBMZ02EAAAAJ;uV7RuGwAAAAJ;e-p7KiUAAAAJ;RWe-v0UAAAAJ;0QtU-NsAAAAJ", "orcid": ";;;;", "linkedin": ";brianokornrobotics/;;;", "or_profile": "~Chuer_Pan1;~Brian_Okorn1;~Harry_Zhang2;~Ben_Eisner1;~David_Held1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu", "position": "MS student;PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npan2022taxpose,\ntitle={{TAX}-Pose: Task-Specific Cross-Pose Estimation for Robot Manipulation},\nauthor={Chuer Pan and Brian Okorn and Harry Zhang and Ben Eisner and David Held},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=YmJi0bTfeNX}\n}", "github": "", "project": "", "reviewers": "4CrR;gUTd;d5cK;r914", "site": "https://openreview.net/forum?id=YmJi0bTfeNX", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4296220255675679509&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Z8mqodASTQd", "title": "Domain Adaptation and Generalization: A Low-Complexity Approach", "track": "main", "status": "Poster", "tldr": "", "abstract": "Well-performing deep learning methods are essential in today's perception of robotic systems such as autonomous driving vehicles.\nOngoing research is due to the real-life demands for robust deep learning models against numerous domain changes and cheap training processes to avoid costly manual-labeling efforts.\nThese requirements are addressed by unsupervised domain adaptation methods, in particular for synthetic to real-world domain changes.\nRecent top-performing approaches are hybrids consisting of multiple adaptation technologies and 
complex training processes.\n\nIn contrast, this work proposes EasyAdap, a simple and easy-to-use unsupervised domain adaptation method achieving near state-of-the-art performance on the synthetic to real-world domain change.\nOur evaluation consists of a comparison to numerous top-performing methods, and it shows the competitiveness and further potential of domain adaptation and domain generalization capabilities of our method.\nWe contribute and focus on an extensive discussion revealing possible reasons for domain generalization capabilities, which is necessary to satisfy real-life application's demands.\n", "keywords": "unsupervised domain adaptation;semantic segmentation;domain generalization", "primary_area": "", "supplementary_material": "/attachment/00ba2861459c8f17dc24679ea158da197c57c214.zip", "author": "Joshua Niemeijer;J\u00f6rg Peter Sch\u00e4fer", "authorids": "~Joshua_Niemeijer1;joerg.schaefer@dlr.de", "gender": "Not Specified;", "homepage": ";", "dblp": "254/6556;", "google_scholar": "SK0mAJ0AAAAJ;", "orcid": "0000-0002-2417-8749;", "linkedin": ";", "or_profile": "~Joshua_Niemeijer1;joerg.schaefer@dlr.de", "aff": "German Aerospace Center (DLR);", "aff_domain": "dlr.de;", "position": "Researcher;", "bibtex": "@inproceedings{\nniemeijer2022domain,\ntitle={Domain Adaptation and Generalization: A Low-Complexity Approach},\nauthor={Joshua Niemeijer and J{\\\"o}rg Peter Sch{\\\"a}fer},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=Z8mqodASTQd}\n}", "github": "", "project": "", "reviewers": "fxUx;vMEa;geXK", "site": "https://openreview.net/forum?id=Z8mqodASTQd", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6900438902171459873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "German Aerospace Center", "aff_unique_dep": "", "aff_unique_url": "https://www.dlr.de", "aff_unique_abbr": "DLR", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "id": "ZAbh4Zp0Blt", "title": "PoET: Pose Estimation Transformer for Single-View, Multi-Object 6D Pose Estimation", "track": "main", "status": "Poster", "tldr": "Novel transformer-based network for RGB-only, 6 degree of freedom multi-object pose estimation while not relying on depth maps or 3D object models", "abstract": "Accurate 6D object pose estimation is an important task for a variety of robotic applications such as grasping or localization. It is a challenging task due to object symmetries, clutter and occlusion, but it becomes more challenging when additional information, such as depth and 3D models, is not provided. We present a transformer-based approach that takes an RGB image as input and predicts a 6D pose for each object in the image. Besides the image, our network does not require any additional information such as depth maps or 3D object models. First, the image is passed through an object detector to generate feature maps and to detect objects. Then, the feature maps are fed into a transformer with the detected bounding boxes as additional information. Afterwards, the output object queries are processed by a separate translation and rotation head. We achieve state-of-the-art results for RGB-only approaches on the challenging YCB-V dataset. 
We illustrate the suitability of the resulting model as pose sensor for a 6-DoF state estimation task. Code is available at https://github.com/aau-cns/poet .", "keywords": "6D Pose Estimation;Transformer;Object-Relative Localization", "primary_area": "", "supplementary_material": "/attachment/b6ad8fef97c72f018cb4bdd6f6c4d3f74797353f.zip", "author": "Thomas Georg Jantos;Mohamed Amin Hamdad;Wolfgang Granig;Stephan Weiss;Jan Steinbrener", "authorids": "~Thomas_Georg_Jantos1;~Mohamed_Amin_Hamdad1;wolfgang.granig@infineon.com;~Stephan_Weiss1;~Jan_Steinbrener1", "gender": "M;M;;M;M", "homepage": ";;;https://sst.aau.at/cns;https://sst.aau.at/cns", "dblp": "216/2188;;;19/4190;236/6004", "google_scholar": "g38xaeAAAAAJ;;;https://scholar.google.ch/citations?user=dQmvEyUAAAAJ;https://scholar.google.at/citations?user=oUlJ_1cAAAAJ", "orcid": "0009-0007-6066-0931;;;;0000-0002-2465-2527", "linkedin": "thomas-jantos-90b477151/;mohamed-amin-hamdad-36742a147;;stephan-weiss-42319113;jansteinbrener/", "or_profile": "~Thomas_Georg_Jantos1;~Mohamed_Amin_Hamdad1;wolfgang.granig@infineon.com;~Stephan_Weiss1;~Jan_Steinbrener1", "aff": "University of Klagenfurt;;;University of Klagenfurt;Alpen-Adria Universit\u00e4t Klagenfurt", "aff_domain": "aau.at;;;aau.at;aau.at", "position": "PhD student;;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njantos2022poet,\ntitle={Po{ET}: Pose Estimation Transformer for Single-View, Multi-Object 6D Pose Estimation},\nauthor={Thomas Georg Jantos and Mohamed Amin Hamdad and Wolfgang Granig and Stephan Weiss and Jan Steinbrener},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ZAbh4Zp0Blt}\n}", "github": "https://github.com/aau-cns/poet", "project": "", "reviewers": "pB8h;v6yy;6Xj1;jp2U", "site": "https://openreview.net/forum?id=ZAbh4Zp0Blt", "pdf_size": 0, "rating": "4;4;4;6", "confidence": "", "rating_avg": 4.5, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10814373409037463202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Klagenfurt;Alpen-Adria-Universit\u00e4t Klagenfurt", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-klagenfurt.at;https://www.aau.at", "aff_unique_abbr": "Uni Klagenfurt;AAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Austria" }, { "id": "ZJYbW8Cwys", "title": "Real-Time Generation of Time-Optimal Quadrotor Trajectories with Semi-Supervised Seq2Seq Learning", "track": "main", "status": "Poster", "tldr": "Seq2Seq learning for time-optimal quadrotor trajectory generation", "abstract": "Generating time-optimal quadrotor trajectories is challenging due to the complex dynamics of high-speed, agile flight. In this paper, we propose a data-driven method for real-time time-optimal trajectory generation that is suitable for complicated system models. We utilize a temporal deep neural network with sequence-to-sequence learning to find the optimal trajectories for sequences of a variable number of waypoints. The model is efficiently trained in a semi-supervised manner by combining supervised pretraining using a minimum-snap baseline method with Bayesian optimization and reinforcement learning. 
Compared to the baseline method, the trained model generates up to 20 % faster trajectories at an order of magnitude less computational cost. The optimized trajectories are evaluated in simulation and real-world flight experiments, where the improvement is further demonstrated.\n", "keywords": "Motion Planning;Model Learning", "primary_area": "", "supplementary_material": "/attachment/be830006cb61e7729bbb5550a0d68e8d075bf5da.zip", "author": "Gilhyun Ryou;Ezra Tal;Sertac Karaman", "authorids": "~Gilhyun_Ryou1;eatal@mit.edu;~Sertac_Karaman1", "gender": "M;;M", "homepage": ";;https://karaman.mit.edu", "dblp": ";;45/1718", "google_scholar": "cwDIMh0AAAAJ;;Vu-Zb7EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gilhyun_Ryou1;eatal@mit.edu;~Sertac_Karaman1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nryou2022realtime,\ntitle={Real-Time Generation of Time-Optimal Quadrotor Trajectories with Semi-Supervised Seq2Seq Learning},\nauthor={Gilhyun Ryou and Ezra Tal and Sertac Karaman},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ZJYbW8Cwys}\n}", "github": "", "project": "", "reviewers": "7heK;a94g;SHLP", "site": "https://openreview.net/forum?id=ZJYbW8Cwys", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2792381049570530384&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ZL2keFk7WXJ", "title": "Learning Multi-Objective Curricula for Robotic Policy Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Various automatic curriculum learning (ACL) methods have been proposed to improve the sample efficiency and final performance of robots' policies learning. They are designed to control how a robotic agent collects data, which is inspired by how humans gradually adapt their learning processes to their capabilities. In this paper, we propose a unified automatic curriculum learning framework to create multi-objective but coherent curricula that are generated by a set of parametric curriculum modules. Each curriculum module is instantiated as a neural network and is responsible for generating a particular curriculum. In order to coordinate those potentially conflicting modules in unified parameter space, we propose a multi-task hyper-net learning framework that uses a single hyper-net to parameterize all those curriculum modules. 
We evaluate our method on a series of robotic manipulation tasks and demonstrate its superiority over other state-of-the-art ACL methods in terms of sample efficiency and final performance.", "keywords": "ACL;Hyper-net;Multi-objective Curricula", "primary_area": "", "supplementary_material": "/attachment/c43bebeb69d335589e23c62a1ca19bc557ddd5fb.zip", "author": "Jikun Kang;Miao Liu;Abhinav Gupta;Christopher Pal;Xue Liu;Jie Fu", "authorids": "~Jikun_Kang1;~Miao_Liu1;~Abhinav_Gupta2;~Christopher_Pal1;~Xue_Liu1;~Jie_Fu2", "gender": "M;M;M;;M;M", "homepage": "https://luciferkonn.github.io;https://sites.google.com/view/miaoliuhome;https://www.guabhinav.com;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao;http://www.cs.mcgill.ca/~xueliu/;https://bigaidream.github.io/", "dblp": "299/0233;;36/7024-2;45/1217;l/XueLiu;", "google_scholar": "Jikun%20Kang;7QHvAEYAAAAJ;jAaCd7YAAAAJ;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ;https://scholar.google.com.tw/citations?user=rfLIRakAAAAJ;66osleIAAAAJ", "orcid": "0009-0001-1334-7092;;;;;0000-0002-4494-843X", "linkedin": "kang-jikun-91993814b/;miao-liu-3273a32b;backpropper;;;", "or_profile": "~Jikun_Kang1;~Miao_Liu1;~Abhinav_Gupta2;~Christopher_Pal1;~Xue_Liu1;~Jie_Fu1", "aff": "McGill University;International Business Machines;Meta AI;Polytechnique Montreal;McGill University;University of Montreal", "aff_domain": "mcgill.ca;ibm.com;meta.com;polymtl.ca;mcgill.ca;umontreal.ca", "position": "PhD student;Research Staff Member;Research Intern;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nkang2022learning,\ntitle={Learning Multi-Objective Curricula for Robotic Policy Learning},\nauthor={Jikun Kang and Miao Liu and Abhinav Gupta and Christopher Pal and Xue Liu and Jie Fu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ZL2keFk7WXJ}\n}", "github": "", "project": "", "reviewers": "HfsD;oFwa;g3te;9eoj", "site": "https://openreview.net/forum?id=ZL2keFk7WXJ", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6955740527065994510&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "McGill University;International Business Machines Corporation;Meta;Polytechnique Montreal;University of Montreal", "aff_unique_dep": ";;Meta AI;;", "aff_unique_url": "https://www.mcgill.ca;https://www.ibm.com;https://meta.com;https://www.polymtl.ca;https://wwwumontreal.ca", "aff_unique_abbr": "McGill;IBM;Meta;PolyMTL;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "Canada;United States" }, { "id": "ZUtgUA0Fuwd", "title": "Watch and Match: Supercharging Imitation with Regularized Optimal Transport", "track": "main", "status": "Oral", "tldr": "We propose a new imitation learning algorithm that substantially improves sample efficiency for continuous control problems in simulation and on real-world robotic manipulation tasks.", "abstract": "Imitation learning holds tremendous promise in learning policies efficiently for complex decision making problems. Current state-of-the-art algorithms often use inverse reinforcement learning (IRL), where given a set of expert demonstrations, an agent alternatively infers a reward function and the associated optimal policy. 
However, such IRL approaches often require substantial online interactions for complex control problems. In this work, we present Regularized Optimal Transport (ROT), a new imitation learning algorithm that builds on recent advances in optimal transport based trajectory-matching. Our key technical insight is that adaptively combining trajectory-matching rewards with behavior cloning can significantly accelerate imitation even with only a few demonstrations. Our experiments on 20 visual control tasks across the DeepMind Control Suite, the OpenAI Robotics Suite, and the Meta-World Benchmark demonstrate an average of 7.8x faster imitation to reach 90% of expert performance compared to prior state-of-the-art methods. On real-world robotic manipulation, with just one demonstration and an hour of online training, ROT achieves an average success rate of 90.1% across 14 tasks.", "keywords": "Imitation Learning;Manipulation;Robotics", "primary_area": "", "supplementary_material": "/attachment/59231f2f2c1525347ff8285ebe5af7ea8d506a98.zip", "author": "Siddhant Haldar;Vaibhav Mathur;Denis Yarats;Lerrel Pinto", "authorids": "~Siddhant_Haldar1;~Vaibhav_Mathur1;~Denis_Yarats1;~Lerrel_Pinto1", "gender": "M;M;M;M", "homepage": "https://siddhanthaldar.github.io/;https://vaibhav117.github.io/;http://denis-yarats.info/;https://www.lerrelpinto.com/", "dblp": "227/2282;;200/8142;168/8304", "google_scholar": "-h_bkRgAAAAJ;;7kaXqgMAAAAJ;pmVPj94AAAAJ", "orcid": ";;;", "linkedin": ";vaibhav-mathur-b4320995/;;", "or_profile": "~Siddhant_Haldar1;~Vaibhav_Mathur1;~Denis_Yarats1;~Lerrel_Pinto1", "aff": "New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;cs.nyu.edu;cs.nyu.edu", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhaldar2022watch,\ntitle={Watch and Match: Supercharging Imitation with Regularized Optimal Transport},\nauthor={Siddhant Haldar and Vaibhav Mathur and Denis Yarats and Lerrel Pinto},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ZUtgUA0Fuwd}\n}", "github": "https://github.com/siddhanthaldar/ROT", "project": "", "reviewers": "fHhv;xvuE;qDWt;Ldfy", "site": "https://openreview.net/forum?id=ZUtgUA0Fuwd", "pdf_size": 0, "rating": "10;10;10;10", "confidence": "", "rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3681901830589436198&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ZezSmJq06Ep", "title": "Deep Projective Rotation Estimation through Relative Supervision", "track": "main", "status": "Poster", "tldr": "By using a stereographic projection to open the closed manifold of SO(3), we avoid the local optima common when naively applying relative self-supervision for object orientation estimation, allowing for faster convergence.", "abstract": "Orientation estimation is the core to a variety of vision and robotics tasks such as camera and object pose estimation. 
Deep learning has offered a way to develop image-based orientation estimators; however, such estimators often require training on a large labeled dataset, which can be time-intensive to collect. In this work, we explore whether self-supervised learning from unlabeled data can be used to alleviate this issue. Specifically, we assume access to estimates of the relative orientation between neighboring poses, such that can be obtained via a local alignment method. While self-supervised learning has been used successfully for translational object keypoints, in this work, we show that naively applying relative supervision to the rotational group $SO(3)$ will often fail to converge due to the non-convexity of the rotational space. To tackle this challenge, we propose a new algorithm for self-supervised orientation estimation which utilizes Modified Rodrigues Parameters to stereographically project the closed manifold of $SO(3)$ to the open manifold of $\\mathbb{R}^{3}$, allowing the optimization to be done in an open Euclidean space. We empirically validate the benefits of the proposed algorithm for rotational averaging problem in two settings: (1) direct optimization on rotation parameters, and (2) optimization of parameters of a convolutional neural network that predicts object orientations from images. In both settings, we demonstrate that our proposed algorithm is able to converge to a consistent relative orientation frame much faster than algorithms that purely operate in the $SO(3)$ space. Additional information can be found at https://sites.google.com/view/deep-projective-rotation.", "keywords": "rotations;deep learning;relative-supervision;self-supervision;modified rodrigues parameters;stereographic projection;pose estimation", "primary_area": "", "supplementary_material": "/attachment/32c16d9a60e3b4c4cbdc049bfad4b56c63a0af2a.zip", "author": "Brian Okorn;Chuer Pan;Martial Hebert;David Held", "authorids": "~Brian_Okorn1;~Chuer_Pan1;~Martial_Hebert1;~David_Held1", "gender": "M;F;M;M", "homepage": "https://brianokorn.com;http://chuerpan.com/;http://www.cs.cmu.edu/~hebert/;http://davheld.github.io/", "dblp": "91/9610;;h/MartialHebert;22/11147", "google_scholar": "uV7RuGwAAAAJ;HBMZ02EAAAAJ;https://scholar.google.com.tw/citations?user=0ytii2EAAAAJ;0QtU-NsAAAAJ", "orcid": ";;;", "linkedin": "brianokornrobotics/;;;", "or_profile": "~Brian_Okorn1;~Chuer_Pan1;~Martial_Hebert1;~David_Held1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;MS student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nokorn2022deep,\ntitle={Deep Projective Rotation Estimation through Relative Supervision},\nauthor={Brian Okorn and Chuer Pan and Martial Hebert and David Held},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ZezSmJq06Ep}\n}", "github": "", "project": "", "reviewers": "2N3Q;WRCQ;ziM5;cxjP", "site": "https://openreview.net/forum?id=ZezSmJq06Ep", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1686769653198913499&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": 
"https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "_8DoIe8G3t", "title": "BEHAVIOR-1K: A Benchmark for Embodied AI with 1,000 Everyday Activities and Realistic Simulation", "track": "main", "status": "Oral", "tldr": "BEHAVIOR-1K is a novel human-centric benchmark for Embodied AI in simulation with 1000 everyday activities, a diverse dataset of 5,000+ objects and 50 scenes, and a simulation environment, OmniGibson, that reaches high levels of simulation realism.", "abstract": "We present BEHAVIOR-1K, a comprehensive simulation benchmark for human-centered robotics. BEHAVIOR-1K includes two components, guided and motivated by the results of an extensive survey on \"what do you want robots to do for you?\". The first is the definition of 1,000 everyday activities, grounded in 50 scenes (houses, gardens, restaurants, offices, etc.) with more than 5,000 objects annotated with rich physical and semantic properties. The second is OmniGibson, a novel simulation environment that supports these activities via realistic physics simulation and rendering of rigid bodies, deformable bodies, and liquids. Our experiments indicate that the activities in BEHAVIOR-1K are long-horizon and dependent on complex manipulation skills, both of which remain a challenge for even state-of-the-art robot learning solutions. To calibrate the simulation-to-reality gap of BEHAVIOR-1K, we provide an initial study on transferring solutions learned with a mobile manipulator in a simulated apartment to its real-world counterpart. We hope that BEHAVIOR-1K's human-grounded nature, diversity, and realism make it valuable for embodied AI and robot learning research. 
Project website: https://behavior.stanford.edu.", "keywords": "Embodied AI Benchmark;Everyday Activities;Mobile Manipulation", "primary_area": "", "supplementary_material": "/attachment/4031d946bddbcdccc59169c5862bb72f6ef2d695.zip", "author": "Chengshu Li;Ruohan Zhang;Josiah Wong;Cem Gokmen;Sanjana Srivastava;Roberto Mart\u00edn-Mart\u00edn;Chen Wang;Gabrael Levine;Michael Lingelbach;Jiankai Sun;Mona Anvari;Minjune Hwang;Manasi Sharma;Arman Aydin;Dhruva Bansal;Samuel Hunter;Kyu-Young Kim;Alan Lou;Caleb R Matthews;Ivan Villa-Renteria;Jerry Huayang Tang;Claire Tang;Fei Xia;Silvio Savarese;Hyowon Gweon;Karen Liu;Jiajun Wu;Li Fei-Fei", "authorids": "~Chengshu_Li1;~Ruohan_Zhang1;~Josiah_Wong1;~Cem_Gokmen1;~Sanjana_Srivastava2;~Roberto_Mart\u00edn-Mart\u00edn1;~Chen_Wang16;~Gabrael_Levine1;~Michael_Lingelbach1;~Jiankai_Sun6;~Mona_Anvari2;~Minjune_Hwang1;~Manasi_Sharma1;~Arman_Aydin1;~Dhruva_Bansal1;~Samuel_Hunter2;~Kyu-Young_Kim1;~Alan_Lou1;~Caleb_R_Matthews1;~Ivan_Villa-Renteria1;~Jerry_Huayang_Tang1;~Claire_Tang1;~Fei_Xia1;~Silvio_Savarese1;~Hyowon_Gweon1;~Karen_Liu1;~Jiajun_Wu1;~Li_Fei-Fei1", "gender": "M;M;M;M;;M;M;;M;;F;M;;M;M;;;;;M;M;F;M;M;;;M;F", "homepage": "https://www.chengshuli.me/;https://ai.stanford.edu/~zharu/;https://www.jdw.ong;https://www.cemgokmen.com;;https://robertomartinmartin.com/;http://www.chenwangjeremy.net/;https://gabrael.io;;;;https://mj-hwang.github.io/;;;https://www.dhruvabansal.com;;https://kykim0.github.io;;;https://ivillar.github.io;;;;;http://sll.stanford.edu;https://cs.stanford.edu/~karenliu;https://jiajunwu.com;https://profiles.stanford.edu/fei-fei-li", "dblp": "63/6091-2;;178/8895;220/3187;;153/7670;;;;121/4211;;263/9824;;;243/0144;;;;;;;;;50/3578;;;117/4768;79/2528", "google_scholar": "yay_v9EAAAAJ;-bqvNWoAAAAJ;Y0a0n5wAAAAJ;wCiI8oUAAAAJ;sqTh_dwAAAAJ;XOJE8OEAAAAJ;lStkAzsAAAAJ;;d4xUjL8AAAAJ;726MCb8AAAAJ;;juBEoEUAAAAJ;;;uUTLG2IAAAAJ;;9iLSqKAAAAAJ;;wAr2ED8AAAAJ;;;;pqP5_PgAAAAJ;ImpbxLsAAAAJ;;i28fU0MAAAAJ;2efgcS0AAAAJ;rDfyQnIAAAAJ", "orcid": "0000-0002-9027-8617;;;0000-0001-9446-6052;;0000-0002-9586-2759;;;;;;;;;;;;;;;;;0000-0003-4343-1444;;;0000-0001-5926-0905;0000-0002-4176-343X;", "linkedin": "chengshu/;;josiahw/;cgokmen/;sanjana-srivastava5/;;;;;;mona-anvari-0b92aa97;minjune-hwang-751999138/;;https://linkedin.com/in/arman-aydin-915035185;dhruvabansal2k/;;;;caleb-matthews;;jerry-tang-b37026162;claire-tang-860176200/;;;;;jiajunwu/;fei-fei-li-4541247/", "or_profile": "~Chengshu_Li1;~Ruohan_Zhang1;~Josiah_Wong1;~Cem_Gokmen1;~Sanjana_Srivastava2;~Roberto_Mart\u00edn-Mart\u00edn1;~Chen_Wang16;~Gabrael_Levine1;~Michael_Lingelbach1;~Jiankai_Sun6;~Mona_Anvari2;~Minjune_Hwang1;~Manasi_Sharma1;~Arman_Aydin1;~Dhruva_Bansal1;~Samuel_Hunter2;~Kyu-Young_Kim1;~Alan_Lou1;~Caleb_R_Matthews1;~Ivan_Villa-Renteria1;~Jerry_Huayang_Tang1;~Claire_Tang1;~Fei_Xia1;~Silvio_Savarese1;~Hyowon_Gweon1;~Karen_Liu1;~Jiajun_Wu1;~Li_Fei-Fei1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;SalesForce.com;Computer Science Department, Stanford University;Stanford University;Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;;;Stanford University;;Computer Science Department, Stanford University;;Stanford University;Stanford University;Stanford University;Computer Science Department, Stanford University;Google;Stanford University;Stanford University;;Stanford University;Stanford University", "aff_domain": 
"stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;salesforce.com;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;;;stanford.edu;;cs.stanford.edu;;stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;google.com;stanford.edu;stanford.edu;;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;MS student;MS student;PhD student;Researcher;PhD student;Undergrad student;PhD student;PhD student;MS student;MS student;;;MS student;;MS student;;Undergrad student;MS student;Undergrad student;Undergrad student;Researcher;Adjunct Professor;Associate Professor;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2022behaviork,\ntitle={{BEHAVIOR}-1K: A Benchmark for Embodied {AI} with 1,000 Everyday Activities and Realistic Simulation},\nauthor={Chengshu Li and Ruohan Zhang and Josiah Wong and Cem Gokmen and Sanjana Srivastava and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n and Chen Wang and Gabrael Levine and Michael Lingelbach and Jiankai Sun and Mona Anvari and Minjune Hwang and Manasi Sharma and Arman Aydin and Dhruva Bansal and Samuel Hunter and Kyu-Young Kim and Alan Lou and Caleb R Matthews and Ivan Villa-Renteria and Jerry Huayang Tang and Claire Tang and Fei Xia and Silvio Savarese and Hyowon Gweon and Karen Liu and Jiajun Wu and Li Fei-Fei},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=_8DoIe8G3t}\n}", "github": "", "project": "", "reviewers": "Qfw6;HvRe;NBGD;5r6Z", "site": "https://openreview.net/forum?id=_8DoIe8G3t", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 28, "corr_rating_confidence": 0, "gs_citation": 205, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15734979241787388245&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;2;0;0;0;0", "aff_unique_norm": "Stanford University;Salesforce;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.stanford.edu;https://www.salesforce.com;https://www.google.com", "aff_unique_abbr": "Stanford;Salesforce;Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;2;0;0;0;0", "aff_campus_unique": "Stanford;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "_u4m5aEbWfU", "title": "SurroundDepth: Entangling Surrounding Views for Self-Supervised Multi-Camera Depth Estimation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Depth estimation from images serves as the fundamental step of 3D perception for autonomous driving and is an economical alternative to expensive depth sensors like LiDAR. The temporal photometric consistency enables self-supervised depth estimation without labels, further facilitating its application. However, most existing methods predict the depth solely based on each monocular image and ignore the correlations among multiple surrounding cameras, which are typically available for modern self-driving vehicles. In this paper, we propose a SurroundDepth method to incorporate the information from multiple surrounding views to predict depth maps across cameras. Specifically, we employ a joint network to process all the surrounding views and propose a cross-view transformer to effectively fuse the information from multiple views. 
We apply cross-view self-attention to efficiently enable the global interactions between multi-camera feature maps. Different from self-supervised monocular depth estimation, we are able to predict real-world scales given multi-camera extrinsic matrices. To achieve this goal, we adopt two-frame structure-from-motion to extract scale-aware pseudo depths to pretrain the models. Further, instead of predicting the ego-motion of each individual camera, we estimate a universal ego-motion of the vehicle and transfer it to each view to achieve multi-view consistency. In experiments, our method achieves the state-of-the-art performance on the challenging multi-camera depth estimation datasets DDAD and nuScenes. ", "keywords": "Self-supervised depth estimation;Multi-camera perception;Structure-from-motion", "primary_area": "", "supplementary_material": "/attachment/df55c274865e5bf279ad243d9fb00f8abc2f4957.zip", "author": "Yi Wei;Linqing Zhao;Wenzhao Zheng;Zheng Zhu;Yongming Rao;Guan Huang;Jiwen Lu;Jie Zhou", "authorids": "~Yi_Wei1;~Linqing_Zhao1;~Wenzhao_Zheng1;~Zheng_Zhu1;~Yongming_Rao1;~Guan_Huang1;~Jiwen_Lu1;~Jie_Zhou3", "gender": "M;M;;M;M;M;M;M", "homepage": "https://weiyithu.github.io/;https://lqzhao.github.io/;https://wzzheng.net;http://www.zhengzhu.net/;https://raoyongming.github.io/;;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html", "dblp": ";296/3712;230/1277;29/4319.html/;200/2255;93/10768.html/;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen;00/5012-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;ypxt5UEAAAAJ;LdK9scgAAAAJ;https://scholar.google.com.hk/citations?user=NmwjI0AAAAAJ;3qO6gK4AAAAJ;;TN8uDQoAAAAJ;", "orcid": ";;;;0000-0003-3952-8753;;0000-0002-6121-5529;", "linkedin": ";;;;;;;", "or_profile": "~Yi_Wei1;~Linqing_Zhao1;~Wenzhao_Zheng1;~Zheng_Zhu1;~Yongming_Rao1;~Guan_Huang1;~Jiwen_Lu1;~Jie_Zhou3", "aff": "Automation, Tsinghua University, Tsinghua University;Tianjin University;Tsinghua University;PhiGent Robotics;Tsinghua University;Xforward AI Technology;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tju.edu.cn;tsinghua.edu.cn;phigent.ai;tsinghua.edu.cn;xforwardai.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Researcher;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwei2022surrounddepth,\ntitle={SurroundDepth: Entangling Surrounding Views for Self-Supervised Multi-Camera Depth Estimation},\nauthor={Yi Wei and Linqing Zhao and Wenzhao Zheng and Zheng Zhu and Yongming Rao and Guan Huang and Jiwen Lu and Jie Zhou},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=_u4m5aEbWfU}\n}", "github": "https://github.com/weiyithu/SurroundDepth", "project": "", "reviewers": "nGH3;RZGD;CgbY;maRz", "site": "https://openreview.net/forum?id=_u4m5aEbWfU", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 32, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15368514989058133898&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;2;0;3;0;0", "aff_unique_norm": "Tsinghua University;Tianjin University;PhiGent Robotics;Xforward AI Technology", "aff_unique_dep": "Automation;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.tju.edu.cn;;", 
"aff_unique_abbr": "THU;TJU;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "aQnn9cIVTRJ", "title": "Motion Policy Networks", "track": "main", "status": "Poster", "tldr": "Motion Policy Networks are trained on millions of example trajectories to generate collision-free, smooth motion from just a single depth camera image", "abstract": "Collision-free motion generation in unknown environments is a core building block for robot manipulation. \nGenerating such motions is challenging due to multiple objectives; not only should the solutions be optimal, the motion generator itself must be fast enough for real-time performance and reliable enough for practical deployment.\nA wide variety of methods have been proposed ranging from local controllers to global planners, often being combined to offset their shortcomings. We present an end-to-end neural model called Motion Policy Networks (M$\\pi$Nets) to generate collision-free, smooth motion from just a single depth camera observation. M$\\pi$Nets are trained on over 3 million motion planning problems in more than 500,000 environments. Our experiments show that M$\\pi$Nets are significantly faster than global planners while exhibiting the reactivity needed to deal with dynamic scenes. They are 46% better than prior neural planners and more robust than local control policies. Despite being only trained in simulation, M$\\pi$Nets transfer well to the real robot with noisy partial point clouds. Videos and code are available at https://mpinets.github.io", "keywords": "Motion Control;Imitation Learning;End-to-End Learning", "primary_area": "", "supplementary_material": "/attachment/1099b764ba403f8997354cd72ba4ca723889c5c3.zip", "author": "Adam Fishman;Adithyavairavan Murali;Clemens Eppner;Bryan Peele;Byron Boots;Dieter Fox", "authorids": "~Adam_Fishman1;~Adithyavairavan_Murali2;~Clemens_Eppner1;~Bryan_Peele1;~Byron_Boots1;~Dieter_Fox1", "gender": "M;M;;;;M", "homepage": "https://fishbotics.com;http://adithyamurali.com;https://clemense.github.io;;;https://homes.cs.washington.edu/~fox/", "dblp": ";;47/7736;;;f/DieterFox", "google_scholar": "ciayRBYAAAAJ;Tjj8TZAAAAAJ;zMw7PF8AAAAJ;;;DqXsbPAAAAAJ", "orcid": ";;0000-0002-5398-4037;;;", "linkedin": "fishmanadam/;adithyamurali;;;;", "or_profile": "~Adam_Fishman1;~Adithyavairavan_Murali2;~Clemens_Eppner1;~Bryan_Peele1;~Byron_Boots1;~Dieter_Fox1", "aff": "University of Washington;NVIDIA;NVIDIA;;;Department of Computer Science", "aff_domain": "washington.edu;nvidia.com;nvidia.com;;;cs.washington.edu", "position": "PhD student;Researcher;Researcher;;;Full Professor", "bibtex": "@inproceedings{\nfishman2022motion,\ntitle={Motion Policy Networks},\nauthor={Adam Fishman and Adithyavairavan Murali and Clemens Eppner and Bryan Peele and Byron Boots and Dieter Fox},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=aQnn9cIVTRJ}\n}", "github": "https://github.com/nvlabs/motion-policy-networks", "project": "", "reviewers": "Eji4;S422;oeVp;3osu;V556", "site": "https://openreview.net/forum?id=aQnn9cIVTRJ", "pdf_size": 0, "rating": "1;4;4;10;10", "confidence": "", "rating_avg": 5.8, "confidence_avg": 0, "replies_avg": 24, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7461854993482222568&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": 
"University of Washington;NVIDIA;Unknown Institution", "aff_unique_dep": ";NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www.washington.edu;https://www.nvidia.com;", "aff_unique_abbr": "UW;NVIDIA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "abd_D-iVjk0", "title": "LaRa: Latents and Rays for Multi-Camera Bird\u2019s-Eye-View Semantic Segmentation", "track": "main", "status": "Poster", "tldr": "In this work, we present LaRa, a model doing deep multi-camera fusion in a latent representation for bird's-eye-view vehicle binary segmentation.", "abstract": "Recent works in autonomous driving have widely adopted the bird\u2019seye-view (BEV) semantic map as an intermediate representation of the world. Online prediction of these BEV maps involves non-trivial operations such as multi-camera data extraction as well as fusion and projection into a common topview grid. This is usually done with error-prone geometric operations (e.g., homography or back-projection from monocular depth estimation) or expensive direct dense mapping between image pixels and pixels in BEV (e.g., with MLP or attention). In this work, we present \u2018LaRa\u2019, an efficient encoder-decoder, transformer-based model for vehicle semantic segmentation from multiple cameras. Our approach uses a system of cross-attention to aggregate information over multiple sensors into a compact, yet rich, collection of latent representations. These latent representations, after being processed by a series of selfattention blocks, are then reprojected with a second cross-attention in the BEV space. We demonstrate that our model outperforms the best previous works using transformers on nuScenes. The code and trained models are available at https://github.com/valeoai/LaRa.\n", "keywords": "bird\u2019s eye view semantic segmentation;encoder-decoder transformers;autonomous driving", "primary_area": "", "supplementary_material": "/attachment/ebcc1d943d68032db6decd4c331cdba8ae88c784.zip", "author": "Florent Bartoccioni;Eloi Zablocki;Andrei Bursuc;Patrick Perez;Matthieu Cord;Karteek Alahari", "authorids": "~Florent_Bartoccioni2;~Eloi_Zablocki3;~Andrei_Bursuc1;~Patrick_Perez1;~Matthieu_Cord1;~Karteek_Alahari1", "gender": "M;M;M;M;M;M", "homepage": "https://abursuc.github.io/;https://cord.isir.upmc.fr/;http://thoth.inrialpes.fr/people/alahari;;https://f-barto.github.io/;https://ptrckprz.github.io/", "dblp": "40/8692.html;68/3117;a/KarteekAlahari;204/2791;;71/1167", "google_scholar": "https://scholar.google.fr/citations?user=HTfERCsAAAAJ;SpAotDcAAAAJ;https://scholar.google.fr/citations?user=qcyG7rwAAAAJ;https://scholar.google.fr/citations?user=dOkbUmEAAAAJ;SemxkMwAAAAJ;https://scholar.google.fr/citations?user=8Cph5uQAAAAJ", "orcid": ";;;;0000-0003-2285-2549;", "linkedin": ";;;;flobarto;", "or_profile": "~Andrei_Bursuc1;~Matthieu_Cord1;~Karteek_Alahari1;~eloi_zablocki1;~Florent_BARTOCCIONI1;~Patrick_Perez2", "aff": "Valeo;Sorbonne Universit\u00e9;Inria;Valeo;Valeo;Valeo", "aff_domain": "valeo.com;isir.upmc.fr;inria.fr;valeo.com;valeo.com;valeo.com", "position": "Research Scientist;Full Professor;Tenured researcher (eq. Asso. 
prof.);Researcher;PhD student;Scientific Director", "bibtex": "@inproceedings{\nbartoccioni2022lara,\ntitle={LaRa: Latents and Rays for Multi-Camera Bird{\\textquoteright}s-Eye-View Semantic Segmentation},\nauthor={Florent Bartoccioni and Eloi Zablocki and Andrei Bursuc and Patrick Perez and Matthieu Cord and Karteek Alahari},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=abd_D-iVjk0}\n}", "github": "https://github.com/valeoai/LaRa", "project": "", "reviewers": "u5Wy;T5RB;Zycj", "site": "https://openreview.net/forum?id=abd_D-iVjk0", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16993502557356759369&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Valeo;Sorbonne Universit\u00e9;INRIA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.valeo.com;https://www.sorbonne-universite.fr;https://www.inria.fr", "aff_unique_abbr": ";Sorbonne U;Inria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "France" }, { "id": "awciQcCEGJs", "title": "Graph Inverse Reinforcement Learning from Diverse Videos", "track": "main", "status": "Oral", "tldr": "", "abstract": "Research on Inverse Reinforcement Learning (IRL) from third-person videos has shown encouraging results on removing the need for manual reward design for robotic tasks. However, most prior works are still limited by training from a relatively restricted domain of videos. In this paper, we argue that the true potential of third-person IRL lies in increasing the diversity of videos for better scaling. To learn a reward function from diverse videos, we propose to perform graph abstraction on the videos followed by temporal matching in the graph space to measure the task progress. Our insight is that a task can be described by entity interactions that form a graph, and this graph abstraction can help remove irrelevant information such as textures, resulting in more robust reward functions. We evaluate our approach, GraphIRL, on cross-embodiment learning in X-MAGICAL and learning from human demonstrations for real-robot manipulation. We show significant improvements in robustness to diverse video demonstrations over previous approaches, and even achieve better results than manual reward design on a real robot pushing task. 
Videos are available at https://sateeshkumar21.github.io/GraphIRL/.", "keywords": "Inverse Reinforcement Learning;Third-Person Video;Graph Network", "primary_area": "", "supplementary_material": "/attachment/f198e190b1bb7075b5f21af7ac27066971dd0901.zip", "author": "Sateesh Kumar;Jonathan Zamora;Nicklas Hansen;Rishabh Jangir;Xiaolong Wang", "authorids": "~Sateesh_Kumar2;~Jonathan_Zamora1;~Nicklas_Hansen1;~Rishabh_Jangir1;~Xiaolong_Wang3", "gender": "M;Non-Binary;M;M;M", "homepage": ";https://nicklashansen.github.io;https://jangirrishabh.github.io/;https://xiaolonw.github.io/;https://jonzamora.dev", "dblp": "253/0475;258/0744.html;;91/952-4;", "google_scholar": "6CWng3MAAAAJ;OFtDgzwAAAAJ;UFokX9EAAAAJ;Y8O9N_0AAAAJ;TIST9HIAAAAJ", "orcid": ";0000-0001-9897-4003;;;", "linkedin": ";ncklas;rishabh-jangir-74b1929b/;;jonzamora18/", "or_profile": "~Sateesh_Kumar2;~Nicklas_Hansen1;~Rishabh_Jangir1;~Xiaolong_Wang3;~Jonathan_Zamora-Anaya1", "aff": "University of California, San Diego;Meta;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;fb.com;ucsd.edu;ucsd.edu;ucsd.edu", "position": "MS student;Intern;MS student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nkumar2022graph,\ntitle={Graph Inverse Reinforcement Learning from Diverse Videos},\nauthor={Sateesh Kumar and Jonathan Zamora and Nicklas Hansen and Rishabh Jangir and Xiaolong Wang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=awciQcCEGJs}\n}", "github": "", "project": "", "reviewers": "77Z4;A9wW;NhHS;J5yc", "site": "https://openreview.net/forum?id=awciQcCEGJs", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15773673457390824925&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of California, San Diego;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ucsd.edu;https://meta.com", "aff_unique_abbr": "UCSD;Meta", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "b1zMsZuzd2e", "title": "Learning Dense Visual Descriptors using Image Augmentations for Robot Manipulation Tasks", "track": "main", "status": "Poster", "tldr": "We propose a self-supervised training approach for learning view-invariant dense visual descriptors using image augmentations.", "abstract": "We propose a self-supervised training approach for learning view-invariant\ndense visual descriptors using image augmentations. Unlike existing\nworks, which often require complex datasets, such as registered RGBD sequences,\nwe train on an unordered set of RGB images. This allows for learning from a single \ncamera view, e.g., in an existing robotic cell with a fix-mounted camera. We\ncreate synthetic views and dense pixel correspondences using data augmentations.\nWe find our descriptors are competitive to the existing methods, despite the simpler\ndata recording and setup requirements. We show that training on synthetic\ncorrespondences provides descriptor consistency across a broad range of camera\nviews. We compare against training with geometric correspondence from multiple\nviews and provide ablation studies. 
We also show a robotic bin-picking experiment \nusing descriptors learned from a fix-mounted camera for defining grasp\npreferences.", "keywords": "self-supervised learning;computer vision;representation learning;bin-picking", "primary_area": "", "supplementary_material": "/attachment/d39f8dacb6313897b6c6acf8d6cf9efd14f946f9.zip", "author": "Christian Graf;David B. Adrian;Joshua Weil;Miroslav Gabriel;Philipp Schillinger;Markus Spies;Heiko Neumann;Andras Gabor Kupcsik", "authorids": "~Christian_Graf1;~David_B._Adrian1;fixed-term.joshua.weil@de.bosch.com;miroslav.gabriel@de.bosch.com;~Philipp_Schillinger1;markus.spies2@de.bosch.com;~Heiko_Neumann1;~Andras_Gabor_Kupcsik2", "gender": ";M;;;M;;M;M", "homepage": ";;;;https://people.kth.se/~schillin/;;https://www.uni-ulm.de/in/neuroinformatik/institut/hidden/hneumann/;", "dblp": ";151/9363;;;;;n/HeikoNeumann;", "google_scholar": "https://scholar.google.de/citations?user=sueAl0cAAAAJ;vpn6QN0AAAAJ;;;;;https://scholar.google.de/citations?user=0NbVSbMAAAAJ;G0EQYYIAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Christian_Graf1;~David_B._Adrian1;fixed-term.joshua.weil@de.bosch.com;miroslav.gabriel@de.bosch.com;~Philipp_Schillinger1;markus.spies2@de.bosch.com;~Heiko_Neumann1;~Andras_Gabor_Kupcsik2", "aff": "Bosch Center for Artifical Intelligence;Robert Bosch GmbH, Bosch;;;Robert Bosch GmbH, Bosch;;Ulm University;Bosch Center for Artificial Intelligence", "aff_domain": "de.bosch.com;de.bosch.com;;;de.bosch.com;;uni-ulm.de;de.bosch.com", "position": "Researcher;PhD student;;;Researcher;;Full Professor;Researcher", "bibtex": "@inproceedings{\ngraf2022learning,\ntitle={Learning Dense Visual Descriptors using Image Augmentations for Robot Manipulation Tasks},\nauthor={Christian Graf and David B. Adrian and Joshua Weil and Miroslav Gabriel and Philipp Schillinger and Markus Spies and Heiko Neumann and Andras Gabor Kupcsik},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=b1zMsZuzd2e}\n}", "github": "", "project": "", "reviewers": "aGSf;etoZ;Wx5g;M14W", "site": "https://openreview.net/forum?id=b1zMsZuzd2e", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11337049388774151834&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Bosch Center for Artificial Intelligence;Robert Bosch GmbH;Ulm University", "aff_unique_dep": "Center for Artificial Intelligence;;", "aff_unique_url": "https://www.bosch-ai.com;https://www.bosch.com;https://www.uni-ulm.de/", "aff_unique_abbr": "BCAI;Bosch;U Ulm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "b88HF4vd_ej", "title": "Learning Representations that Enable Generalization in Assistive Tasks", "track": "main", "status": "Poster", "tldr": "", "abstract": "Recent work in sim2real has successfully enabled robots to act in physical environments by training in simulation with a diverse ``population'' of environments (i.e. domain randomization). In this work, we focus on enabling generalization in \\emph{assistive tasks}: tasks in which the robot is acting to assist a user (e.g. helping someone with motor impairments with bathing or with scratching an itch). 
Such tasks are particularly interesting relative to prior sim2real successes because the environment now contains a \\emph{human who is also acting}. This complicates the problem because the diversity of human users (instead of merely physical environment parameters) is more difficult to capture in a population, thus increasing the likelihood of encountering out-of-distribution (OOD) human policies at test time. We advocate that generalization to such OOD policies benefits from (1) learning a good latent representation for human policies that test-time humans can accurately be mapped to, and (2) making that representation adaptable with test-time interaction data, instead of relying on it to perfectly capture the space of human policies based on the simulated population only. We study how to best learn such a representation by evaluating on purposefully constructed OOD test policies. \nWe find that sim2real methods that encode environment (or population) parameters and work well in tasks that robots do in isolation, do not work well in \\emph{assistance}. In assistance, it seems crucial to train the representation based on the \\emph{history of interaction} directly, because that is what the robot will have access to at test time. Further, training these representations to then \\emph{predict human actions} not only gives them better structure, but also enables them to be fine-tuned at test-time, when the robot observes the partner act.", "keywords": "assistive robots;representation learning;OOD generalization", "primary_area": "", "supplementary_material": "/attachment/be190b2627f4f0237b003734ad0e840715fa57c5.zip", "author": "Jerry Zhi-Yang He;Zackory Erickson;Daniel S. Brown;Aditi Raghunathan;Anca Dragan", "authorids": "~Jerry_Zhi-Yang_He1;~Zackory_Erickson1;~Daniel_S._Brown1;~Aditi_Raghunathan1;~Anca_Dragan1", "gender": "M;M;M;F;F", "homepage": "https://herobotics.me;https://zackory.com;https://www.cs.utah.edu/~dsbrown/;https://www.cs.cmu.edu/~aditirag/;http://www.ancadragan.com/", "dblp": ";;141/7769;166/1409;", "google_scholar": ";wElkTtIAAAAJ;https://scholar.google.com/citations?hl=en;Ch9iRwQAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jerry_Zhi-Yang_He1;~Zackory_Erickson1;~Daniel_S._Brown1;~Aditi_Raghunathan1;~Anca_Dragan1", "aff": ";Carnegie Mellon University;University of California, Berkeley;Carnegie Mellon University;University of California, Berkeley", "aff_domain": ";cmu.edu;berkeley.edu;cmu.edu;berkeley.edu", "position": ";Assistant Professor;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhe2022learning,\ntitle={Learning Representations that Enable Generalization in Assistive Tasks},\nauthor={Jerry Zhi-Yang He and Zackory Erickson and Daniel S. 
Brown and Aditi Raghunathan and Anca Dragan},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=b88HF4vd_ej}\n}", "github": "https://github.com/hzyjerry/adaptive-caregiver", "project": "", "reviewers": "EjUi;Up7D;tdQt;QYB2", "site": "https://openreview.net/forum?id=b88HF4vd_ej", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7021959480632987384&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu", "aff_unique_abbr": "CMU;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "bJS2fBkFZ-K", "title": "PI-QT-Opt: Predictive Information Improves Multi-Task Robotic Reinforcement Learning at Scale", "track": "main", "status": "Poster", "tldr": "We introduce PI-QT-Opt, a QT-Opt agent with a predictive information auxiliary, to solve up to 297 vision-based robot manipulation tasks in simulation and the real world with one model, showing strong performance and zero-shot transfer to new tasks.", "abstract": "The predictive information, the mutual information between the past and future, has been shown to be a useful representation learning auxiliary loss for training reinforcement learning agents, as the ability to model what will happen next is critical to success on many control tasks. While existing studies are largely restricted to training specialist agents on single-task settings in simulation, in this work, we study modeling the predictive information for robotic agents and its importance for general-purpose agents that are trained to master a large repertoire of diverse skills from large amounts of data. Specifically, we introduce Predictive Information QT-Opt (PI-QT-Opt), a QT-Opt agent augmented with an auxiliary loss that learns representations of the predictive information to solve up to 297 vision-based robot manipulation tasks in simulation and the real world with a single set of parameters. We demonstrate that modeling the predictive information significantly improves success rates on the training tasks and leads to better zero-shot transfer to unseen novel tasks. 
Finally, we evaluate PI-QT-Opt on real robots, achieving substantial and consistent improvement over QT-Opt in multiple experimental settings of varying environments, skills, and multi-task configurations.", "keywords": "deep reinforcement learning;robot manipulation;multi-task learning", "primary_area": "", "supplementary_material": "/attachment/7ab5900e7f656ef2f1262a9a392b66ccbcbed0b3.zip", "author": "Kuang-Huei Lee;Ted Xiao;Adrian Li;Paul Wohlhart;Ian Fischer;Yao Lu", "authorids": "~Kuang-Huei_Lee1;~Ted_Xiao1;alhli@google.com;~Paul_Wohlhart1;~Ian_Fischer1;~Yao_Lu13", "gender": "M;M;;M;M;", "homepage": "https://kuanghuei.github.io/;https://www.tedxiao.me;;;;", "dblp": "66/11466;198/0598;;http://dblp.uni-trier.de/pers/hd/w/Wohlhart:Paul;17/5600;26/5662-6", "google_scholar": "rE7-N30AAAAJ;;;SzHPa90AAAAJ;tPnf61gAAAAJ;OI7zFmwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;iantfischer;", "or_profile": "~Kuang-Huei_Lee1;~Ted_Xiao1;alhli@google.com;~Paul_Wohlhart1;~Ian_Fischer1;~Yao_Lu13", "aff": "Google;;;Graz University of Technology;Google;Google", "aff_domain": "google.com;;; ;google.com;google.com", "position": "Researcher;;;Post Doc;Researcher;Researcher", "bibtex": "@inproceedings{\nlee2022piqtopt,\ntitle={{PI}-{QT}-Opt: Predictive Information Improves Multi-Task Robotic Reinforcement Learning at Scale},\nauthor={Kuang-Huei Lee and Ted Xiao and Adrian Li and Paul Wohlhart and Ian Fischer and Yao Lu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=bJS2fBkFZ-K}\n}", "github": "", "project": "", "reviewers": "UcuR;G4sr;jMwU;QJ7c", "site": "https://openreview.net/forum?id=bJS2fBkFZ-K", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5672641947952886938&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Google;Graz University of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.tugraz.at", "aff_unique_abbr": "Google;TUGraz", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Austria" }, { "id": "bUUf1CT1sNu", "title": "Learning Robust Real-World Dexterous Grasping Policies via Implicit Shape Augmentation", "track": "main", "status": "Poster", "tldr": "We propose Implicit Shape Augmentation, a method that is able to robustly interact with daily unseen objects by leveraging simulation as well as a small number of human demonstrations.", "abstract": "Dexterous robotic hands have the capability to interact with a wide variety of household objects. However, learning robust real world grasping policies for arbitrary objects has proven challenging due to the difficulty of generating high quality training data. In this work, we propose a learning system (\\emph{ISAGrasp}) for leveraging a small number of human demonstrations to bootstrap the generation of a much larger dataset containing successful grasps on a variety of novel objects. Our key insight is to use a correspondence-aware implicit generative model to deform object meshes and demonstrated human grasps in order to create a diverse dataset for supervised learning, while maintaining semantic realism. 
We use this dataset to train a robust grasping policy in simulation which can be deployed in the real world. We demonstrate grasping performance with a four-fingered Allegro hand in both simulation and the real world, and show this method can handle entirely new semantic classes and achieve a 79% success rate on grasping unseen objects in the real world. ", "keywords": "Dexterous Manipulation;Learning from Human Demonstration;Data Augmentation", "primary_area": "", "supplementary_material": "/attachment/77c06e101c3dda8b347901eb7a2a8c6d823b082d.zip", "author": "Qiuyu Chen;Karl Van Wyk;Yu-Wei Chao;Wei Yang;Arsalan Mousavian;Abhishek Gupta;Dieter Fox", "authorids": "~Qiuyu_Chen5;~Karl_Van_Wyk1;~Yu-Wei_Chao1;~Wei_Yang2;~Arsalan_Mousavian1;~Abhishek_Gupta1;~Dieter_Fox1", "gender": "F;;M;M;M;M;M", "homepage": "https://qiuyuchen14.github.io/;;http://www-personal.umich.edu/~ywchao/;http://wyang.me/;https://cs.gmu.edu/~amousavi/;https://homes.cs.washington.edu/~abhgupta/;https://homes.cs.washington.edu/~fox/", "dblp": ";;44/10700;03/1094-19;164/8572;18/6404-4;f/DieterFox", "google_scholar": ";TCYAoF8AAAAJ;48Y9F-YAAAAJ;6QQX88UAAAAJ;fcA9m88AAAAJ;1wLVDP4AAAAJ;DqXsbPAAAAAJ", "orcid": ";;;0000-0003-3975-2472;;;", "linkedin": ";;;;;;", "or_profile": "~Qiuyu_Chen5;~Karl_Van_Wyk1;~Yu-Wei_Chao1;~Wei_Yang2;~Arsalan_Mousavian1;~Abhishek_Gupta1;~Dieter_Fox1", "aff": "Department of Computer Science, University of Washington;;NVIDIA;NVIDIA;NVIDIA;Massachusetts Institute of Technology;Department of Computer Science", "aff_domain": "cs.washington.edu;;nvidia.com;nvidia.com;nvidia.com;mit.edu;cs.washington.edu", "position": "PhD student;;Research Scientist;Research Scientist;Research Scientist;Postdoc;Full Professor", "bibtex": "@inproceedings{\nchen2022learning,\ntitle={Learning Robust Real-World Dexterous Grasping Policies via Implicit Shape Augmentation},\nauthor={Qiuyu Chen and Karl Van Wyk and Yu-Wei Chao and Wei Yang and Arsalan Mousavian and Abhishek Gupta and Dieter Fox},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=bUUf1CT1sNu}\n}", "github": "", "project": "", "reviewers": "RWRD;KGmi;kqPQ;gvHJ", "site": "https://openreview.net/forum?id=bUUf1CT1sNu", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17267494363471865136&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "University of Washington;NVIDIA;Massachusetts Institute of Technology;Unknown Institution", "aff_unique_dep": "Department of Computer Science;NVIDIA Corporation;;Department of Computer Science", "aff_unique_url": "https://www.washington.edu;https://www.nvidia.com;https://web.mit.edu;", "aff_unique_abbr": "UW;NVIDIA;MIT;", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "bdHkMjBJG_w", "title": "Do As I Can, Not As I Say: Grounding Language in Robotic Affordances", "track": "main", "status": "Oral", "tldr": "Grounding large language models through value function-based robotic affordances gives language models \"hands and eyes\" and allows long-horizon planning long-horizon tasks.", "abstract": "Large language models can encode a wealth of semantic knowledge about the world. 
Such knowledge could be extremely useful to robots aiming to act upon high-level, temporally extended instructions expressed in natural language. However, a significant weakness of language models is that they lack real-world experience, which makes it difficult to leverage them for decision making within a given embodiment. For example, asking a language model to describe how to clean a spill might result in a reasonable narrative, but it may not be applicable to a particular agent, such as a robot, that needs to perform this task in a particular environment. We propose to provide real-world grounding by means of pretrained skills, which are used to constrain the model to propose natural language actions that are both feasible and contextually appropriate. The robot can act as the language model\u2019s \u201chands and eyes,\u201d while the language model supplies high-level semantic knowledge about the task. We show how low-level skills can be combined with large language models so that the language model provides high-level knowledge about the procedures for performing complex and temporally extended instructions, while value functions associated with these skills provide the grounding necessary to connect this knowledge to a particular physical environment. We evaluate our method on a number of real-world robotic tasks, where we show the need for real-world grounding and that this approach is capable of completing long-horizon, abstract, natural language instructions on a mobile manipulator. The project\u2019s website, video, and open source can be found at say-can.github.io.", "keywords": "Large Language Models;Task and Motion Planning;Grounding Models;Mobile Manipulation;Reinforcement Learning;Imitation learning", "primary_area": "", "supplementary_material": "/attachment/ea4cb70d187ec724c6d479a01601bbfaaf1dcfc3.zip", "author": "brian ichter;Anthony Brohan;Yevgen Chebotar;Chelsea Finn;Karol Hausman;Alexander Herzog;Daniel Ho;Julian Ibarz;Alex Irpan;Eric Jang;Ryan Julian;Dmitry Kalashnikov;Sergey Levine;Yao Lu;Carolina Parada;Kanishka Rao;Pierre Sermanet;Alexander T Toshev;Vincent Vanhoucke;Fei Xia;Ted Xiao;Peng Xu;Mengyuan Yan;Noah Brown;Michael Ahn;Omar Cortes;Nicolas Sievers;Clayton Tan;Sichun Xu;Diego Reyes;Jarek Rettinghouse;Jornell Quiambao;Peter Pastor;Linda Luu;Kuang-Huei Lee;Yuheng Kuang;Sally Jesmonth;Kyle Jeffrey;Rosario Jauregui Ruano;Jasmine Hsu;Keerthana Gopalakrishnan;Byron David;Andy Zeng;Chuyuan Kelly Fu", "authorids": "~brian_ichter1;~Anthony_Brohan1;~Yevgen_Chebotar1;~Chelsea_Finn1;~Karol_Hausman2;~Alexander_Herzog2;~Daniel_Ho1;~Julian_Ibarz1;~Alex_Irpan1;~Eric_Jang1;~Ryan_Julian2;~Dmitry_Kalashnikov1;~Sergey_Levine1;~Yao_Lu13;~Carolina_Parada1;~Kanishka_Rao1;~Pierre_Sermanet1;~Alexander_T_Toshev1;~Vincent_Vanhoucke1;~Fei_Xia1;~Ted_Xiao1;~Peng_Xu9;~Mengyuan_Yan1;noahbrown@google.com;michaelahn@google.com;ocortes@google.com;nsievers@google.com;claytontan@google.com;sicxu@google.com;diegoreyes@google.com;jarekr@google.com;jornell@google.com;peterpastor@google.com;luulinda@google.com;~Kuang-Huei_Lee1;yuheng@google.com;sallyjesmonth@google.com;kylejeffrey@google.com;jaureguiruano@google.com;hellojas@google.com;keerthanapg@google.com;byrondavid@google.com;~Andy_Zeng3;fuchuyuan@google.com", "gender": ";M;M;F;;M;M;;M;M;M;;M;;;;;;M;M;M;M;F;;;;;;;;;;;;M;;;;;;;;;", "homepage": 
";;;https://ai.stanford.edu/~cbfinn/;;;https://itsdanielho.com/;;http://www.alexirpan.com;http://evjang.com;https://ryanjulian.me;;https://people.eecs.berkeley.edu/~svlevine/;;;https://research.google/people/KanishkaRao/;https://sermanet.github.io/;;http://vincent.vanhoucke.com;;https://www.tedxiao.me;;;;;;;;;;;;;;https://kuanghuei.github.io/;;;;;;;;;", "dblp": ";;01/11424;131/1783;;;55/10982;;202/2063;190/7794;227/2645;222/2882;80/7594;26/5662-6;71/5134;;28/6457;;69/7157;;198/0598;;164/5672;;;;;;;;;;;;66/11466;;;;;;;;;", "google_scholar": "-w5DuHgAAAAJ;;ADkiClQAAAAJ;vfPE6hgAAAAJ;;jrfFYAIAAAAJ;i05Kw5cAAAAJ;;;Izhkp4YAAAAJ;8C2_ZVsAAAAJ;;8R35rCwAAAAJ;OI7zFmwAAAAJ;;;0nPi5YYAAAAJ;;T7uctwYAAAAJ;pqP5_PgAAAAJ;;460NWeQAAAAJ;https://scholar.google.com/citations?hl=en;;;;;;;;;;;;rE7-N30AAAAJ;;;;;;;;;", "orcid": ";;;;;;;;;;;;;;;;;;0000-0003-0544-2791;0000-0003-4343-1444;;;;;;;;;;;;;;;;;;;;;;;;", "linkedin": ";anthony-brohan-99782b36;;;;alexander-herzog-154030a5/;;;;;;;;;;;sermanet/;;vanhoucke;;;;;;;;;;;;;;;;;;;;;;;;;", "or_profile": "~brian_ichter1;~Anthony_Brohan1;~Yevgen_Chebotar1;~Chelsea_Finn1;~Karol_Hausman2;~Alexander_Herzog2;~Daniel_Ho1;~Julian_Ibarz1;~Alex_Irpan1;~Eric_Jang1;~Ryan_Julian2;~Dmitry_Kalashnikov1;~Sergey_Levine1;~Yao_Lu13;~Carolina_Parada1;~Kanishka_Rao1;~Pierre_Sermanet1;~Alexander_T_Toshev1;~Vincent_Vanhoucke1;~Fei_Xia1;~Ted_Xiao1;~Peng_Xu9;~Mengyuan_Yan1;noahbrown@google.com;michaelahn@google.com;ocortes@google.com;nsievers@google.com;claytontan@google.com;sicxu@google.com;diegoreyes@google.com;jarekr@google.com;jornell@google.com;peterpastor@google.com;luulinda@google.com;~Kuang-Huei_Lee1;yuheng@google.com;sallyjesmonth@google.com;kylejeffrey@google.com;jaureguiruano@google.com;hellojas@google.com;keerthanapg@google.com;byrondavid@google.com;~Andy_Zeng3;fuchuyuan@google.com", "aff": "Google;;Google;Google;;Google;Google;;Google DeepMind;Google;Google;Google;Google;Google;;;Google;;Google;Google;;Google;Google;;;;;;;;;;;;Google;;;;;;;;;", "aff_domain": "google.com;;google.com;google.com;;google.com;google.com;;google.com;google.com;google.com;google.com;google.com;google.com;;;google.com;;google.com;google.com;;google.com;google.com;;;;;;;;;;;;google.com;;;;;;;;;", "position": "Research Scientist;;Research Scientist;Research Scientist;;Researcher;Software Engineer;;Researcher;Researcher;Senior Research Software Engineer;Researcher;Research Scientist;Researcher;;;Research Scientist;;Principal Scientist;Researcher;;Researcher;Researcher;;;;;;;;;;;;Researcher;;;;;;;;;", "bibtex": "@inproceedings{\nichter2022do,\ntitle={Do As I Can, Not As I Say: Grounding Language in Robotic Affordances},\nauthor={brian ichter and Anthony Brohan and Yevgen Chebotar and Chelsea Finn and Karol Hausman and Alexander Herzog and Daniel Ho and Julian Ibarz and Alex Irpan and Eric Jang and Ryan Julian and Dmitry Kalashnikov and Sergey Levine and Yao Lu and Carolina Parada and Kanishka Rao and Pierre Sermanet and Alexander T Toshev and Vincent Vanhoucke and Fei Xia and Ted Xiao and Peng Xu and Mengyuan Yan and Noah Brown and Michael Ahn and Omar Cortes and Nicolas Sievers and Clayton Tan and Sichun Xu and Diego Reyes and Jarek Rettinghouse and Jornell Quiambao and Peter Pastor and Linda Luu and Kuang-Huei Lee and Yuheng Kuang and Sally Jesmonth and Kyle Jeffrey and Rosario Jauregui Ruano and Jasmine Hsu and Keerthana Gopalakrishnan and Byron David and Andy Zeng and Chuyuan Kelly Fu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=bdHkMjBJG_w}\n}", 
"github": "https://github.com/google-research/google-research/blob/master/saycan/SayCan-Robot-Pick-Place.ipynb", "project": "", "reviewers": "JdHL;W6E5;h3Ar;Hat5", "site": "https://openreview.net/forum?id=bdHkMjBJG_w", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 44, "corr_rating_confidence": 0, "gs_citation": 1747, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14998466456873496511&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "cAIIbdNAeNa", "title": "CAtNIPP: Context-Aware Attention-based Network for Informative Path Planning", "track": "main", "status": "Poster", "tldr": "Our context-aware neural framework for adaptive IPP allows a robot to sequence non-myopic decisions that can balance short-term exploitation with longer-term exploration, resulting in improved solution quality and drastically reduced planning times.", "abstract": "Informative path planning (IPP) is an NP-hard problem, which aims at planning a path allowing an agent to build an accurate belief about a quantity of interest throughout a given search domain, within constraints on resource budget (e.g., path length for robots with limited battery life). IPP requires frequent online replanning as this belief is updated with every new measurement (i.e., adaptive IPP), while balancing short-term exploitation and longer-term exploration to avoid suboptimal, myopic behaviors. Encouraged by the recent developments in deep reinforcement learning, we introduce CAtNIPP, a fully reactive, neural approach to the adaptive IPP problem. CAtNIPP relies on self-attention for its powerful ability to capture dependencies in data at multiple spatial scales. Specifically, our agent learns to form a context of its belief over the entire domain, which it uses to sequence local movement decisions that optimize short- and longer-term search objectives. 
We experimentally demonstrate that CAtNIPP significantly outperforms state-of-the-art non-learning IPP solvers in terms of solution quality and computing time once trained, and present experimental results on hardware.", "keywords": "deep RL;informative path planning;context-aware decision-making", "primary_area": "", "supplementary_material": "/attachment/958d020b8a31029b778d4c216a45c568835d7fdb.zip", "author": "Yuhong Cao;Yizhuo Wang;Apoorva Vashisth;Haolin Fan;Guillaume Adrien Sartoretti", "authorids": "~Yuhong_Cao1;~Yizhuo_Wang1;~Apoorva_Vashisth1;~Haolin_Fan1;~Guillaume_Adrien_Sartoretti1", "gender": "M;M;F;M;M", "homepage": ";https://github.com/wyzh98;;;https://marmotlab.org/", "dblp": ";;;;118/9066", "google_scholar": ";;_8HGrsAAAAAJ;;n7NzZ0sAAAAJ", "orcid": "0000-0001-8099-0689;;;;0000-0002-7579-9916", "linkedin": ";;apoorva-vashisth-5b421b173/?originalSubdomain=in;haolin-fan-526135219/;", "or_profile": "~Yuhong_Cao1;~Yizhuo_Wang1;~Apoorva_Vashisth1;~Haolin_Fan1;~Guillaume_Adrien_Sartoretti1", "aff": "National University of Singapore;National University of Singapore;Indian Institute of Technology Kharagpur;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;iitkgp.ac.in;u.nus.edu;nus.edu.sg", "position": "PhD student;Researcher;Undergrad student;MS student;Assistant Professor", "bibtex": "@inproceedings{\ncao2022catnipp,\ntitle={{CA}t{NIPP}: Context-Aware Attention-based Network for Informative Path Planning},\nauthor={Yuhong Cao and Yizhuo Wang and Apoorva Vashisth and Haolin Fan and Guillaume Adrien Sartoretti},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=cAIIbdNAeNa}\n}", "github": "https://github.com/marmotlab/CAtNIPP", "project": "", "reviewers": "HgVR;6v6E;8gWj;QZo2", "site": "https://openreview.net/forum?id=cAIIbdNAeNa", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6261042545995098635&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "National University of Singapore;Indian Institute of Technology Kharagpur", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.iitkgp.ac.in", "aff_unique_abbr": "NUS;IIT Kharagpur", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kharagpur", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Singapore;India" }, { "id": "cF1dxVGxic-", "title": "Embedding Synthetic Off-Policy Experience for Autonomous Driving via Zero-Shot Curricula", "track": "main", "status": "Oral", "tldr": "A zero-shot transfer of a difficulty model to generate training curricula for an AV planning agent using only 10% of the data outperforms training on full dataset.", "abstract": "ML-based motion planning is a promising approach to produce agents that exhibit complex behaviors, and automatically adapt to novel environments. In the context of autonomous driving, it is common to treat all available training data equally. However, this approach produces agents that do not perform robustly in safety-critical settings, an issue that cannot be addressed by simply adding more data to the training set -- we show that an agent trained using only a 10% subset of the data performs just as well as an agent trained on the entire dataset. 
We present a method to predict the inherent difficulty of a driving situation given data collected from a fleet of autonomous vehicles deployed on public roads. We then demonstrate that this difficulty score can be used in a zero-shot transfer to generate curricula for an imitation-learning based planning agent. Compared to training on the entire unbiased training dataset, we show that prioritizing difficult driving scenarios both reduces collisions by 15% and increases route adherence by 14% in closed-loop evaluation, all while using only 10% of the training data.", "keywords": "Imitation Learning;Curriculum Learning;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/4977d41d8902d323a44b341c52e586a93972f5d0.zip", "author": "Eli Bronstein;Sirish Srinivasan;Supratik Paul;Aman Sinha;Matthew O'Kelly;Payam Nikdel;Shimon Whiteson", "authorids": "~Eli_Bronstein1;~Sirish_Srinivasan1;~Supratik_Paul1;~Aman_Sinha1;~Matthew_O'Kelly2;~Payam_Nikdel1;~Shimon_Whiteson1", "gender": "M;M;M;M;M;;M", "homepage": "http://elibronstein.com/;;;https://amansinha.com;https://www.linkedin.com/in/pnikdel/;;http://www.mokelly.net", "dblp": ";260/6712;180/5515;;;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;198/6855", "google_scholar": "uQRY6KoAAAAJ;2yc-GDAAAAAJ;;ZCa4VDcAAAAJ;JZBqZzkAAAAJ;;EMrV8BIAAAAJ", "orcid": "0009-0002-6544-2988;;;;;;", "linkedin": "eli-bronstein;sirish-srinivasan/;;amans1nha;pnikdel/;;", "or_profile": "~Eli_Bronstein1;~Sirish_Srinivasan1;~Supratik_Paul1;~Aman_Sinha1;~Payam_Nikdel1;~Shimon_Whiteson1;~Matthew_Edward_O'Kelly1", "aff": "Waymo;Waymo Research;Waymo;Princeton University;Simon Fraser University;University of Oxford;Trustworthy AI", "aff_domain": "waymo.com;waymo.com;waymo.com;princeton.edu;cs.sfu.ca;ox.ac.uk;trustworthy.ai", "position": "Researcher;Researcher;Researcher;Undergrad student;PhD student;Professor;Researcher", "bibtex": "@inproceedings{\nbronstein2022embedding,\ntitle={Embedding Synthetic Off-Policy Experience for Autonomous Driving via Zero-Shot Curricula},\nauthor={Eli Bronstein and Sirish Srinivasan and Supratik Paul and Aman Sinha and Matthew O'Kelly and Payam Nikdel and Shimon Whiteson},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=cF1dxVGxic-}\n}", "github": "", "project": "", "reviewers": "4k9b;Dcm5;CdL1;fmkD", "site": "https://openreview.net/forum?id=cF1dxVGxic-", "pdf_size": 0, "rating": "1;6;6;10", "confidence": "", "rating_avg": 5.75, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9465722687980979954&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;2;3;4", "aff_unique_norm": "Waymo;Princeton University;Simon Fraser University;University of Oxford;Trustworthy AI", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.waymo.com;https://www.princeton.edu;https://www.sfu.ca;https://www.ox.ac.uk;", "aff_unique_abbr": "Waymo;Princeton;SFU;Oxford;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "United States;Canada;United Kingdom;" }, { "id": "d-JYso87y6s", "title": "Frame Mining: a Free Lunch for Learning Robotic Manipulation from 3D Point Clouds", "track": "main", "status": "Poster", "tldr": "By mining coordinate frames of input point clouds, we can significantly improve the training efficiency of 3D manipulation learning without changing 
camera placements or adding extra cameras.", "abstract": " We study how choices of input point cloud coordinate frames impact learning of manipulation skills from 3D point clouds. There exist a variety of coordinate frame choices to normalize captured robot-object-interaction point clouds. We find that different frames have a profound effect on agent learning performance, and the trend is similar across 3D backbone networks. In particular, the end-effector frame and the target-part frame achieve higher training efficiency than the commonly used world frame and robot-base frame in many tasks, intuitively because they provide helpful alignments among point clouds across time steps and thus can simplify visual module learning. Moreover, the well-performing frames vary across tasks, and some tasks may benefit from multiple frame candidates. We thus propose FrameMiners to adaptively select candidate frames and fuse their merits in a task-agnostic manner. Experimentally, FrameMiners achieves on-par or significantly higher performance than the best single-frame version on five fully physical manipulation tasks adapted from ManiSkill and OCRTOC. Without changing existing camera placements or adding extra cameras, point cloud frame mining can serve as a free lunch to improve 3D manipulation learning.", "keywords": "point cloud;coordinate frame;robot manipulation;3D;RL", "primary_area": "", "supplementary_material": "/attachment/f78f29dc52fc417c0408f908afd1fae95299d3e2.zip", "author": "Minghua Liu;Xuanlin Li;Zhan Ling;Yangyan Li;Hao Su", "authorids": "~Minghua_Liu1;~Xuanlin_Li1;~Zhan_Ling2;~Yangyan_Li1;~Hao_Su1", "gender": "M;;M;M;M", "homepage": "https://cseweb.ucsd.edu//~mil070/;https://xuanlinli17.github.io/;;https://yangyan.li/;http://ai.ucsd.edu/~haosu", "dblp": "28/8907;251/3029;254/1980;https://dblp.uni-trier.de/pid/50/8293;09/4945-1", "google_scholar": "6U3IGtEAAAAJ;7vyVxxQAAAAJ;vsRxnYAAAAAJ;9RxI7UAAAAAJ;1P8Zu04AAAAJ", "orcid": ";;;;", "linkedin": ";xuanlin-li-4684b8145/;zhan-ling-069a59149/;;", "or_profile": "~Minghua_Liu1;~Xuanlin_Li1;~Zhan_Ling2;~Yangyan_Li1;~Hao_Su1", "aff": "University of California, San Diego;University of California, San Diego;Qualcomm Inc, QualComm;Alibaba Group;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;qti.qualcomm.com;alibaba-inc.com;ucsd.edu", "position": "PhD student;PhD student;Intern;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nliu2022frame,\ntitle={Frame Mining: a Free Lunch for Learning Robotic Manipulation from 3D Point Clouds},\nauthor={Minghua Liu and Xuanlin Li and Zhan Ling and Yangyan Li and Hao Su},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=d-JYso87y6s}\n}", "github": "", "project": "", "reviewers": "LU2t;1Hcu;vXBE;aJn6", "site": "https://openreview.net/forum?id=d-JYso87y6s", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9590727167725251458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of California, San Diego;Qualcomm Incorporated;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.qualcomm.com;https://www.alibaba.com", "aff_unique_abbr": "UCSD;Qualcomm;Alibaba", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", 
"aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "eI8CZ2s267o", "title": "Learning Markerless Robot-Depth Camera Calibration and End-Effector Pose Estimation", "track": "main", "status": "Poster", "tldr": "A learning based system for extrinsic calibration between a depth camera and a robot arm without additional hardware, markers or simulation. ", "abstract": "Traditional approaches to extrinsic calibration use fiducial markers and learning-based approaches rely heavily on simulation data. In this work, we present a learning-based markerless extrinsic calibration system that uses a depth camera and does not rely on simulation data. We learn models for end-effector (EE) segmentation, single-frame rotation prediction and keypoint detection, from automatically generated real-world data. We use a transformation trick to get EE pose estimates from rotation predictions and a matching algorithm to get EE pose estimates from keypoint predictions. We further utilize the iterative closest point algorithm, multiple-frames, filtering and outlier detection to increase calibration robustness. Our evaluations with training data from multiple camera poses and test data from previously unseen poses give sub-centimeter and sub-deciradian average calibration and pose estimation errors. We also show that a carefully selected single training pose gives comparable results.", "keywords": "Camera Calibration;Pose Estimation;Perception", "primary_area": "", "supplementary_material": "/attachment/992ca5c38264fec8a02391f582372d017d044897.zip", "author": "Bugra Can Sefercik;Baris Akgun", "authorids": "~Bugra_Can_Sefercik1;~Baris_Akgun1", "gender": "M;M", "homepage": ";https://mysite.ku.edu.tr/baakgun/", "dblp": ";80/7656", "google_scholar": "cnULLNAAAAAJ;https://scholar.google.com.tw/citations?user=5sL0xZ4AAAAJ", "orcid": "0000-0001-8531-6829;", "linkedin": "bcsefercik/;", "or_profile": "~Bugra_Can_Sefercik1;~Baris_Akgun1", "aff": "Ko\u00e7 University;Koc University", "aff_domain": "ku.edu.tr;ku.edu.tr", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nsefercik2022learning,\ntitle={Learning Markerless Robot-Depth Camera Calibration and End-Effector Pose Estimation},\nauthor={Bugra Can Sefercik and Baris Akgun},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=eI8CZ2s267o}\n}", "github": "https://github.com/bcsefercik/robot-camera-calibration", "project": "", "reviewers": "5rEJ;2Nyt;HmKT;LhSL", "site": "https://openreview.net/forum?id=eI8CZ2s267o", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6690654021993148121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Ko\u00e7 University;Koc University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.edu.tr;https://www.koc.edu.tr", "aff_unique_abbr": "Ko\u00e7;Koc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "T\u00fcrkiye" }, { "id": "esOrVR_8-rc", "title": "Generalization with Lossy Affordances: Leveraging Broad Offline Data for Learning Visuomotor Tasks", "track": "main", "status": "Oral", "tldr": "A framework that leverages diverse offline data for learning representations, goal-conditioned policies, and affordance models 
that enable rapid fine-tuning to new tasks in target scenes.", "abstract": "The use of broad datasets has proven to be crucial for generalization for a wide range of fields. However, how to effectively make use of diverse multi-task data for novel downstream tasks still remains a grand challenge in reinforcement learning and robotics. To tackle this challenge, we introduce a framework that acquires goal-conditioned policies for unseen temporally extended tasks via offline reinforcement learning on broad data, in combination with online fine-tuning guided by subgoals in a learned lossy representation space. When faced with a novel task goal, our framework uses an affordance model to plan a sequence of lossy representations as subgoals that decomposes the original task into easier problems. Learned from the broad prior data, the lossy representation emphasizes task-relevant information about states and goals while abstracting away redundant contexts that hinder generalization. It thus enables subgoal planning for unseen tasks, provides a compact input to the policy, and facilitates reward shaping during fine-tuning. We show that our framework can be pre-trained on large-scale datasets of robot experience from prior work and efficiently fine-tuned for novel tasks, entirely from visual inputs without any manual reward engineering.", "keywords": "Reinforcement Learning;Representation Learning;Planning", "primary_area": "", "supplementary_material": "/attachment/1106aa6ba532d946c6bc0e87809182a3fd563361.zip", "author": "Kuan Fang;Patrick Yin;Ashvin Nair;Homer Rich Walke;Gengchen Yan;Sergey Levine", "authorids": "~Kuan_Fang3;~Patrick_Yin1;~Ashvin_Nair1;~Homer_Rich_Walke1;~Gengchen_Yan1;~Sergey_Levine1", "gender": ";M;M;M;M;M", "homepage": ";https://www.patrickyin.me/;http://ashvin.me/;https://homerwalke.com;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";319/3869.html;182/2436;279/6795;;80/7594", "google_scholar": ";https://scholar.google.com/citations?hl=en;BsOkXDsAAAAJ;ZWH5jCwAAAAJ;;8R35rCwAAAAJ", "orcid": ";;;;;", "linkedin": ";patrickhaoy/;;;gengchen-matt-yan/;", "or_profile": "~Kuan_Fang3;~Patrick_Yin1;~Ashvin_Nair1;~Homer_Rich_Walke1;~Gengchen_Yan1;~Sergey_Levine1", "aff": ";University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google", "aff_domain": ";berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;google.com", "position": ";Undergrad student;PhD student;PhD student;Undergrad student;Research Scientist", "bibtex": "@inproceedings{\nfang2022generalization,\ntitle={Generalization with Lossy Affordances: Leveraging Broad Offline Data for Learning Visuomotor Tasks},\nauthor={Kuan Fang and Patrick Yin and Ashvin Nair and Homer Rich Walke and Gengchen Yan and Sergey Levine},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=esOrVR_8-rc}\n}", "github": "", "project": "", "reviewers": "qYWX;xMTo;jYk6", "site": "https://openreview.net/forum?id=esOrVR_8-rc", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3554570665692236639&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": 
"https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;0;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "eyxfGTFZbNQ", "title": "Sim-to-Real via Sim-to-Seg: End-to-end Off-road Autonomous Driving Without Real Data", "track": "main", "status": "Poster", "tldr": "Sim-to-Real for end-to-end off-road autonomous driving without any real-world training data.", "abstract": "Autonomous driving is complex, requiring sophisticated 3D scene understanding, localization, mapping, and control. Rather than explicitly modelling and fusing each of these components, we instead consider an end-to-end approach via reinforcement learning (RL). However, collecting exploration driving data in the real world is impractical and dangerous. While training in simulation and deploying visual sim-to-real techniques has worked well for robot manipulation, deploying beyond controlled workspace viewpoints remains a challenge. In this paper, we address this challenge by presenting Sim2Seg, a re-imagining of RCAN that crosses the visual reality gap for off-road autonomous driving, without using any real-world data. This is done by learning to translate randomized simulation images into simulated segmentation and depth maps, subsequently enabling real-world images to also be translated. This allows us to train an end-to-end RL policy in simulation, and directly deploy in the real-world. Our approach, which can be trained in 48 hours on 1 GPU, can perform equally as well as a classical perception and control stack that took thousands of engineering hours over several months to build. We hope this work motivates future end-to-end autonomous driving research.", "keywords": "Sim-to-Real;Reinforcement Learning;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/af201f504ae718a4b2065c32cd59e2a32b5c3924.zip", "author": "John So;Amber Xie;Sunggoo Jung;Jeffrey Edlund;Rohan Thakker;Ali-akbar Agha-mohammadi;Pieter Abbeel;Stephen James", "authorids": "johnianrso@berkeley.edu;~Amber_Xie1;sunggoo.jung@jpl.nasa.gov;jeffrey.a.edlund@jpl.nasa.gov;rohan.a.thakker@jpl.nasa.gov;aliakbar.aghamohammadi@jpl.nasa.gov;~Pieter_Abbeel2;~Stephen_James1", "gender": ";;;;;;M;M", "homepage": ";;;;;;https://people.eecs.berkeley.edu/~pabbeel/;https://stepjam.github.io/", "dblp": ";;;;;;;163/5669", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;OXtG-isAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "johnianrso@berkeley.edu;~Amber_Xie1;sunggoo.jung@jpl.nasa.gov;jeffrey.a.edlund@jpl.nasa.gov;rohan.a.thakker@jpl.nasa.gov;aliakbar.aghamohammadi@jpl.nasa.gov;~Pieter_Abbeel2;~Stephen_James1", "aff": ";University of California, Berkeley;;;;;Covariant;University of California, Berkeley", "aff_domain": ";berkeley.edu;;;;;covariant.ai;berkeley.edu", "position": ";Undergrad student;;;;;Founder;Postdoc", "bibtex": "@inproceedings{\nso2022simtoreal,\ntitle={Sim-to-Real via Sim-to-Seg: End-to-end Off-road Autonomous Driving Without Real Data},\nauthor={John So and Amber Xie and Sunggoo Jung and Jeffrey Edlund and Rohan Thakker and Ali-akbar Agha-mohammadi and Pieter Abbeel and Stephen James},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=eyxfGTFZbNQ}\n}", "github": "https://github.com/rll-research/sim2seg", "project": "", 
"reviewers": "McEn;Vqbb;ACNP;aEoJ", "site": "https://openreview.net/forum?id=eyxfGTFZbNQ", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11270814142389704652&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Berkeley;Covariant", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "id": "fXMV2CEwNVo", "title": "SSL-Lanes: Self-Supervised Learning for Motion Forecasting in Autonomous Driving", "track": "main", "status": "Poster", "tldr": "We report the first systematic exploration of self-supervision with motion forecasting for autonomous driving.", "abstract": "Self-supervised learning (SSL) is an emerging technique that has been successfully employed to train convolutional neural networks (CNNs) and graph neural networks (GNNs) for more transferable, generalizable, and robust representation learning. However its potential in motion forecasting for autonomous driving has rarely been explored. In this study, we report the first systematic exploration and assessment of incorporating self-supervision into motion forecasting. We first propose to investigate four novel self-supervised learning tasks for motion forecasting with theoretical rationale and quantitative and qualitative comparisons on the challenging large-scale Argoverse dataset. Secondly, we point out that our auxiliary SSL-based learning setup not only outperforms forecasting methods which use transformers, complicated fusion mechanisms and sophisticated online dense goal candidate optimization algorithms in terms of performance accuracy, but also has low inference time and architectural complexity. 
Lastly, we conduct several experiments to understand why SSL improves motion forecasting.", "keywords": "Motion Forecasting;Autonomous Driving;Self-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/b56a2e446e58049eafb5b1ceda3d6b4a1985cdfe.zip", "author": "Prarthana Bhattacharyya;Chengjie Huang;Krzysztof Czarnecki", "authorids": "~Prarthana_Bhattacharyya1;c.huang@uwaterloo.ca;~Krzysztof_Czarnecki1", "gender": "F;;M", "homepage": "https://github.com/AutoVision-cloud;;https://uwaterloo.ca/waterloo-intelligent-systems-engineering-lab/people-profiles/krzysztof-czarnecki", "dblp": "209/1863;;72/6806", "google_scholar": "v6pGkNQAAAAJ;;https://scholar.google.ca/citations?user=ZzCpumQAAAAJ", "orcid": ";;0000-0003-1642-1101", "linkedin": "https://uk.linkedin.com/in/prarthana-bhattacharyya;;krzysztof-czarnecki-a3a266", "or_profile": "~Prarthana_Bhattacharyya1;c.huang@uwaterloo.ca;~Krzysztof_Czarnecki1", "aff": "University of Waterloo;;University of Waterloo", "aff_domain": "uwaterloo.ca;;uwaterloo.ca", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nbhattacharyya2022ssllanes,\ntitle={{SSL}-Lanes: Self-Supervised Learning for Motion Forecasting in Autonomous Driving},\nauthor={Prarthana Bhattacharyya and Chengjie Huang and Krzysztof Czarnecki},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=fXMV2CEwNVo}\n}", "github": "https://github.com/AutoVision-cloud/SSL-Lanes", "project": "", "reviewers": "QGzU;fptR;dvVA;b8zt", "site": "https://openreview.net/forum?id=fXMV2CEwNVo", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17325955768077844743&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "f_XmiyZcsjL", "title": "Reciprocal MIND MELD: Improving Learning From Demonstration via Personalized, Reciprocal Teaching", "track": "main", "status": "Poster", "tldr": "To reduce the problem of suboptimal demonstrations, we introduce Reciprocal MIND MELD which learns a semantically meaningful embedding space to provide actionable robotic feedback to demonstrators to improve upon the quality of their demonstrations.", "abstract": "Endowing robots with the ability to learn novel tasks via demonstrations will increase the accessibility of robots for non-expert, non-roboticists. However, research has shown that humans can be poor teachers, making it difficult for robots to effectively learn from humans. If the robot could instruct humans how to provide better demonstrations, then humans might be able to effectively teach a broader range of novel, out-of-distribution tasks. In this work, we introduce Reciprocal MIND MELD, a framework in which the robot learns the way in which a demonstrator is suboptimal and utilizes this information to provide feedback to the demonstrator to improve upon their demonstrations. We additionally develop an Embedding Predictor Network which learns to predict the demonstrator\u2019s suboptimality online without the need for optimal labels. 
In a series of human-subject experiments in a driving simulator domain, we demonstrate that robotic feedback can effectively improve human demonstrations in two dimensions of suboptimality (p < .001) and that robotic feedback translates into better learning outcomes for a robotic agent on novel tasks (p = .045).", "keywords": "meta-learning;personalization;imitation learning", "primary_area": "", "supplementary_material": "/attachment/324308e00f42f937317607c908a72abe093ef859.zip", "author": "Mariah L Schrum;Erin Hedlund-Botti;Matthew Gombolay", "authorids": "~Mariah_L_Schrum1;ehedlund6@gatech.edu;~Matthew_Gombolay1", "gender": "F;;M", "homepage": ";;https://core-robotics.gatech.edu/", "dblp": "237/8619;;144/1022", "google_scholar": "QuzrQzIAAAAJ;;Ihyz20wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mariah_L_Schrum1;ehedlund6@gatech.edu;~Matthew_Gombolay1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology", "aff_domain": "gatech.edu;;cc.gatech.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nschrum2022reciprocal,\ntitle={Reciprocal {MIND} {MELD}: Improving Learning From Demonstration via Personalized, Reciprocal Teaching},\nauthor={Mariah L Schrum and Erin Hedlund-Botti and Matthew Gombolay},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=f_XmiyZcsjL}\n}", "github": "", "project": "", "reviewers": "oD66;QQMM;w2jn;tR3N", "site": "https://openreview.net/forum?id=f_XmiyZcsjL", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9330510115326419497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "fnaMlJbRc4t", "title": "Interpretable Self-Aware Neural Networks for Robust Trajectory Prediction", "track": "main", "status": "Poster", "tldr": "We propose the use of evidential deep learning to perform one-shot epistemic uncertainty estimation over a low-dimensional, interpretable latent space in a trajectory prediction setting.", "abstract": "Although neural networks have seen tremendous success as predictive models in a variety of domains, they can be overly confident in their predictions on out-of-distribution (OOD) data. To be viable for safety-critical applications, like autonomous vehicles, neural networks must accurately estimate their epistemic or model uncertainty, achieving a level of system self-awareness. Techniques for epistemic uncertainty quantification often require OOD data during training or multiple neural network forward passes during inference. These approaches may not be suitable for real-time performance on high-dimensional inputs. Furthermore, existing methods lack interpretability of the estimated uncertainty, which limits their usefulness both to engineers for further system development and to downstream modules in the autonomy stack. We propose the use of evidential deep learning to estimate the epistemic uncertainty over a low-dimensional, interpretable latent space in a trajectory prediction setting. 
We introduce an interpretable paradigm for trajectory prediction that distributes the uncertainty among the semantic concepts: past agent behavior, road structure, and social context. We validate our approach on real-world autonomous driving data, demonstrating superior performance over state-of-the-art baselines.", "keywords": "autonomous vehicles;trajectory prediction;out-of-distribution detection;distribution shift;epistemic uncertainty estimation", "primary_area": "", "supplementary_material": "/attachment/5c5f95510ea00d19b44209cc986b91eb2c7f276c.zip", "author": "Masha Itkina;Mykel Kochenderfer", "authorids": "~Masha_Itkina1;~Mykel_Kochenderfer1", "gender": "F;M", "homepage": "https://mashaitkina.weebly.com/;https://mykel.kochenderfer.com", "dblp": "239/8541;34/2029.html", "google_scholar": "https://scholar.google.ca/citations?user=JAmTk5gAAAAJ;cAy9G6oAAAAJ", "orcid": ";0000-0002-7238-9663", "linkedin": "masha-itkina-3bb11a97/;mykel-kochenderfer", "or_profile": "~Masha_Itkina1;~Mykel_Kochenderfer1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nitkina2022interpretable,\ntitle={Interpretable Self-Aware Neural Networks for Robust Trajectory Prediction},\nauthor={Masha Itkina and Mykel Kochenderfer},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=fnaMlJbRc4t}\n}", "github": "https://github.com/sisl/InterpretableSelfAwarePrediction", "project": "", "reviewers": "UZiy;we25;WAfv;xJrB", "site": "https://openreview.net/forum?id=fnaMlJbRc4t", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=547650943545012570&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ft8IeFe4-8e", "title": "Manipulation via Membranes: High-Resolution and Highly Deformable Tactile Sensing and Control", "track": "main", "status": "Poster", "tldr": "Our method learns soft tactile sensor membrane deformation dynamics to control a grasped object\u2019s pose and force transmitted to the environment during contact-rich manipulation tasks such as drawing and in-hand pivoting.", "abstract": "Collocated tactile sensing is a fundamental enabling technology for dexterous manipulation. However, deformable sensors introduce complex dynamics between the robot, grasped object, and environment that must be considered for fine manipulation. Here, we propose a method to learn soft tactile sensor membrane dynamics that accounts for sensor deformations caused by the physical interaction between the grasped object and environment. Our method combines the perceived 3D geometry of the membrane with proprioceptive reaction wrenches to predict future deformations conditioned on robot action. Grasped object poses are recovered from membrane geometry and reaction wrenches, decoupling interaction dynamics from the tactile observation model. We benchmark our approach on two real-world contact-rich tasks: drawing with a grasped marker and in-hand pivoting. 
Our results suggest that explicitly modeling membrane dynamics achieves better task performance and generalization to unseen objects than baselines.", "keywords": "manipulation;tactile control;deformable tactile sensors", "primary_area": "", "supplementary_material": "/attachment/0fddb9f87d7bd46403631f19658582c4323d6440.zip", "author": "Miquel Oller;Mireia Planas i Lisbona;Dmitry Berenson;Nima Fazeli", "authorids": "~Miquel_Oller1;mireiap@umich.edu;~Dmitry_Berenson1;~Nima_Fazeli1", "gender": ";;M;", "homepage": ";;http://web.eecs.umich.edu/~dmitryb/;https://www.mmintlab.com", "dblp": ";;;", "google_scholar": "N8LKz0kAAAAJ;;x-n9rIMAAAAJ;", "orcid": ";;0000-0002-9712-109X;", "linkedin": ";;;", "or_profile": "~Miquel_Oller1;mireiap@umich.edu;~Dmitry_Berenson1;~Nima_Fazeli1", "aff": "University of Michigan - Ann Arbor;;University of Michigan;University of Michigan", "aff_domain": "umich.edu;;umich.edu;umich.edu", "position": "PhD student;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\noller2022manipulation,\ntitle={Manipulation via Membranes: High-Resolution and Highly Deformable Tactile Sensing and Control},\nauthor={Miquel Oller and Mireia Planas i Lisbona and Dmitry Berenson and Nima Fazeli},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ft8IeFe4-8e}\n}", "github": "https://github.com/MMintLab/manipulation_via_membranes", "project": "", "reviewers": "rK46;ke9K;gpA9;xvAZ", "site": "https://openreview.net/forum?id=ft8IeFe4-8e", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=948002130693704048&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "gJhuiYQ6VGJ", "title": "Deep Black-Box Reinforcement Learning with Movement Primitives", "track": "main", "status": "Poster", "tldr": "A novel algorithm for deep episode-based reinforcement learning", "abstract": "Episode-based reinforcement learning (ERL) algorithms treat reinforcement learning (RL) as a black-box optimization problem where we learn to select a parameter vector of a controller, often represented as a movement primitive, for a given task descriptor called a context. ERL offers several distinct benefits in comparison to step-based RL. It generates smooth control trajectories, can handle non-Markovian reward definitions, and the resulting exploration in parameter space is well suited for solving sparse reward settings. Yet, the high dimensionality of the movement primitive parameters has so far hampered the effective use of deep RL methods. In this paper, we present a new algorithm for deep ERL. It is based on differentiable trust region layers, a successful on-policy deep RL algorithm. These layers allow us to specify trust regions for the policy update that are solved exactly for each state using convex optimization, which enables policies learning with the high precision required for the ERL. We compare our ERL algorithm to state-of-the-art step-based algorithms in many complex simulated robotic control tasks. 
In doing so, we investigate different reward formulations - dense, sparse, and non-Markovian. While step-based algorithms perform well only on dense rewards, ERL performs favorably on sparse and non-Markovian rewards. Moreover, our results show that the sparse and the non-Markovian rewards are also often better suited to define the desired behavior, allowing us to obtain considerably higher quality policies compared to step-based RL.", "keywords": "Movement Primitives;Episode-Based;Black-Box;Deep Reinforcement Learning;Trust Regions", "primary_area": "", "supplementary_material": "/attachment/7bf1bb03e7f9e4f393fb4b123f0c0828420e9132.zip", "author": "Fabian Otto;Onur Celik;Hongyi Zhou;Hanna Ziesche;Vien Anh Ngo;Gerhard Neumann", "authorids": "~Fabian_Otto1;~Onur_Celik1;uokad@student.kit.edu;~Hanna_Ziesche1;~Vien_Anh_Ngo1;~Gerhard_Neumann2", "gender": ";M;;;M;", "homepage": ";https://alr.anthropomatik.kit.edu/21_69.php;;;https://vienngo.github.io;", "dblp": "284/0547;243/5913;;;87/439;", "google_scholar": "dV8eLH8AAAAJ;9jqaTcAAAAAJ;;;https://scholar.google.co.uk/citations?user=xk1gsM8AAAAJ;", "orcid": "0000-0003-3484-1054;;;;;", "linkedin": "ottofabian/;;;;;", "or_profile": "~Fabian_Otto1;~Onur_Celik1;uokad@student.kit.edu;~Hanna_Ziesche1;~Vien_Anh_Ngo1;~Gerhard_Neumann2", "aff": "Bosch Center for AI;Karlsruhe Institute of Technology;;;Bosch Center for Artificial Intelligence;", "aff_domain": "bosch.com;kit.edu;;;bosch.com;", "position": "PhD student;PhD student;;;Research Scientist;", "bibtex": "@inproceedings{\notto2022deep,\ntitle={Deep Black-Box Reinforcement Learning with Movement Primitives},\nauthor={Fabian Otto and Onur Celik and Hongyi Zhou and Hanna Ziesche and Vien Anh Ngo and Gerhard Neumann},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=gJhuiYQ6VGJ}\n}", "github": "", "project": "", "reviewers": "Rejd;xpuk;FPE5;3iLp", "site": "https://openreview.net/forum?id=gJhuiYQ6VGJ", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14658581549259649398&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Bosch Center for AI;Karlsruhe Institute of Technology;Bosch Center for Artificial Intelligence", "aff_unique_dep": "Center for AI;;Center for Artificial Intelligence", "aff_unique_url": "https://www.bosch-ai.com;https://www.kit.edu;https://www.bosch-ai.com", "aff_unique_abbr": "BCAI;KIT;BCAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "gOW9pdOeqUc", "title": "Lidar Line Selection with Spatially-Aware Shapley Value for Cost-Efficient Depth Completion", "track": "main", "status": "Poster", "tldr": "To reduce the cost of depth completion, we propose a spatially-aware Shapley value scheme for selecting the most important lines of a lidar ", "abstract": "Lidar is a vital sensor for estimating the depth of a scene. Typical spinning lidars emit pulses arranged in several horizontal lines and the monetary cost of the sensor increases with the number of these lines. In this work, we present the new problem of optimizing the positioning of lidar lines to find the most effective configuration for the depth completion task. 
We propose a solution to reduce the number of lines while retaining the up-to-the-mark quality of depth completion. Our method consists of two components, (1) line selection based on the marginal contribution of a line computed via the Shapley value and (2) incorporating line position spread to take into account its need to arrive at image-wide depth completion. Spatially-aware Shapley values (SaS) succeed in selecting line subsets that yield a depth accuracy comparable to the full lidar input while using just half of the lines.", "keywords": "Lidar;Shapley value;feature selection;depth completion", "primary_area": "", "supplementary_material": "/attachment/a4c9df853fa1e52a91ea4e9ea4fcc4ccabe6105c.zip", "author": "Kamil Adamczewski;Christos Sakaridis;Vaishakh Patil;Luc Van Gool", "authorids": "~Kamil_Adamczewski1;~Christos_Sakaridis1;patil@vision.ee.ethz.ch;~Luc_Van_Gool1", "gender": "M;;;", "homepage": ";https://people.ee.ethz.ch/~csakarid/;;", "dblp": "150/5954;188/5858;;61/5017", "google_scholar": "https://scholar.google.pl/citations?user=O30Xj14AAAAJ;gyF5LmoAAAAJ;;https://scholar.google.be/citations?user=TwMib_QAAAAJ", "orcid": ";0000-0003-1127-8887;;", "linkedin": ";;;", "or_profile": "~Kamil_Adamczewski1;~Christos_Sakaridis1;patil@vision.ee.ethz.ch;~Luc_Van_Gool1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;ETHZ - ETH Zurich;;KU Leuven", "aff_domain": "tuebingen.mpg.de;ethz.ch;;kuleuven.be", "position": "PhD student;Postdoc;;Emeritus", "bibtex": "@inproceedings{\nadamczewski2022lidar,\ntitle={Lidar Line Selection with Spatially-Aware Shapley Value for Cost-Efficient Depth Completion},\nauthor={Kamil Adamczewski and Christos Sakaridis and Vaishakh Patil and Luc Van Gool},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=gOW9pdOeqUc}\n}", "github": "", "project": "", "reviewers": "xfBr;3dHW;6id7", "site": "https://openreview.net/forum?id=gOW9pdOeqUc", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10515548460351939940&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;ETH Zurich;Katholieke Universiteit Leuven", "aff_unique_dep": "Intelligent Systems;;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.ethz.ch;https://www.kuleuven.be", "aff_unique_abbr": "MPI-IS;ETHZ;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Germany;Switzerland;Belgium" }, { "id": "gUtbYFHluAg", "title": "SE(3)-Equivariant Point Cloud-Based Place Recognition", "track": "main", "status": "Poster", "tldr": "We propose a place recognition framework that exploits SE(3)-invariant features learned from 3D point clouds to improve robustness and generalizibility.", "abstract": "This paper reports on a new 3D point cloud-based place recognition framework that uses SE(3)-equivariant networks to learn SE(3)-invariant global descriptors. We discover that, unlike existing methods, learned SE(3)-invariant global descriptors are more robust to matching inaccuracy and failure in severe rotation and translation configurations. Mobile robots undergo arbitrary rotational and translational movements. 
The SE(3)-invariant property ensures that the learned descriptors are robust to the rotation and translation changes in the robot pose and can represent the intrinsic geometric information of the scene. Furthermore, we have discovered that the attention module aids in the enhancement of performance while allowing significant downsampling. We evaluate the performance of the proposed framework on real-world data sets. The experimental results show that the proposed framework outperforms state-of-the-art baselines in various metrics, leading to a reliable point cloud-based place recognition network. We have open-sourced our code at: https://github.com/UMich-CURLY/se3_equivariant_place_recognition.", "keywords": "Place Recognition;SE(3)-Invariant;Equivariant Representation Learning;3D Point Clouds", "primary_area": "", "supplementary_material": "", "author": "Chien Erh Lin;Jingwei Song;Ray Zhang;Minghan Zhu;Maani Ghaffari", "authorids": "~Chien_Erh_Lin1;jingweso@umich.edu;rzh@umich.edu;~Minghan_Zhu1;~Maani_Ghaffari1", "gender": "F;;;Not Specified;M", "homepage": "https://sites.google.com/view/chien-erh-lin;;;;https://curly.engin.umich.edu/", "dblp": "280/3510;;;255/5003;", "google_scholar": "ySuZAF0AAAAJ;;;70CbUXwAAAAJ;l2jdSb8AAAAJ", "orcid": "0000-0001-6946-5920;;;0000-0002-0145-7542;0000-0002-4734-4295", "linkedin": "chien-erh-lin/;;;;maani-ghaffari-19b017203/", "or_profile": "~Chien_Erh_Lin1;jingweso@umich.edu;rzh@umich.edu;~Minghan_Zhu1;~Maani_Ghaffari1", "aff": "University of Michigan;;;University of Michigan;University of Michigan", "aff_domain": "umich.edu;;;umich.edu;umich.edu", "position": "PhD student;;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlin2022seequivariant,\ntitle={{SE}(3)-Equivariant Point Cloud-Based Place Recognition},\nauthor={Chien Erh Lin and Jingwei Song and Ray Zhang and Minghan Zhu and Maani Ghaffari},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=gUtbYFHluAg}\n}", "github": "https://github.com/UMich-CURLY/se3_equivariant_place_recognition", "project": "", "reviewers": "GJ8U;xfgJ;cMpG;kN1U", "site": "https://openreview.net/forum?id=gUtbYFHluAg", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7921018530766062616&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "h0Yb0U_-Tki", "title": "Instruction-driven history-aware policies for robotic manipulations", "track": "main", "status": "Oral", "tldr": "A new model to solve robotics tasks using instructions, multi-views and history", "abstract": "In human environments, robots are expected to accomplish a variety of manipulation tasks given simple natural language instructions. Yet, robotic manipulation is extremely challenging as it requires fine-grained motor control, long-term memory as well as generalization to previously unseen tasks and environments. To address these challenges, we propose a unified transformer-based approach that takes into account multiple inputs. 
In particular, our transformer architecture integrates (i) natural language instructions and (ii) multi-view scene observations while (iii) keeping track of the full history of observations and actions. Such an approach enables learning dependencies between history and instructions and improves manipulation precision using multiple views. We evaluate our method on the challenging RLBench benchmark and on a real-world robot. Notably, our approach scales to 74 diverse RLBench tasks and outperforms the state of the art. We also address instruction-conditioned tasks and demonstrate excellent generalization to previously unseen variations. ", "keywords": "Robotics Manipulation;Language Instruction;Transformer", "primary_area": "", "supplementary_material": "/attachment/41708139e90946d9a0e94812376f4d7bf48a03f3.zip", "author": "Pierre-Louis Guhur;Shizhe Chen;Ricardo Garcia Pinel;Makarand Tapaswi;Ivan Laptev;Cordelia Schmid", "authorids": "~Pierre-Louis_Guhur1;~Shizhe_Chen1;~Ricardo_Garcia_Pinel1;~Makarand_Tapaswi1;~Ivan_Laptev1;~Cordelia_Schmid1", "gender": "M;F;M;;M;F", "homepage": ";https://cshizhe.github.io/;https://rjgpinel.github.io/;https://makarandtapaswi.github.io/;https://www.di.ens.fr/~laptev/;https://cordeliaschmid.github.io/", "dblp": "184/0930;153/0734;304/1714;69/1484;41/1854;s/CordeliaSchmid", "google_scholar": "https://scholar.google.fr/citations?user=mkdVWLwAAAAJ;wZhRRy0AAAAJ;cMA5vJwAAAAJ;rJotb-YAAAAJ;https://scholar.google.com.tw/citations?user=-9ifK0cAAAAJ;IvqCXP4AAAAJ", "orcid": ";;0000-0002-2553-7272;0000-0001-8800-9015;;", "linkedin": "guhur/;;rjgpinel;makarand-tapaswi/;;cordelia-schmid-47985a9", "or_profile": "~Pierre-Louis_Guhur1;~Shizhe_Chen1;~Ricardo_Garcia_Pinel1;~Makarand_Tapaswi1;~Ivan_Laptev1;~Cordelia_Schmid1", "aff": "INRIA;INRIA;INRIA;Wadhwani Institute for Artificial Intelligence;INRIA Paris;Inria", "aff_domain": "inria.fr;inria.fr;inria.fr;wadhwaniai.org;inria.fr;inria.fr", "position": "PhD student;Postdoc;PhD student;Principal ML Scientist;Senior Researcher;Researcher", "bibtex": "@inproceedings{\nguhur2022instructiondriven,\ntitle={Instruction-driven history-aware policies for robotic manipulations},\nauthor={Pierre-Louis Guhur and Shizhe Chen and Ricardo Garcia Pinel and Makarand Tapaswi and Ivan Laptev and Cordelia Schmid},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=h0Yb0U_-Tki}\n}", "github": "https://github.com/guhur/hiveformer", "project": "", "reviewers": "R4uU;G4rm;yM9G", "site": "https://openreview.net/forum?id=h0Yb0U_-Tki", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14138618304214578498&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "INRIA;Wadhwani Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.wadhwani-ai.org", "aff_unique_abbr": "INRIA;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "France;India" }, { "id": "h5g_VDJJfbt", "title": "Learning Generalizable Dexterous Manipulation from Human Grasp Affordance", "track": "main", "status": "Poster", "tldr": "We propose a pipeline that generates large-scale demonstrations for dexterous manipulation and an imitation learning 
algorithm that generalizes to novel objects absent during training.", "abstract": "Dexterous manipulation with a multi-finger hand is one of the most challenging problems in robotics. While recent progress in imitation learning has largely improved the sample efficiency compared to Reinforcement Learning, the learned policy can hardly generalize to manipulate novel objects, given limited expert demonstrations. In this paper, we propose to learn dexterous manipulation using large-scale demonstrations with diverse 3D objects in a category, which are generated from a human grasp affordance model. This generalizes the policy to novel object instances within the same category. To train the policy, we propose a novel imitation learning objective jointly with a geometric representation learning objective using our demonstrations. By experimenting with relocating diverse objects in simulation, we show that our approach outperforms baselines with a large margin when manipulating novel objects. We also ablate the importance of 3D object representation learning for manipulation. We include videos and code on the project website: https://kristery.github.io/ILAD/ .", "keywords": "Generalized policy learning;Dexterous manipulation;Affordance model;Reinforcement learning;Imitation learning", "primary_area": "", "supplementary_material": "/attachment/4aa8095f3821d4143553692eb9dae9b0eade23ea.zip", "author": "Yueh-Hua Wu;Jiashun Wang;Xiaolong Wang", "authorids": "~Yueh-Hua_Wu1;~Jiashun_Wang1;~Xiaolong_Wang3", "gender": ";M;M", "homepage": ";https://jiashunwang.github.io/;https://xiaolonw.github.io/", "dblp": ";260/6495;91/952-4", "google_scholar": ";gdO9Gb0AAAAJ;Y8O9N_0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yueh-Hua_Wu1;~Jiashun_Wang1;~Xiaolong_Wang3", "aff": ";University of California, San Diego;University of California, San Diego", "aff_domain": ";ucsd.edu;ucsd.edu", "position": ";MS student;Assistant Professor", "bibtex": "@inproceedings{\nwu2022learning,\ntitle={Learning Generalizable Dexterous Manipulation from Human Grasp Affordance},\nauthor={Yueh-Hua Wu and Jiashun Wang and Xiaolong Wang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=h5g_VDJJfbt}\n}", "github": "https://github.com/kristery/dex-affordance", "project": "", "reviewers": "nXCy;WeMh;p8H4", "site": "https://openreview.net/forum?id=h5g_VDJJfbt", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6964214718001806263&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "hW0tcXOJas2", "title": "Multi-Robot Scene Completion: Towards Task-Agnostic Collaborative Perception", "track": "main", "status": "Poster", "tldr": "", "abstract": "Collaborative perception learns how to share information among multiple robots to perceive the environment better than individually done. Past research on this has been task-specific, such as detection or segmentation. 
Yet this leads to different information sharing for different tasks, hindering the large-scale deployment of collaborative perception. We propose the first task-agnostic collaborative perception paradigm that learns a single collaboration module in a self-supervised manner for different downstream tasks. This is done by a novel task termed multi-robot scene completion, where each robot learns to effectively share information for reconstructing a complete scene viewed by all robots. Moreover, we propose a spatiotemporal autoencoder (STAR) that amortizes over time the communication cost by spatial sub-sampling and temporal mixing. Extensive experiments validate our method's effectiveness on scene completion and collaborative perception in autonomous driving scenarios. Our code is available at https://coperception.github.io/star/.", "keywords": "Multi-Robot Perception;Scene Completion;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/bd3d0f0b38369e1883d81fa5332b43bef088caf6.zip", "author": "Yiming Li;Juexiao Zhang;Dekun Ma;Yue Wang;Chen Feng", "authorids": "~Yiming_Li2;~Juexiao_Zhang1;~Dekun_Ma1;~Yue_Wang2;~Chen_Feng2", "gender": "M;M;M;M;M", "homepage": "https://yimingli-page.github.io/;https://juexzz.github.io/;https://dekun.me;https://yuewang.xyz;https://ai4ce.github.io/", "dblp": "l/YimingLi-3;250/9589;334/3464;33/4822-41;01/161-2", "google_scholar": "https://scholar.google.com/citations?hl=en;TYxPbcEAAAAJ;2tGmkhoAAAAJ;v-AEFIEAAAAJ;YeG8ZM0AAAAJ", "orcid": "0000-0002-0157-6218;;;;0000-0003-3211-1576", "linkedin": "yiming-li-58b519173/;juexiao-zhang-788453146/Juexiao-Zhang;dekunma/;;simbaforrest/", "or_profile": "~Yiming_Li2;~Juexiao_Zhang1;~Dekun_Ma1;~Yue_Wang2;~Chen_Feng2", "aff": "New York University;New York University;New York University;Massachusetts Institute of Technology;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;mit.edu;nyu.edu", "position": "PhD student;MS student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2022multirobot,\ntitle={Multi-Robot Scene Completion: Towards Task-Agnostic Collaborative Perception},\nauthor={Yiming Li and Juexiao Zhang and Dekun Ma and Yue Wang and Chen Feng},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=hW0tcXOJas2}\n}", "github": "https://coperception.github.io/star/", "project": "", "reviewers": "vXCo;MFnL;uiAy;8X6f", "site": "https://openreview.net/forum?id=hW0tcXOJas2", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3306550437868823358&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "New York University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://web.mit.edu", "aff_unique_abbr": "NYU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "iM932PeUi_7", "title": "Decentralized Data Collection for Robotic Fleet Learning: A Game-Theoretic Approach", "track": "main", "status": "Poster", "tldr": "", "abstract": "Fleets of networked autonomous vehicles (AVs) collect terabytes of sensory data, which is often transmitted to central servers (the ``cloud'') for training machine learning (ML) models. 
Ideally, these fleets should upload all their data, especially from rare operating contexts, in order to train robust ML models. However, this is infeasible due to prohibitive network bandwidth and data labeling costs. Instead, we propose a cooperative data sampling strategy where geo-distributed AVs collaborate to collect a diverse ML training dataset in the cloud. Since the AVs have a shared objective but minimal information about each other's local data distribution and perception model, we can naturally cast cooperative data collection as an $N$-player mathematical game. We show that our cooperative sampling strategy uses minimal information to converge to a centralized oracle policy with complete information about all AVs. Moreover, we theoretically characterize the performance benefits of our game-theoretic strategy compared to greedy sampling. Finally, we experimentally demonstrate that our method outperforms standard benchmarks by up to $21.9\\%$ on 4 perception datasets, including for autonomous driving in adverse weather conditions. Crucially, our experimental results on real-world datasets closely align with our theoretical guarantees.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/beaedfbfaa07b2cffaf5fafa85bed9ec75236ccc.zip", "author": "Oguzhan Akcin;Po-han Li;Shubhankar Agarwal;Sandeep P. Chinchali", "authorids": "~Oguzhan_Akcin2;~Po-han_Li1;~Shubhankar_Agarwal1;~Sandeep_P._Chinchali1", "gender": "M;M;;", "homepage": ";https://d31003.github.io/;;", "dblp": "311/3023;311/3416;;", "google_scholar": "2elIEXoAAAAJ;x0WbtmoAAAAJ;;", "orcid": ";;;", "linkedin": "oguzhan-akcin-0907/;po-han-li-9760161bb/;;", "or_profile": "~Oguzhan_Akcin2;~Po-han_Li1;~Shubhankar_Agarwal1;~Sandeep_P._Chinchali1", "aff": "The University of Texas at Austin;University of Texas, Austin;;", "aff_domain": "utexas.edu;utexas.edu;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nakcin2022decentralized,\ntitle={Decentralized Data Collection for Robotic Fleet Learning: A Game-Theoretic Approach},\nauthor={Oguzhan Akcin and Po-han Li and Shubhankar Agarwal and Sandeep P. Chinchali},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=iM932PeUi_7}\n}", "github": "", "project": "", "reviewers": "hwcw;YAM6;scJ6;2cKZ", "site": "https://openreview.net/forum?id=iM932PeUi_7", "pdf_size": 0, "rating": "1;6;6;6", "confidence": "", "rating_avg": 4.75, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7920400168049286810&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "iRabxvK3j0", "title": "Eliciting Compatible Demonstrations for Multi-Human Imitation Learning", "track": "main", "status": "Poster", "tldr": "We introduce an approach for measuring the compatibility between a base policy and a given user demonstration. 
We use this compatibility measure to actively elicit demonstrations from multiple humans to improve performance on manipulation tasks.", "abstract": "Imitation learning from human-provided demonstrations is a strong approach for learning policies for robot manipulation. While the ideal dataset for imitation learning is homogenous and low-variance - reflecting a single, optimal method for performing a task - natural human behavior has a great deal of heterogeneity, with several optimal ways to demonstrate a task. This multimodality is inconsequential to human users, with task variations manifesting as subconscious choices; for example, reaching down, then across to grasp an object, versus reaching across, then down. Yet, this mismatch presents a problem for interactive imitation learning, where sequences of users improve on a policy by iteratively collecting new, possibly conflicting demonstrations. To combat this problem of demonstrator incompatibility, this work designs an approach for 1) measuring the compatibility of a new demonstration given a base policy, and 2) actively eliciting more compatible demonstrations from new users. Across two simulation tasks requiring long-horizon, dexterous manipulation and a real-world ``food plating'' task with a Franka Emika Panda arm, we show that we can both identify incompatible demonstrations via post-hoc filtering, and apply our compatibility measure to actively elicit compatible demonstrations from new users, leading to improved task success rates across simulated and real environments.", "keywords": "Interactive Imitation Learning;Active Demonstration Elicitation;Human Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/582a81342995dac4ab6e1945a677cff00be8f20e.zip", "author": "Kanishk Gandhi;Siddharth Karamcheti;Madeline Liao;Dorsa Sadigh", "authorids": "~Kanishk_Gandhi1;~Siddharth_Karamcheti1;~Madeline_Liao1;~Dorsa_Sadigh1", "gender": "M;F;F;M", "homepage": "http://siddkaramcheti.com/;https://madelineliao.com;https://dorsa.fyi/;https://kanishkgandhi.com", "dblp": "199/1922;;117/3174;243/5820", "google_scholar": "L5v2PHAAAAAJ;ISZALOAAAAAJ;ZaJEZpYAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Siddharth_Karamcheti1;~Madeline_Liao1;~Dorsa_Sadigh1;~Kanishk_V_Gandhi1", "aff": "Stanford University;Computer Science Department, Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;MS student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ngandhi2022eliciting,\ntitle={Eliciting Compatible Demonstrations for Multi-Human Imitation Learning},\nauthor={Kanishk Gandhi and Siddharth Karamcheti and Madeline Liao and Dorsa Sadigh},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=iRabxvK3j0}\n}", "github": "", "project": "", "reviewers": "tzdZ;Esih;hqZQ", "site": "https://openreview.net/forum?id=iRabxvK3j0", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16560855054225227929&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": 
"Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "iVxy2eO601U", "title": "Skill-based Model-based Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Model-based reinforcement learning (RL) is a sample-efficient way of learning complex behaviors by leveraging a learned single-step dynamics model to plan actions in imagination. However, planning every action for long-horizon tasks is not practical, akin to a human planning out every muscle movement. Instead, humans efficiently plan with high-level skills to solve complex tasks. From this intuition, we propose a Skill-based Model-based RL framework (SkiMo) that enables planning in the skill space using a skill dynamics model, which directly predicts the skill outcomes, rather than predicting all small details in the intermediate states, step by step. For accurate and efficient long-term planning, we jointly learn the skill dynamics model and a skill repertoire from prior experience. We then harness the learned skill dynamics model to accurately simulate and plan over long horizons in the skill space, which enables efficient downstream learning of long-horizon, sparse reward tasks. Experimental results in navigation and manipulation domains show that SkiMo extends the temporal horizon of model-based approaches and improves the sample efficiency for both model-based RL and skill-based RL. Code and videos are available at https://clvrai.com/skimo", "keywords": "Model-Based Reinforcement Learning;Skill Dynamics Model", "primary_area": "", "supplementary_material": "/attachment/a46f7bd377d12b9b516b44a313d779b4dd875fdb.zip", "author": "Lucy Xiaoyang Shi;Joseph J Lim;Youngwoon Lee", "authorids": "~Lucy_Xiaoyang_Shi1;~Joseph_J_Lim1;~Youngwoon_Lee1", "gender": "F;M;M", "homepage": "https://lucys0.github.io/;http://people.csail.mit.edu/lim/;https://youngwoon.github.io", "dblp": "324/5129;08/3086;117/4767", "google_scholar": ";jTnQTBoAAAAJ;CDPa3AgAAAAJ", "orcid": ";;0000-0001-9918-1056", "linkedin": "lucy-xiaoyang-shi/;;", "or_profile": "~Lucy_Xiaoyang_Shi1;~Joseph_J_Lim1;~Youngwoon_Lee1", "aff": "University of Southern California;Korea Advanced Institute of Science & Technology;University of Southern California", "aff_domain": "usc.edu;kaist.ac.kr;usc.edu", "position": "Undergrad student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nshi2022skillbased,\ntitle={Skill-based Model-based Reinforcement Learning},\nauthor={Lucy Xiaoyang Shi and Joseph J Lim and Youngwoon Lee},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=iVxy2eO601U}\n}", "github": "https://github.com/clvrai/skimo", "project": "", "reviewers": "43gq;ctWV;gSBE;6Ygv", "site": "https://openreview.net/forum?id=iVxy2eO601U", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13425458595968178952&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Southern California;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.kaist.ac.kr", "aff_unique_abbr": "USC;KAIST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;South 
Korea" }, { "id": "jTh3rdEF3LH", "title": "HUM3DIL: Semi-supervised Multi-modal 3D HumanPose Estimation for Autonomous Driving", "track": "main", "status": "Poster", "tldr": "3D Human Pose estimation from RGB + LiDAR data, for autonomous driving. ", "abstract": "Autonomous driving is an exciting new industry, posing important research questions. Within the perception module, 3D human pose estimation is an emerging technology, which can enable the autonomous vehicle to perceive and understand the subtle and complex behaviors of pedestrians. While hardware systems and sensors have dramatically improved over the decades -- with cars potentially boasting complex LiDAR and vision systems and with a growing expansion of the available body of dedicated datasets for this newly available information -- not much work has been done to harness these novel signals for the core problem of 3D human pose estimation. Our method, which we coin HUM3DIL (HUMan 3D from Images and LiDAR), efficiently uses of these complementary signals, in a semi-supervised fashion and outperforms existing methods with a large margin. It is a fast and compact model for onboard deployment. Specifically, we embed LiDAR points into pixel-aligned multi-modal features, which we pass through a sequence of Transformer refinement stages. Quantitative experiments on the Waymo Open Dataset support these claims, where we achieve state-of-the-art results on the task of 3D pose estimation.", "keywords": "autonomous driving;perception;human pose;key points;skeletal representation", "primary_area": "", "supplementary_material": "/attachment/fa7824770675fd8b32f39247b0a2a082bca81a14.zip", "author": "Andrei Zanfir;Mihai Zanfir;Alex Gorban;Jingwei Ji;Yin Zhou;Dragomir Anguelov;Cristian Sminchisescu", "authorids": "~Andrei_Zanfir1;~Mihai_Zanfir1;~Alex_Gorban1;~Jingwei_Ji1;~Yin_Zhou1;~Dragomir_Anguelov1;~Cristian_Sminchisescu1", "gender": "M;M;M;M;M;;M", "homepage": ";;https://jingweij.github.io;;;http://www.maths.lth.se/sminchisescu/;", "dblp": "65/10771;142/2785;98/7765;;a/DragomirAnguelov;96/3826;", "google_scholar": ";https://scholar.google.ro/citations?user=af68sKkAAAAJ;rOD7cMkAAAAJ;https://scholar.google.com/citations?scilu=9351241097416630746:0,18260587605580260227:0;https://scholar.google.com/citations?hl=en;https://scholar.google.se/citations?hl=en;svJIv28AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;dragomiranguelov/;;alexgorban/", "or_profile": "~Andrei_Zanfir1;~Mihai_Zanfir1;~Jingwei_Ji1;~Yin_Zhou1;~Dragomir_Anguelov1;~Cristian_Sminchisescu1;~Alexander_S._Gorban1", "aff": "Google;Google;Waymo LLC;Waymo;Waymo;Lund University;Waymo", "aff_domain": "google.com;google.com;waymo.com;waymo.com;waymo.com;lth.se;waymo.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Professor;Senior Research Scientist", "bibtex": "@inproceedings{\nzanfir2022humdil,\ntitle={{HUM}3{DIL}: Semi-supervised Multi-modal 3D HumanPose Estimation for Autonomous Driving},\nauthor={Andrei Zanfir and Mihai Zanfir and Alex Gorban and Jingwei Ji and Yin Zhou and Dragomir Anguelov and Cristian Sminchisescu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=jTh3rdEF3LH}\n}", "github": "", "project": "", "reviewers": "xAs9;ZEP4;cQjf;fkxa", "site": "https://openreview.net/forum?id=jTh3rdEF3LH", "pdf_size": 0, "rating": "1;6;6;6", "confidence": "", "rating_avg": 4.75, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 32, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17745867708609459192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;1;1;2;1", "aff_unique_norm": "Google;Waymo;Lund University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.waymo.com;https://www.lunduniversity.lu.se", "aff_unique_abbr": "Google;Waymo;LU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "United States;Sweden" }, { "id": "lLq09gVoaTE", "title": "Learning Visuo-Haptic Skewering Strategies for Robot-Assisted Feeding", "track": "main", "status": "Oral", "tldr": "We develop a zero-shot bite acquisition framework for robot-assisted feeding by learning reactive, multimodal skewering strategies.", "abstract": "Acquiring food items with a fork poses an immense challenge to a robot-assisted feeding system, due to the wide range of material properties and visual appearances present across food groups. Deformable foods necessitate different skewering strategies than firm ones, but inferring such characteristics for several previously unseen items on a plate remains nontrivial. Our key insight is to leverage visual and haptic observations during interaction with an item to rapidly and reactively plan skewering motions. We learn a generalizable, multimodal representation for a food item from raw sensory inputs which informs the optimal skewering strategy. Given this representation, we propose a zero-shot framework to sense visuo-haptic properties of a previously unseen item and reactively skewer it, all within a single interaction. Real-robot experiments with foods of varying levels of visual and textural diversity demonstrate that our multimodal policy outperforms baselines which do not exploit both visual and haptic cues or do not reactively plan. Across 6 plates of different food items, our proposed framework achieves 71% success over 69 skewering attempts total. 
Supplementary material, code, and videos can be found on our website: https://sites.google.com/view/hapticvisualnet-corl22/home.", "keywords": "Assistive Feeding;Deformable Manipulation;Multisensory Learning", "primary_area": "", "supplementary_material": "/attachment/3c29fb3306c3c656be3c2fe7e24dd66d8846f0c8.zip", "author": "Priya Sundaresan;Suneel Belkhale;Dorsa Sadigh", "authorids": "~Priya_Sundaresan1;~Suneel_Belkhale1;~Dorsa_Sadigh1", "gender": "F;M;F", "homepage": ";https://github.com/suneelbelkhale;https://dorsa.fyi/", "dblp": ";236/5069;117/3174", "google_scholar": "7SUquR4AAAAJ;;ZaJEZpYAAAAJ", "orcid": ";0000-0002-3963-7987;", "linkedin": ";suneel-b-032b1a101/;", "or_profile": "~Priya_Sundaresan1;~Suneel_Belkhale1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsundaresan2022learning,\ntitle={Learning Visuo-Haptic Skewering Strategies for Robot-Assisted Feeding},\nauthor={Priya Sundaresan and Suneel Belkhale and Dorsa Sadigh},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=lLq09gVoaTE}\n}", "github": "https://github.com/priyasundaresan/hapticvisualnet", "project": "", "reviewers": "b6eX;jNun;LSMg;NczR", "site": "https://openreview.net/forum?id=lLq09gVoaTE", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2060008583265957903&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "lV-rNbXVSaO", "title": "Semantic Abstraction: Open-World 3D Scene Understanding from 2D Vision-Language Models", "track": "main", "status": "Poster", "tldr": " We proposed Semantic Abstraction, a framework that equips 2D VLMs with 3D spatial capabilities for open-world 3D scene understanding tasks.", "abstract": " We study open-world 3D scene understanding, a family of tasks that require agents to reason about their 3D environment with an open-set vocabulary and out-of-domain visual inputs -- a critical skill for robots to operate in the unstructured 3D world. Towards this end, we propose Semantic Abstraction (SemAbs), a framework that equips 2D Vision-Language Models (VLMs) with new 3D spatial capabilities, while maintaining their zero-shot robustness. We achieve this abstraction using relevancy maps extracted from CLIP and learn 3D spatial and geometric reasoning skills on top of those abstractions in a semantic-agnostic manner. We demonstrate the usefulness of SemAbs on two open-world 3D scene understanding tasks: 1) completing partially observed objects and 2) localizing hidden objects from language descriptions. Experiments show that SemAbs can generalize to novel vocabulary, materials/lighting, classes, and domains (i.e., real-world scans) from training on limited 3D synthetic data. 
", "keywords": "3D scene understanding;out-of-domain generalization;language", "primary_area": "", "supplementary_material": "/attachment/3fbd7be9a1c85fa7a93b6be712a074ee57b1a6d5.zip", "author": "Huy Ha;Shuran Song", "authorids": "~Huy_Ha1;~Shuran_Song3", "gender": "M;F", "homepage": "https://www.cs.columbia.edu/~huy/;https://shurans.github.io/", "dblp": "277/9554;", "google_scholar": "-3-f_8YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Huy_Ha1;~Shuran_Song3", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;cs.columbia.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nha2022semantic,\ntitle={Semantic Abstraction: Open-World 3D Scene Understanding from 2D Vision-Language Models},\nauthor={Huy Ha and Shuran Song},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=lV-rNbXVSaO}\n}", "github": "https://github.com/columbia-ai-robotics/semantic-abstraction", "project": "", "reviewers": "Vpqh;ZWJc;jSHt", "site": "https://openreview.net/forum?id=lV-rNbXVSaO", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1998911484050007293&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "lb7B5Rw7tjw", "title": "You Only Look at One: Category-Level Object Representations for Pose Estimation From a Single Example", "track": "main", "status": "Poster", "tldr": "Real-time pose estimation that generalises to whole object categories after just inspecting one object", "abstract": "In order to meaningfully interact with the world, robot manipulators must be able to interpret objects they encounter. A critical aspect of this interpretation is pose estimation: inferring quantities that describe the position and orientation of an object in 3D space. Most existing approaches to pose estimation make limiting assumptions, often working only for specific, known object instances, or at best generalising to an object category using large pose-labelled datasets. In this work, we present a method for achieving category-level pose estimation by inspection of just a single object from a desired category. We show that we can subsequently perform accurate pose estimation for unseen objects from an inspected category, and considerably outperform prior work by exploiting multi-view correspondences. We demonstrate that our method runs in real-time, enabling a robot manipulator to rearrange previously unseen objects faithfully in terms of placement and orientation. 
Finally, we showcase our method in a continual learning setting, with a robot able to determine whether objects belong to known categories, and if not, use active perception to produce a one-shot category representation for subsequent pose estimation", "keywords": "pose estimation;object representations;one-shot", "primary_area": "", "supplementary_material": "/attachment/f7c2b9ad8856565687c6bd89b9e3ea6e727ed7f7.zip", "author": "Walter Goodwin;Ioannis Havoutis;Ingmar Posner", "authorids": "~Walter_Goodwin1;~Ioannis_Havoutis1;~Ingmar_Posner1", "gender": "M;;", "homepage": ";;", "dblp": ";;59/542", "google_scholar": ";;dPk-iwsAAAAJ", "orcid": ";;0000-0001-6270-700X", "linkedin": "walter-goodwin-291194115/;;ingmar-posner-20b49a", "or_profile": "~Walter_Goodwin1;~Ioannis_Havoutis1;~Ingmar_Posner1", "aff": "University of Oxford;;University of Oxford", "aff_domain": "ox.ac.uk;;ox.ac.uk", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\ngoodwin2022you,\ntitle={You Only Look at One: Category-Level Object Representations for Pose Estimation From a Single Example},\nauthor={Walter Goodwin and Ioannis Havoutis and Ingmar Posner},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=lb7B5Rw7tjw}\n}", "github": "", "project": "", "reviewers": "DGYB;kWPG;kBkt;5Bj6", "site": "https://openreview.net/forum?id=lb7B5Rw7tjw", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12051269255102913770&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "mqry_xMzvCM", "title": "GenLoco: Generalized Locomotion Controllers for Quadrupedal Robots", "track": "main", "status": "Poster", "tldr": "We introduce a framework for training generalized locomotion controllers for quadrupedal robots, which synthesizes general-purpose locomotion controllers that can be deployed on a large variety of quadrupedal robots.", "abstract": "Recent years have seen a surge in commercially-available and affordable quadrupedal robots, with many of these platforms being actively used in research and industry. As the availability of legged robots grows, so does the need for controllers that enable these robots to perform useful skills. However, most learning-based frameworks for controller development focus on training robot-specific controllers, a process that needs to be repeated for every new robot. In this work, we introduce a framework for training generalized locomotion (GenLoco) controllers for quadrupedal robots. Our framework synthesizes general-purpose locomotion controllers that can be deployed on a large variety of quadrupedal robots with similar morphologies. We present a simple but effective morphology randomization method that procedurally generates a diverse set of simulated robots for training. 
We show that by training a controller on this large set of simulated robots, our models acquire more general control strategies that can be directly transferred to novel simulated and real-world robots with diverse morphologies, which were not observed during training.", "keywords": "Legged Locomotion;Reinforcement Learning;Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/cdaf95e198d3cae72976e59d8dd56bcb82ac4fc3.zip", "author": "Gilbert Feng;Hongbo Zhang;Zhongyu Li;Xue Bin Peng;Bhuvan Basireddy;Linzhu Yue;ZHITAO SONG;Lizhi Yang;Yunhui Liu;Koushil Sreenath;Sergey Levine", "authorids": "~Gilbert_Feng1;lifelongyuanzhb@gmail.com;~Zhongyu_Li3;~Xue_Bin_Peng1;~Bhuvan_Basireddy1;lzyue@mae.cuhk.edu.hk;~ZHITAO_SONG1;~Lizhi_Yang1;yhliu@cuhk.edu.hk;~Koushil_Sreenath1;~Sergey_Levine1", "gender": "M;;M;M;M;;M;M;;M;M", "homepage": "https://gfeng2001.github.io;;;https://xbpeng.github.io;;;;;;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";;;;;;;12/5238;;;80/7594", "google_scholar": ";;ouSpgSkAAAAJ;https://scholar.google.ca/citations?user=FwxfQosAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;jEf5Q-4AAAAJ;;o9aFV8cAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;bhuvan-basireddy-928129140/;;;lzyang/;;;", "or_profile": "~Gilbert_Feng1;lifelongyuanzhb@gmail.com;~Zhongyu_Li3;~Xue_Bin_Peng1;~Bhuvan_Basireddy1;lzyue@mae.cuhk.edu.hk;~ZHITAO_SONG1;~Lizhi_Yang1;yhliu@cuhk.edu.hk;~Koushil_Sreenath1;~Sergey_Levine1", "aff": "University of California, Berkeley;;University of California, Berkeley;Simon Fraser University;University of California, Berkeley;;The Chinese University of Hong Kong;University of California, Berkeley;;University of California, Berkeley;Google", "aff_domain": "berkeley.edu;;berkeley.edu;sfu.ca;berkeley.edu;;cuhk.edu.hk;berkeley.edu;;berkeley.edu;google.com", "position": "Undergrad student;;PhD student;Assistant Professor;Undergrad student;;PhD student;Undergrad student;;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nfeng2022genloco,\ntitle={GenLoco: Generalized Locomotion Controllers for Quadrupedal Robots},\nauthor={Gilbert Feng and Hongbo Zhang and Zhongyu Li and Xue Bin Peng and Bhuvan Basireddy and Linzhu Yue and ZHITAO SONG and Lizhi Yang and Yunhui Liu and Koushil Sreenath and Sergey Levine},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=mqry_xMzvCM}\n}", "github": "https://github.com/HybridRobotics/GenLoco", "project": "", "reviewers": "3Unp;KDQ2;GntH;62wF", "site": "https://openreview.net/forum?id=mqry_xMzvCM", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 11, "corr_rating_confidence": 0, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17066491893648743053&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1;0;2;0;0;3", "aff_unique_norm": "University of California, Berkeley;Simon Fraser University;Chinese University of Hong Kong;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.sfu.ca;https://www.cuhk.edu.hk;https://www.google.com", "aff_unique_abbr": "UC Berkeley;SFU;CUHK;Google", "aff_campus_unique_index": "0;0;0;2;0;0;3", "aff_campus_unique": "Berkeley;;Hong Kong SAR;Mountain View", "aff_country_unique_index": "0;0;1;0;2;0;0;0", "aff_country_unique": "United States;Canada;China" }, { "id": "nBnHXevkjZ", "title": "CC-3DT: Panoramic 3D Object 
Tracking via Cross-Camera Fusion", "track": "main", "status": "Poster", "tldr": "We present CC-3DT, an effective solution to multi-camera 3D tracking with the key idea to merge 3D detections of multiple cameras into a single 3D tracking component handling both temporal and cross-camera association.", "abstract": "To track the 3D locations and trajectories of the other traffic participants at any given time, modern autonomous vehicles are equipped with multiple cameras that cover the vehicle's full surroundings. Yet, camera-based 3D object tracking methods prioritize optimizing the single-camera setup and resort to post-hoc fusion in a multi-camera setup. In this paper, we propose a method for panoramic 3D object tracking, called CC-3DT, that associates and models object trajectories both temporally and across views, and improves the overall tracking consistency. In particular, our method fuses 3D detections from multiple cameras before association, reducing identity switches significantly and improving motion modeling. Our experiments on large-scale driving datasets show that fusion before association leads to a large margin of improvement over post-hoc fusion. We set a new state-of-the-art with 12.6% improvement in average multi-object tracking accuracy (AMOTA) among all camera-based methods on the competitive NuScenes 3D tracking benchmark, outperforming previously published methods by 6.5% in AMOTA with the same 3D detector. ", "keywords": "Autonomous driving;3D multi-object tracking;cross-camera fusion", "primary_area": "", "supplementary_material": "/attachment/b4872db3ca2418dd78e0b11fd4d8739ddcd9e220.zip", "author": "Tobias Fischer;Yung-Hsu Yang;Suryansh Kumar;Min Sun;Fisher Yu", "authorids": "~Tobias_Fischer3;~Yung-Hsu_Yang1;~Suryansh_Kumar1;~Min_Sun1;~Fisher_Yu2", "gender": "M;M;M;M;M", "homepage": "https://tobiasfshr.github.io;https://royyang0714.github.io/;https://suryanshkumar.github.io/;http://aliensunmin.github.io;https://www.yf.io/", "dblp": "249/9213;288/0092;124/2783;62/2750-1;117/6314", "google_scholar": "Jp637I8AAAAJ;lFti01wAAAAJ;wbk0QAcAAAAJ;1Rf6sGcAAAAJ;-XCiamcAAAAJ", "orcid": "0000-0001-8227-001X;0000-0003-0044-515X;;;", "linkedin": ";royyang0714/;;;", "or_profile": "~Tobias_Fischer3;~Yung-Hsu_Yang1;~Suryansh_Kumar1;~Min_Sun1;~Fisher_Yu2", "aff": "Swiss Federal Institute of Technology;National Tsing Hua University;Swiss Federal Institute of Technology;National Tsing Hua University;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;nthu.edu.tw;ethz.ch;nthu.edu.tw;ethz.ch", "position": "PhD student;MS student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nfischer2022ccdt,\ntitle={{CC}-3{DT}: Panoramic 3D Object Tracking via Cross-Camera Fusion},\nauthor={Tobias Fischer and Yung-Hsu Yang and Suryansh Kumar and Min Sun and Fisher Yu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=nBnHXevkjZ}\n}", "github": "", "project": "", "reviewers": "qyz1;ANzg;qceo;tUyR", "site": "https://openreview.net/forum?id=nBnHXevkjZ", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12822657480325090747&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology;National Tsing Hua University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.ethz.ch;https://www.nthu.edu.tw", "aff_unique_abbr": "ETH Zurich;NTHU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Switzerland;China" }, { "id": "nPw7jaGBrCG", "title": "Leveraging Language for Accelerated Learning of Tool Manipulation", "track": "main", "status": "Poster", "tldr": "We combine language descriptions and meta-learning to accelerate tool learning in various manipulation tasks.", "abstract": "Robust and generalized tool manipulation requires an understanding of the properties and affordances of different tools. We investigate whether linguistic information about a tool (e.g., its geometry, common uses) can help control policies adapt faster to new tools for a given task. We obtain diverse descriptions of various tools in natural language and use pre-trained language models to generate their feature representations. We then perform language-conditioned meta-learning to learn policies that can efficiently adapt to new tools given their corresponding text descriptions. Our results demonstrate that combining linguistic information and meta-learning significantly accelerates tool learning in several manipulation tasks including pushing, lifting, sweeping, and hammering.", "keywords": "Language for Robotics;Tool Manipulation;Meta-learning", "primary_area": "", "supplementary_material": "/attachment/968aa1181362eaf965825864f7fac0772bc5d116.zip", "author": "Allen Z. Ren;Bharat Govil;Tsung-Yen Yang;Karthik R Narasimhan;Anirudha Majumdar", "authorids": "~Allen_Z._Ren1;bgovil@princeton.edu;~Tsung-Yen_Yang2;~Karthik_R_Narasimhan1;~Anirudha_Majumdar1", "gender": "M;;;M;M", "homepage": "http://allenzren.github.io/;;https://sites.google.com/view/tyjimmyyang;http://www.karthiknarasimhan.com;https://irom-lab.princeton.edu/majumdar/", "dblp": ";;204/7980;147/0322;116/6436", "google_scholar": "mgMzkYMAAAAJ;;g-hQdY8AAAAJ;euc0GX4AAAAJ;ibu3FwsAAAAJ", "orcid": ";;;;", "linkedin": "allenzren/;;tsung-yen-yang;;", "or_profile": "~Allen_Z._Ren1;bgovil@princeton.edu;~Tsung-Yen_Yang2;~Karthik_R_Narasimhan1;~Anirudha_Majumdar1", "aff": "Toyota Research Institute;;Princeton University;Princeton University;Princeton University", "aff_domain": "tri.global;;princeton.edu;princeton.edu;princeton.edu", "position": "Intern;;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nren2022leveraging,\ntitle={Leveraging Language for Accelerated Learning of Tool Manipulation},\nauthor={Allen Z. 
Ren and Bharat Govil and Tsung-Yen Yang and Karthik R Narasimhan and Anirudha Majumdar},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=nPw7jaGBrCG}\n}", "github": "", "project": "", "reviewers": "uYWj;tT65;x7HJ;kdpR", "site": "https://openreview.net/forum?id=nPw7jaGBrCG", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11645484068475850188&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Toyota Research Institute;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tri.global;https://www.princeton.edu", "aff_unique_abbr": "TRI;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ndYsaoyzCWv", "title": "Temporal Logic Imitation: Learning Plan-Satisficing Motion Policies from Demonstrations", "track": "main", "status": "Oral", "tldr": "Combining the task-level reactivity of LTL and the motion-level reactivity of DS, we arrive at an imitation learning system able to robustly perform various multi-step tasks under arbitrary perturbations given only a small number of demonstrations.", "abstract": "Learning from demonstration (LfD) has successfully solved tasks featuring a long time horizon. However, when the problem complexity also includes human-in-the-loop perturbations, state-of-the-art approaches do not guarantee the successful reproduction of a task. In this work, we identify the roots of this challenge as the failure of a learned continuous policy to satisfy the discrete plan implicit in the demonstration. By utilizing modes (rather than subgoals) as the discrete abstraction and motion policies with both mode invariance and goal reachability properties, we prove our learned continuous policy can simulate any discrete plan specified by a linear temporal logic (LTL) formula. 
Consequently, an imitator is robust to both task- and motion-level perturbations and guaranteed to achieve task success.", "keywords": "Learning from Demonstration;Dynamical Systems;Formal Methods;Linear Temporal Logic;Certifiable Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/626f4e58ab4b3adda676716beb8dd82d96317793.zip", "author": "Yanwei Wang;Nadia Figueroa;Shen Li;Ankit Shah;Julie Shah", "authorids": "~Yanwei_Wang1;nadiafig@seas.upenn.edu;~Shen_Li1;~Ankit_Shah2;~Julie_Shah2", "gender": ";;M;M;F", "homepage": "https://yanweiw.github.io/;;https://shenlirobot.github.io/;http://www.ajshah.info;https://interactive.mit.edu", "dblp": ";;22/1835;;", "google_scholar": ";;FpkwJdEAAAAJ;KmJNnzIAAAAJ;", "orcid": ";;0000-0002-8746-5438;;", "linkedin": ";;shen-li-robotics/;;", "or_profile": "~Yanwei_Wang1;nadiafig@seas.upenn.edu;~Shen_Li1;~Ankit_Shah2;~Julie_Shah2", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Brown University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu;brown.edu;mit.edu", "position": "PhD student;;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nwang2022temporal,\ntitle={Temporal Logic Imitation: Learning Plan-Satisficing Motion Policies from Demonstrations},\nauthor={Yanwei Wang and Nadia Figueroa and Shen Li and Ankit Shah and Julie Shah},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=ndYsaoyzCWv}\n}", "github": "", "project": "", "reviewers": "QTy6;VK5r;p7Tc;L7Wt", "site": "https://openreview.net/forum?id=ndYsaoyzCWv", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3184342769463001007&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.brown.edu", "aff_unique_abbr": "MIT;Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "niys1Zt1blq", "title": "COACH: Cooperative Robot Teaching", "track": "main", "status": "Poster", "tldr": "We present a conceptual framework, Cooperative Robot Teaching, which offers a formal model to describe and analyze robot teaching in a cooperative task.", "abstract": "Knowledge and skills can transfer from human teachers to human students. However, such direct transfer is often not scalable for physical tasks, as they require one-to-one interaction, and human teachers are not available in sufficient numbers. Machine learning enables robots to become experts and play the role of teachers to help in this situation. In this work, we formalize cooperative robot teaching as a Markov game, consisting of four key elements: the target task, the student model, the teacher model, and the interactive teaching-learning process. Under a moderate assumption, the Markov game reduces to a partially observable Markov decision process, with an efficient approximate solution. 
We illustrate our approach on two cooperative tasks, one in a simulated video game and one with a real robot.", "keywords": "Robot Teaching;Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/fe75e7ecd19a1184bcf591533af126841f416ab6.zip", "author": "Cunjun Yu;Yiqing Xu;Linfeng Li;David Hsu", "authorids": "~Cunjun_Yu1;~Yiqing_Xu1;~Linfeng_Li2;~David_Hsu1", "gender": "Unspecified;F;M;M", "homepage": ";https://eeching.github.io/;;http://www.comp.nus.edu.sg/~dyhsu/", "dblp": "232/3014;27/870;;29/331", "google_scholar": "4xwyGM8AAAAJ;bJm1-QQAAAAJ;;S9LHLKEAAAAJ", "orcid": ";;0000-0001-7536-4894;0000-0002-2309-4535", "linkedin": ";yiqing-xu-2746a9166/;;david-hsu-a86200a1/", "or_profile": "~Cunjun_Yu1;~Yiqing_Xu1;~Linfeng_Li2;~David_Hsu1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;u.nus.edu;nus.edu.sg", "position": "PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nyu2022coach,\ntitle={{COACH}: Cooperative Robot Teaching},\nauthor={Cunjun Yu and Yiqing Xu and Linfeng Li and David Hsu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=niys1Zt1blq}\n}", "github": "", "project": "", "reviewers": "aCDa;thAt;GwM5", "site": "https://openreview.net/forum?id=niys1Zt1blq", "pdf_size": 0, "rating": "1;6;10", "confidence": "", "rating_avg": 5.666666666666667, "confidence_avg": 0, "replies_avg": 44, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7044114362045025241&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "nuAGobCwb8V", "title": "Representation Learning for Object Detection from Unlabeled Point Cloud Sequences", "track": "main", "status": "Poster", "tldr": "Designing self-supervised tasks using unlabeled point cloud sequences to improve performance of object detection task", "abstract": "Although unlabeled 3D data is easy to collect, state-of-the-art machine learning techniques for 3D object detection still rely on difficult-to-obtain manual annotations. To reduce dependence on the expensive and error-prone process of manual labeling, we propose a technique for representation learning from unlabeled LiDAR point cloud sequences. Our key insight is that moving objects can be reliably detected from point cloud sequences without the need for human-labeled 3D bounding boxes. In a single LiDAR frame extracted from a sequence, the set of moving objects provides sufficient supervision for single-frame object detection. By designing appropriate pretext tasks, we learn point cloud features that generalize to both moving and static unseen objects. We apply these features to object detection, achieving strong performance on self-supervised representation learning and unsupervised object detection tasks. 
", "keywords": "Representation learning;object detection;point cloud sequences", "primary_area": "", "supplementary_material": "/attachment/2e56cf5f598496e0bc76d86e80f3b35167ccfa56.zip", "author": "Xiangru Huang;Yue Wang;Vitor Campagnolo Guizilini;Rares Andrei Ambrus;Adrien Gaidon;Justin Solomon", "authorids": "~Xiangru_Huang1;~Yue_Wang2;~Vitor_Campagnolo_Guizilini2;~Rares_Andrei_Ambrus1;~Adrien_Gaidon1;~Justin_Solomon1", "gender": "M;M;M;M;;M", "homepage": "https://people.csail.mit.edu/xrhuang/;https://yuewang.xyz;;http://www.csc.kth.se/~raambrus/;https://adriengaidon.com/;http://people.csail.mit.edu/jsolomon/", "dblp": "134/4071;33/4822-41;;25/76;06/7548.html;80/5094", "google_scholar": "0Out9QwAAAAJ;v-AEFIEAAAAJ;UH9tP6QAAAAJ;2xjjS3oAAAAJ;https://scholar.google.fr/citations?user=2StUgf4AAAAJ;pImSVwoAAAAJ", "orcid": ";;;0000-0002-3111-3812;;0000-0002-7701-7586", "linkedin": ";;vitorguizilini/;rare%C8%99-ambru%C8%99-b04812125/;adrien-gaidon-63ab2358/;justin-solomon-8a587914/", "or_profile": "~Xiangru_Huang1;~Yue_Wang2;~Vitor_Campagnolo_Guizilini2;~Rares_Andrei_Ambrus1;~Adrien_Gaidon1;~Justin_Solomon1", "aff": "Computer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science;Massachusetts Institute of Technology;Toyota Research Institute;Toyota Research Institute;Toyota Research Institute (TRI);Massachusetts Institute of Technology", "aff_domain": "csail.mit.edu;mit.edu;tri.global;tri.global;tri.global;mit.edu", "position": "Postdoc;PhD student;Staff Research Scientist;Researcher;Head of ML;Associate Professor", "bibtex": "@inproceedings{\nhuang2022representation,\ntitle={Representation Learning for Object Detection from Unlabeled Point Cloud Sequences},\nauthor={Xiangru Huang and Yue Wang and Vitor Campagnolo Guizilini and Rares Andrei Ambrus and Adrien Gaidon and Justin Solomon},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=nuAGobCwb8V}\n}", "github": "https://github.com/xiangruhuang/PCSeqLearning", "project": "", "reviewers": "Thig;442W;NRFL;FBsS", "site": "https://openreview.net/forum?id=nuAGobCwb8V", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4366241300655892084&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;1;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Toyota Research Institute", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;", "aff_unique_url": "https://www.csail.mit.edu;https://www.tri.global", "aff_unique_abbr": "CSAIL;TRI", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "o8dLx8OVcNk", "title": "Learning Riemannian Stable Dynamical Systems via Diffeomorphisms", "track": "main", "status": "Poster", "tldr": "Learning Lyapunov-stable Dynamical Systems on Riemannian Manifolds via Diffeormorphisms built on neural manifold ODEs.", "abstract": "Dexterous and autonomous robots should be capable of executing elaborated dynamical motions skillfully. Learning techniques may be leveraged to build models of such dynamic skills. To accomplish this, the learning model needs to encode a stable vector field that resembles the desired motion dynamics. 
This is challenging as the robot state does not evolve on a Euclidean space, and therefore the stability guarantees and vector field encoding need to account for the geometry arising from, for example, the orientation representation. To tackle this problem, we propose learning Riemannian stable dynamical systems (RSDS) from demonstrations, allowing us to account for different geometric constraints resulting from the dynamical system state representation. Our approach provides Lyapunov-stability guarantees on Riemannian manifolds that are enforced on the desired motion dynamics via diffeomorphisms built on neural manifold ODEs. We show that our Riemannian approach makes it possible to learn stable dynamical systems displaying complicated vector fields on both illustrative examples and real-world manipulation tasks, where Euclidean approximations fail.", "keywords": "Dynamical systems;Riemannian manifolds;Motion learning;Learning from demonstrations;Neural ODEs", "primary_area": "", "supplementary_material": "/attachment/4e964d99cf050de584044958817b9edcd7497bfb.zip", "author": "Jiechao Zhang;Hadi Beik Mohammadi;Leonel Rozo", "authorids": "~Jiechao_Zhang1;~Hadi_Beik_Mohammadi1;~Leonel_Rozo1", "gender": "M;M;M", "homepage": ";;https://leonelrozo.weebly.com/", "dblp": ";;10/9515", "google_scholar": ";https://scholar.google.de/citations?hl=en;https://scholar.google.it/citations?user=vLWgi-YAAAAJ", "orcid": ";;0000-0001-5970-9135", "linkedin": "www.linkedin.com/in/JiechaoZhang;;leonelrozo/", "or_profile": "~Jiechao_Zhang1;~Hadi_Beik_Mohammadi1;~Leonel_Dario_Rozo1", "aff": "Karlsruher Institut f\u00fcr Technologie;Bosch;Robert Bosch GmbH, Bosch", "aff_domain": "kit.edu;bosch.com;de.bosch.com", "position": "MS student;PhD student;Researcher", "bibtex": "@inproceedings{\nzhang2022learning,\ntitle={Learning Riemannian Stable Dynamical Systems via Diffeomorphisms},\nauthor={Jiechao Zhang and Hadi Beik Mohammadi and Leonel Rozo},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=o8dLx8OVcNk}\n}", "github": "", "project": "", "reviewers": "hQF2;AVAQ;SPAS;eXxr", "site": "https://openreview.net/forum?id=o8dLx8OVcNk", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4732056191298695155&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Robert Bosch GmbH", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.bosch.com", "aff_unique_abbr": "KIT;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "oLLOSt3zV4", "title": "Fast Lifelong Adaptive Inverse Reinforcement Learning from Demonstrations", "track": "main", "status": "Poster", "tldr": "", "abstract": "Learning from Demonstration (LfD) approaches empower end-users to teach robots novel tasks via demonstrations of the desired behaviors, democratizing access to robotics. However, current LfD frameworks are not capable of fast adaptation to heterogeneous human demonstrations nor the large-scale deployment in ubiquitous robotics applications. In this paper, we propose a novel LfD framework, Fast Lifelong Adaptive Inverse Reinforcement learning (FLAIR). 
Our approach (1) leverages learned strategies to construct policy mixtures for fast adaptation to new demonstrations, allowing for quick end-user personalization, (2) distills common knowledge across demonstrations, achieving accurate task inference; and (3) expands its model only when needed in lifelong deployments, maintaining a concise set of prototypical strategies that can approximate all behaviors via policy mixtures. We empirically validate that FLAIR achieves adaptability (i.e., the robot adapts to heterogeneous, user-specific task preferences), efficiency (i.e., the robot achieves sample-efficient adaptation), and scalability (i.e., the model grows sublinearly with the number of demonstrations while maintaining high performance). FLAIR surpasses benchmarks across three control tasks with an average 57% improvement in policy returns and an average 78% fewer episodes required for demonstration modeling using policy mixtures. Finally, we demonstrate the success of FLAIR in a table tennis task and find users rate FLAIR as having higher task ($p<.05$) and personalization ($p<.05$) performance. ", "keywords": "Personalized Learning;Learning from Heterogeneous Demonstration;Inverse Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/808d4dc64c268642b12af3609b238a5ff0cc9711.zip", "author": "Letian Chen;Sravan Jayanthi;Rohan R Paleja;Daniel Martin;Viacheslav Zakharov;Matthew Gombolay", "authorids": "~Letian_Chen1;sjayanthi@gatech.edu;~Rohan_R_Paleja1;dmartin20576@gmail.com;chesl97@gmail.com;~Matthew_Gombolay1", "gender": "M;;M;;;M", "homepage": "http://letianchen.me/;;https://rohanpaleja.com/;;;https://core-robotics.gatech.edu/", "dblp": "232/1880;;237/8623;;;144/1022", "google_scholar": "SAeHYeQAAAAJ;;xjnQbKgAAAAJ;;;Ihyz20wAAAAJ", "orcid": "0000-0001-9238-7342;;;;;", "linkedin": "letianchen/;;;;;", "or_profile": "~Letian_Chen1;sjayanthi@gatech.edu;~Rohan_R_Paleja1;dmartin20576@gmail.com;chesl97@gmail.com;~Matthew_Gombolay1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology;;;Georgia Institute of Technology", "aff_domain": "gatech.edu;;gatech.edu;;;cc.gatech.edu", "position": "PhD student;;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nchen2022fast,\ntitle={Fast Lifelong Adaptive Inverse Reinforcement Learning from Demonstrations},\nauthor={Letian Chen and Sravan Jayanthi and Rohan R Paleja and Daniel Martin and Viacheslav Zakharov and Matthew Gombolay},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=oLLOSt3zV4}\n}", "github": "https://github.com/CORE-Robotics-Lab/FLAIR", "project": "", "reviewers": "iYLw;bJrX;rqFS", "site": "https://openreview.net/forum?id=oLLOSt3zV4", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2121496381504441133&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "oPRhm0Aben_", "title": "Task-Relevant Failure Detection for Trajectory Predictors in Autonomous Vehicles", "track": "main", "status": "Poster", "tldr": "We develop 
a probabilistic run-time monitor that detects when a task-relevant prediction failure occurs by propagating trajectory prediction errors to a planning cost and reasoning about their impact on the autonomous vehicle.", "abstract": "In modern autonomy stacks, prediction modules are paramount to planning motions in the presence of other mobile agents. However, failures in prediction modules can mislead the downstream planner into making unsafe decisions. Indeed, the high uncertainty inherent to the task of trajectory forecasting ensures that such mispredictions occur frequently. Motivated by the need to improve safety of autonomous vehicles without compromising on their performance, we develop a probabilistic run-time monitor that detects when a \"harmful\" prediction failure occurs, i.e., a task-relevant failure detector. We achieve this by propagating trajectory prediction errors to the planning cost to reason about their impact on the AV. Furthermore, our detector comes equipped with performance measures on the false-positive and the false-negative rate and allows for data-free calibration. In our experiments we compared our detector with various others and found that our detector has the highest area under the receiver operator characteristic curve.", "keywords": "Run-time Monitoring;Autonomous Vehicles;Trajectory Prediction", "primary_area": "", "supplementary_material": "/attachment/2c4c4ca1804ac28a8d25c42d2a0c6bc5126e98ea.zip", "author": "Alec Farid;Sushant Veer;Boris Ivanovic;Karen Leung;Marco Pavone", "authorids": "~Alec_Farid1;~Sushant_Veer1;~Boris_Ivanovic1;~Karen_Leung2;~Marco_Pavone1", "gender": ";M;;;M", "homepage": ";;http://www.borisivanovic.com/;http://faculty.washington.edu/kymleung/;https://web.stanford.edu/~pavone/", "dblp": ";173/5950;203/8356;;91/3382-1.html", "google_scholar": ";1FiIlQsAAAAJ;ey9AQcEAAAAJ;;RhOpyXcAAAAJ", "orcid": ";;0000-0002-8698-202X;;", "linkedin": ";;boris-ivanovic-a3103064;;", "or_profile": "~Alec_Farid1;~Sushant_Veer1;~Boris_Ivanovic1;~Karen_Leung2;~Marco_Pavone1", "aff": ";NVIDIA;NVIDIA;NVIDIA;Stanford University", "aff_domain": ";nvidia.com;nvidia.com;nvidia.com;stanford.edu", "position": ";Researcher;Researcher;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nfarid2022taskrelevant,\ntitle={Task-Relevant Failure Detection for Trajectory Predictors in Autonomous Vehicles},\nauthor={Alec Farid and Sushant Veer and Boris Ivanovic and Karen Leung and Marco Pavone},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=oPRhm0Aben_}\n}", "github": "https://github.com/NVlabs/pred-fail-detector", "project": "", "reviewers": "nVyF;dWwY;t48G", "site": "https://openreview.net/forum?id=oPRhm0Aben_", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6162716714810201955&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "NVIDIA;Stanford University", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.stanford.edu", "aff_unique_abbr": "NVIDIA;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "oud6xgdpqVM", "title": "QuaDUE-CCM: Interpretable Distributional Reinforcement Learning using Uncertain 
Contraction Metrics for Precise Quadrotor Trajectory Tracking", "track": "main", "status": "Poster", "tldr": "A quadrotor trajectory tracking framework that integrates a distributional reinforcement learning into a control-contraction-metric-based nonlinear optimal control problem.", "abstract": "Accuracy and stability are common requirements for Quadrotor trajectory tracking systems. Designing an accurate and stable tracking controller remains challenging, particularly in unknown and dynamic environments with complex aerodynamic disturbances. We propose a Quantile-approximation-based Distributional-reinforced Uncertainty Estimator (QuaDUE) to accurately identify the effects of aerodynamic disturbances, i.e., the uncertainties between the true and estimated Control Contraction Metrics (CCMs). Taking inspiration from contraction theory and integrating the QuaDUE for uncertainties, our novel CCM-based trajectory tracking framework tracks any feasible reference trajectory precisely whilst guaranteeing exponential convergence. More importantly, the convergence and training acceleration of the distributional RL are guaranteed and analyzed, respectively, from theoretical perspectives. We also demonstrate our system under unknown and diverse aerodynamic forces. Under large aerodynamic forces (>2~ m/s^2), compared with the classic data-driven approach, our QuaDUE-CCM achieves at least a 56.6% improvement in tracking error. Compared with QuaDRED-MPC, a distributional RL-based approach, QuaDUE-CCM achieves at least a 3 times improvement in contraction rate.", "keywords": "Quadrotor trajectory tracking;Learning-based control", "primary_area": "", "supplementary_material": "/attachment/9484e8294b99777ea3e23827ebde0c5edd651674.zip", "author": "YANRAN WANG;James O'Keeffe;QIUCHEN QIAN;David Boyle", "authorids": "~YANRAN_WANG3;~James_O'Keeffe1;~QIUCHEN_QIAN1;~David_Boyle1", "gender": "M;M;M;M", "homepage": "https://alex-yanranwang.github.io/;;;https://www.imperial.ac.uk/people/david.boyle", "dblp": ";;;08/1254", "google_scholar": "https://scholar.google.com/citations?hl=en;ae9_GB0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=XRzAhnYAAAAJ", "orcid": "0000-0003-1107-4235;;;0000-0002-1993-4482", "linkedin": "yanran-wang-3882a223b/;;;", "or_profile": "~YANRAN_WANG3;~James_O'Keeffe1;~QIUCHEN_QIAN1;~David_Boyle1", "aff": "Imperial College London;Imperial College London, Imperial College London;Imperial College London, Imperial College London;Imperial College London, Imperial College London", "aff_domain": "imperial.ac.uk;imperial.ac.uk;imperial.ac.uk;imperial.ac.uk", "position": "PhD student;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2022quadueccm,\ntitle={Qua{DUE}-{CCM}: Interpretable Distributional Reinforcement Learning using Uncertain Contraction Metrics for Precise Quadrotor Trajectory Tracking},\nauthor={YANRAN WANG and James O'Keeffe and QIUCHEN QIAN and David Boyle},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=oud6xgdpqVM}\n}", "github": "", "project": "", "reviewers": "VCFb;PQAG;U1k2;6Poj", "site": "https://openreview.net/forum?id=oud6xgdpqVM", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10210471672984077991&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, 
"aff_unique_index": "0;0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "pPR--ivXwPD", "title": "Exploring with Sticky Mittens: Reinforcement Learning with Expert Interventions via Option Templates", "track": "main", "status": "Poster", "tldr": "Adding expert intervention in the training phase can achieve orders of magnitude reduction in sample complexity, which is typically very high for most realistic RL algorithms.", "abstract": "Long horizon robot learning tasks with sparse rewards pose a significant challenge for current reinforcement learning algorithms.\nA key feature enabling humans to learn challenging control tasks is that they often receive expert intervention that enables them to understand the high-level structure of the task before mastering low-level control actions.\nWe propose a framework for leveraging expert intervention to solve long-horizon reinforcement learning tasks. We consider \\emph{option templates}, which are specifications encoding a potential option that can be trained using reinforcement learning. We formulate expert intervention as allowing the agent to execute option templates before learning an implementation. This enables them to use an option, before committing costly resources to learning it. \nWe evaluate our approach on three challenging reinforcement learning problems, showing that it outperforms state-of-the-art approaches by two orders of magnitude. Videos of trained agents and our code can be found at: https://sites.google.com/view/stickymittens", "keywords": "Sample-Efficient Reinforcement Learning;Expert Intervention;Options;Planning with Primitives", "primary_area": "", "supplementary_material": "/attachment/6752d4aec2ee1491027a4ace25a518e80159584d.zip", "author": "Souradeep Dutta;Kaustubh Sridhar;Osbert Bastani;Edgar Dobriban;James Weimer;Insup Lee;Julia Parish-Morris", "authorids": "~Souradeep_Dutta2;~Kaustubh_Sridhar1;~Osbert_Bastani1;~Edgar_Dobriban2;~James_Weimer1;~Insup_Lee1;~Julia_Parish-Morris1", "gender": "M;M;M;;M;;", "homepage": "https://sites.google.com/site/duttasouradeep39/;https://kaustubhsridhar.github.io/;http://obastani.github.io;https://statistics.wharton.upenn.edu/profile/dobriban/;https://jamesweimer.net;https://www.cis.upenn.edu/~lee/;", "dblp": ";289/5808;21/11275;99/11269;79/11048.html;l/InsupLee.html;184/8873", "google_scholar": ";V-HiOnUAAAAJ;cxYepGkAAAAJ;aGvH4yMAAAAJ;IeuLakwAAAAJ;qPlUgrgAAAAJ;", "orcid": ";;;;0000-0001-8167-9163;0000-0003-2672-1132;", "linkedin": ";kaustubh-sridhar-8636797a/;;edgar-dobriban/;;;", "or_profile": "~Souradeep_Dutta2;~Kaustubh_Sridhar1;~Osbert_Bastani1;~Edgar_Dobriban2;~James_Weimer1;~Insup_Lee1;~Julia_Parish-Morris1", "aff": "University of Pennsylvania;AWS AI Labs;University of Pennsylvania;The Wharton School, University of Pennsylvania;Vanderbilt University;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;amazon.com;upenn.edu;wharton.upenn.edu;vanderbilt.edu;upenn.edu;upenn.edu", "position": "Postdoc;Intern;Assistant Professor;Assistant Professor;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndutta2022exploring,\ntitle={Exploring with Sticky Mittens: Reinforcement Learning with Expert Interventions via Option Templates},\nauthor={Souradeep Dutta and Kaustubh Sridhar and Osbert 
Bastani and Edgar Dobriban and James Weimer and Insup Lee and Julia Parish-Morris},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=pPR--ivXwPD}\n}", "github": "https://github.com/sticky-mittens", "project": "", "reviewers": "XdN5;dp9v;zrnS", "site": "https://openreview.net/forum?id=pPR--ivXwPD", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6271690672305045309&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "University of Pennsylvania;Amazon;Vanderbilt University", "aff_unique_dep": ";AWS AI Labs;", "aff_unique_url": "https://www.upenn.edu;https://aws.amazon.com;https://www.vanderbilt.edu", "aff_unique_abbr": "UPenn;AWS;Vanderbilt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "pVSaWTgDmCu", "title": "Planning Paths through Occlusions in Urban Environments", "track": "main", "status": "Oral", "tldr": "Planning through occluded urban environments using image inpainting techniques.", "abstract": "This paper presents a novel framework for planning in unknown and occluded urban spaces. We specifically focus on turns and intersections where occlusions significantly impact navigability. Our approach uses an inpainting model to fill in a sparse, occluded, semantic lidar point cloud and plans dynamically feasible paths for a vehicle to traverse through the open and inpainted spaces. We demonstrate our approach using a car\u2019s lidar data with real-time occlusions, and show that by inpainting occluded areas, we can plan longer paths, with more turn\noptions compared to without inpainting; in addition, our approach more closely follows paths derived from a planner with no occlusions (called the ground truth) compared to other state of the art approaches.", "keywords": "Navigation;Occluded Environments;Semantic Scene Understanding", "primary_area": "", "supplementary_material": "/attachment/79718c75d1c85c89fcb392aff00b43c23e955a0c.zip", "author": "Yutao Han;Youya Xia;Guo-Jun Qi;Mark Campbell", "authorids": "~Yutao_Han1;~Youya_Xia1;~Guo-Jun_Qi1;~Mark_Campbell1", "gender": "M;;M;M", "homepage": ";https://www.xiayouya.com;http://maple-lab.net/gqi/;http://campbell.mae.cornell.edu", "dblp": ";227/2270;41/943;", "google_scholar": "https://scholar.google.co.uk/citations?user=x4PaP8sAAAAJ;EnW0ddoAAAAJ;https://scholar.google.com.tw/citations?user=Nut-uvoAAAAJ;e1iAhHQAAAAJ", "orcid": ";;0000-0003-3508-1851;", "linkedin": "yh675;;;", "or_profile": "~Yutao_Han1;~Youya_Xia1;~Guo-Jun_Qi1;~Mark_Campbell1", "aff": "Innopeak Technology (OPPO Research USA);Cornell University;Guangdong OPPO Mobile Telecommunications Corp.,Ltd.;Cornell University", "aff_domain": "innopeaktech.com;cornell.edu;oppo.com;cornell.edu", "position": "Researcher;PhD student;Dean and Chief Scientist;Full Professor", "bibtex": "@inproceedings{\nhan2022planning,\ntitle={Planning Paths through Occlusions in Urban Environments},\nauthor={Yutao Han and Youya Xia and Guo-Jun Qi and Mark Campbell},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=pVSaWTgDmCu}\n}", "github": "https://github.com/genplanning/generative_planning", "project": "", "reviewers": "apxG;hna9;dgc4;7Fav", 
"site": "https://openreview.net/forum?id=pVSaWTgDmCu", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hj7IVz3NxpsJ:scholar.google.com/&scioq=Planning+Paths+through+Occlusions+in+Urban+Environments&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "OPPO Research USA;Cornell University;OPPO Mobile Telecommunications Corp.,Ltd.", "aff_unique_dep": "Innopeak Technology;;", "aff_unique_url": "https://www.oppo.com/en;https://www.cornell.edu;https://www.oppo.com", "aff_unique_abbr": "OPPO;Cornell;OPPO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "pn-HOPBioUE", "title": "Leveraging Fully Observable Policies for Learning under Partial Observability", "track": "main", "status": "Poster", "tldr": "A method to utilize given fully observable policies to learn partially observable policies during offline training", "abstract": "Reinforcement learning in partially observable domains is challenging due to the lack of observable state information. Thankfully, learning offline in a simulator with such state information is often possible. In particular, we propose a method for partially observable reinforcement learning that uses a fully observable policy (which we call a \\emph{state expert}) during training to improve performance. Based on Soft Actor-Critic (SAC), our agent balances performing actions similar to the state expert and getting high returns under partial observability. Our approach can leverage the fully-observable policy for exploration and parts of the domain that are fully observable while still being able to learn under partial observability. On six robotics domains, our method outperforms pure imitation, pure reinforcement learning, the sequential or parallel combination of both types, and a recent state-of-the-art method in the same setting. 
A successful policy transfer to a physical robot in a manipulation task from pixels shows our approach's practicality in learning interesting policies under partial observability.", "keywords": "Partial Observability;Imitation Learning;Fully Observable Expert", "primary_area": "", "supplementary_material": "/attachment/c184f706130de49ba0f158ed25610ba0fe5c8ba5.zip", "author": "Hai Huu Nguyen;Andrea Baisero;Dian Wang;Christopher Amato;Robert Platt", "authorids": "~Hai_Huu_Nguyen1;~Andrea_Baisero1;~Dian_Wang1;~Christopher_Amato1;~Robert_Platt1", "gender": "M;M;M;M;", "homepage": "https://hai-h-nguyen.github.io/;;https://pointw.github.io/;http://www.ccs.neu.edu/home/camato/index.html;http://www.ccs.neu.edu/home/rplatt/", "dblp": ";135/3247;191/1369-1;10/3254;39/5434", "google_scholar": "5b9ncWoAAAAJ;;CckjtfQAAAAJ;-8-sD-sAAAAJ;Z4Y5S2oAAAAJ", "orcid": ";;;;", "linkedin": ";;dianwang1007;;", "or_profile": "~Hai_Huu_Nguyen1;~Andrea_Baisero1;~Dian_Wang1;~Christopher_Amato1;~Robert_Platt1", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;northeastern.edu;neu.edu;neu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nnguyen2022leveraging,\ntitle={Leveraging Fully Observable Policies for Learning under Partial Observability},\nauthor={Hai Huu Nguyen and Andrea Baisero and Dian Wang and Christopher Amato and Robert Platt},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=pn-HOPBioUE}\n}", "github": "", "project": "", "reviewers": "Spnc;oqag;6hqs;a2e5", "site": "https://openreview.net/forum?id=pn-HOPBioUE", "pdf_size": 0, "rating": "4;4;4;6", "confidence": "", "rating_avg": 4.5, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3486907727352162482&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "qDtbMK67PJG", "title": "Learning Bimanual Scooping Policies for Food Acquisition", "track": "main", "status": "Poster", "tldr": "We develop a system for bimanual scooping that recognizes and prevents scooping failures to avoid breaking fragile food items during acquisition.", "abstract": "A robotic feeding system must be able to acquire a variety of foods. Prior bite acquisition works consider single-arm spoon scooping or fork skewering, which do not generalize to foods with complex geometries and deformabilities. For example, when acquiring a group of peas, skewering could smoosh the peas while scooping without a barrier could result in chasing the peas on the plate. In order to acquire foods with such diverse properties, we propose stabilizing food items during scooping using a second arm, for example, by pushing peas against the spoon with a flat surface to prevent dispersion. The addition of this second stabilizing arm can lead to a new set of challenges. 
Critically, these strategies should stabilize the food scene without interfering with the acquisition motion, which is especially difficult for easily breakable high-risk food items, such as tofu. These high-risk foods can break between the pusher and spoon during scooping, which can lead to food waste falling onto the plate or out of the workspace. We propose a general bimanual scooping primitive and an adaptive stabilization strategy that enables successful acquisition of a diverse set of food geometries and physical properties. Our approach, CARBS: Coordinated Acquisition with Reactive Bimanual Scooping, learns to stabilize without impeding task progress by identifying high-risk foods and robustly scooping them using closed-loop visual feedback. We find that CARBS is able to generalize across food shape, size, and deformability and is additionally able to manipulate multiple food items simultaneously. CARBS achieves 87.0% success on scooping rigid foods, which is 25.8% more successful than a single-arm baseline, and reduces food breakage by 16.2% compared to an analytical baseline. Videos can be found on our website at https://sites.google.com/view/bimanualscoop-corl22/home.", "keywords": "Bimanual Manipulation;Food Acquisition;Robot-Assisted Feeding;Deformable Object Manipulation", "primary_area": "", "supplementary_material": "/attachment/4b5c65916bd03a6665bc140ee328eeb0aa93c57b.zip", "author": "Jennifer Grannen;Yilin Wu;Suneel Belkhale;Dorsa Sadigh", "authorids": "~Jennifer_Grannen1;~Yilin_Wu1;~Suneel_Belkhale1;~Dorsa_Sadigh1", "gender": ";F;M;F", "homepage": "https://jenngrannen.com;http://cs.cmu.edu/~yilinwu;https://github.com/suneelbelkhale;https://dorsa.fyi/", "dblp": ";66/3299.html;236/5069;117/3174", "google_scholar": "O5wWFpIAAAAJ;lyG0vMQAAAAJ;;ZaJEZpYAAAAJ", "orcid": ";;0000-0002-3963-7987;", "linkedin": ";;suneel-b-032b1a101/;", "or_profile": "~Jennifer_Grannen1;~Yilin_Wu1;~Suneel_Belkhale1;~Dorsa_Sadigh1", "aff": "Computer Science Department, Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngrannen2022learning,\ntitle={Learning Bimanual Scooping Policies for Food Acquisition},\nauthor={Jennifer Grannen and Yilin Wu and Suneel Belkhale and Dorsa Sadigh},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=qDtbMK67PJG}\n}", "github": "", "project": "", "reviewers": "rkd9;1gVW;FpM9;aovx", "site": "https://openreview.net/forum?id=qDtbMK67PJG", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9158797375235859986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "qUhkhHw8Dz", "title": "VideoDex: Learning Dexterity from Internet Videos", "track": "main", "status": "Poster", "tldr": "Learning Robot Action Priors from Human Videos", "abstract": "To build general robotic agents that can 
operate in many environments, it is often imperative for the robot to collect experience in the real world. However, this is often not feasible due to safety, time and hardware restrictions. We thus propose leveraging the next best thing as real world experience: internet videos of humans using their hands. Visual priors, such as visual features, are often learned from videos, but we believe that more information from videos can be utilized as a stronger prior. We build a learning algorithm, Videodex, that leverages visual, action and physical priors from human video datasets to guide robot behavior. These action and physical priors in the neural network dictate the typical human behavior for a particular robot task. We test our approach on a robot arm and dexterous hand based system and show strong results on many different manipulation tasks, outperforming various state-of-the-art methods. For videos and supplemental material visit our website at https://video-dex.github.io.", "keywords": "Dexterous Manipulation;Large Scale Robotics;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/b5283b7965795f659f18d33345ce3f84f3388c22.zip", "author": "Kenneth Shaw;Shikhar Bahl;Deepak Pathak", "authorids": "~Kenneth_Shaw1;~Shikhar_Bahl1;~Deepak_Pathak1", "gender": "M;;M", "homepage": "https://www.linkedin.com/in/kenny-shaw/;https://www.cs.cmu.edu/~sbahl2/;https://www.cs.cmu.edu/~dpathak/", "dblp": ";223/4390;155/9860", "google_scholar": ";bdHgGgEAAAAJ;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ", "orcid": ";;", "linkedin": "kenny-shaw/;;pathak22/", "or_profile": "~Kenneth_Shaw1;~Shikhar_Bahl1;~Deepak_Pathak1", "aff": "Carnegie Mellon University;Meta Facebook;Carnegie Mellon University", "aff_domain": "cmu.edu;meta.com;cmu.edu", "position": "MS student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshaw2022videodex,\ntitle={VideoDex: Learning Dexterity from Internet Videos},\nauthor={Kenneth Shaw and Shikhar Bahl and Deepak Pathak},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=qUhkhHw8Dz}\n}", "github": "", "project": "", "reviewers": "1pwQ;nwhC;KbvG", "site": "https://openreview.net/forum?id=qUhkhHw8Dz", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6059066157257783528&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "qUvTmyGpnm7", "title": "Learning Multi-Object Dynamics with Compositional Neural Radiance Fields", "track": "main", "status": "Poster", "tldr": "We present a method to learn compositional multi-object dynamics models from image observations based on implicit object encoders, Neural Radiance Fields (NeRFs), and graph neural networks.", "abstract": "We present a method to learn compositional multi-object dynamics models from image observations based on implicit object encoders, Neural Radiance Fields (NeRFs), and graph neural networks. 
NeRFs have become a popular choice for representing scenes due to their strong 3D prior. However, most NeRF approaches are trained on a single scene, representing the whole scene with a global model, making generalization to novel scenes, containing different numbers of objects, challenging. Instead, we present a compositional, object-centric auto-encoder framework that maps multiple views of the scene to a set of latent vectors representing each object separately. The latent vectors parameterize individual NeRFs from which the scene can be reconstructed. Based on those latent vectors, we train a graph neural network dynamics model in the latent space to achieve compositionality for dynamics prediction. A key feature of our approach is that the latent vectors are forced to encode 3D information through the NeRF decoder, which enables us to incorporate structural priors in learning the dynamics models, making long-term predictions more stable compared to several baselines. Simulated and real world experiments show that our method can model and learn the dynamics of compositional scenes including rigid and deformable objects.\nVideo: https://dannydriess.github.io/compnerfdyn/", "keywords": "Neural Radiance Fields;Dynamics Model Learning;Neural Implicit Representations;Visual Prediction;Graph Neural Networks;Representation Learning;Robotic Manipulation", "primary_area": "", "supplementary_material": "/attachment/ea6808af55e0b5bcf9e821b0f80f704666cdcc03.zip", "author": "Danny Driess;Zhiao Huang;Yunzhu Li;Russ Tedrake;Marc Toussaint", "authorids": "~Danny_Driess1;~Zhiao_Huang1;~Yunzhu_Li1;~Russ_Tedrake1;~Marc_Toussaint3", "gender": ";M;M;M;M", "homepage": "https://dannydriess.github.io/;;https://yunzhuli.github.io/;http://people.csail.mit.edu/russt;https://www.user.tu-berlin.de/mtoussai/", "dblp": ";172/1410;182/1831;73/1296;t/MarcToussaint", "google_scholar": "https://scholar.google.de/citations?user=wxnzyjwAAAAJ;;WlA92lcAAAAJ;nxNkEiYAAAAJ;t2X4Mg8AAAAJ", "orcid": ";;;;0000-0002-5487-6767", "linkedin": ";;;;marctoussaint/", "or_profile": "~Danny_Driess1;~Zhiao_Huang1;~Yunzhu_Li1;~Russ_Tedrake1;~Marc_Toussaint3", "aff": "Technische Universit\u00e4t Berlin;University of California, San Diego, University of California, San Diego;Massachusetts Institute of Technology;Massachusetts Institute of Technology;TU Berlin", "aff_domain": "tu-berlin.de;eng.ucsd.edu;mit.edu;mit.edu;tu-berlin.de", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndriess2022learning,\ntitle={Learning Multi-Object Dynamics with Compositional Neural Radiance Fields},\nauthor={Danny Driess and Zhiao Huang and Yunzhu Li and Russ Tedrake and Marc Toussaint},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=qUvTmyGpnm7}\n}", "github": "", "project": "", "reviewers": "eGyX;d3gL;ZKto;35gm", "site": "https://openreview.net/forum?id=qUvTmyGpnm7", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6675974802688516481&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin;University of California, San Diego;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.tu-berlin.de;https://www.ucsd.edu;https://web.mit.edu", "aff_unique_abbr": "TU Berlin;UCSD;MIT", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San Diego;Berlin", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Germany;United States" }, { "id": "qr0wqg8NqkL", "title": "TrackletMapper: Ground Surface Segmentation and Mapping from Traffic Participant Trajectories", "track": "main", "status": "Poster", "tldr": "We introduce TrackletMapper, an automatic image annotation method that leverages tracklets of traffic participants to semantically annotate images.", "abstract": "Robustly classifying ground infrastructure such as roads and street crossings is an essential task for mobile robots operating alongside pedestrians. While many semantic segmentation datasets are available for autonomous vehicles, models trained on such datasets exhibit a large domain gap when deployed on robots operating in pedestrian spaces. Manually annotating images recorded from pedestrian viewpoints is both expensive and time-consuming. To overcome this challenge, we propose \\textit{TrackletMapper}, a framework for annotating ground surface types such as sidewalks, roads, and street crossings from object tracklets without requiring human-annotated data. To this end, we project the robot ego-trajectory and the paths of other traffic participants into the ego-view camera images, creating sparse semantic annotations for multiple types of ground surfaces from which a ground segmentation model can be trained. We further show that the model can be self-distilled for additional performance benefits by aggregating a ground surface map and projecting it into the camera images, creating a denser set of training annotations compared to the sparse tracklet annotations. We qualitatively and quantitatively attest our findings on a novel large-scale dataset for mobile robots operating in pedestrian areas. 
Code and dataset will be made available upon acceptance of the manuscript.", "keywords": "Knowledge Distillation;Semantic Segmentation;Navigation", "primary_area": "", "supplementary_material": "/attachment/2a629a7fed285330d65fbc8860f48da14d9aa044.zip", "author": "Jannik Z\u00fcrn;Sebastian Weber;Wolfram Burgard", "authorids": "~Jannik_Z\u00fcrn1;sebastian.weber@students.uni-freiburg.de;wolfram.burgard@utn.de", "gender": "M;;", "homepage": "http://www2.informatik.uni-freiburg.de/~zuern/;;", "dblp": ";;", "google_scholar": "gB9JqUcAAAAJ;;", "orcid": "0000-0001-9516-905X;;", "linkedin": "jannik-zuern/;;", "or_profile": "~Jannik_Z\u00fcrn1;sebastian.weber@students.uni-freiburg.de;wolfram.burgard@utn.de", "aff": "Albert-Ludwigs-Universit\u00e4t Freiburg;;", "aff_domain": "uni-freiburg.de;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nz{\\\"u}rn2022trackletmapper,\ntitle={TrackletMapper: Ground Surface Segmentation and Mapping from Traffic Participant Trajectories},\nauthor={Jannik Z{\\\"u}rn and Sebastian Weber and Wolfram Burgard},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=qr0wqg8NqkL}\n}", "github": "", "project": "", "reviewers": "vAHU;a8Do;x2BQ;sqbj", "site": "https://openreview.net/forum?id=qr0wqg8NqkL", "pdf_size": 0, "rating": "1;4;10;10", "confidence": "", "rating_avg": 6.25, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14565080574455509597&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-freiburg.de", "aff_unique_abbr": "Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "0", "aff_campus_unique": "Freiburg", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "id": "qzMY915hCYX", "title": "Safety-Enhanced Autonomous Driving Using Interpretable Sensor Fusion Transformer", "track": "main", "status": "Poster", "tldr": "", "abstract": "Large-scale deployment of autonomous vehicles has been continually delayed due to safety concerns. On the one hand, comprehensive scene understanding is indispensable, a lack of which would result in vulnerability to rare but complex traffic situations, such as the sudden emergence of unknown objects. However, reasoning from a global context requires access to sensors of multiple types and adequate fusion of multi-modal sensor signals, which is difficult to achieve. On the other hand, the lack of interpretability in learning models also hampers the safety with unverifiable failure causes. In this paper, we propose a safety-enhanced autonomous driving framework, named Interpretable Sensor Fusion Transformer (InterFuser), to fully process and fuse information from multi-modal multi-view sensors for achieving comprehensive scene understanding and adversarial event detection. Besides, intermediate interpretable features are generated from our framework, which provide more semantics and are exploited to better constrain actions to be within the safe sets. 
We conducted extensive experiments on CARLA benchmarks, where our model outperforms prior methods, ranking the first on the public CARLA Leaderboard.", "keywords": "autonomous driving;sensor fusion;transformer;safety", "primary_area": "", "supplementary_material": "/attachment/7aed383135e2eb52fca7964f6146781f9a4abb35.zip", "author": "Hao Shao;Letian Wang;Ruobing Chen;Hongsheng Li;Yu Liu", "authorids": "~Hao_Shao1;~Letian_Wang1;~Ruobing_Chen1;~Hongsheng_Li3;~Yu_Liu2", "gender": ";M;M;M;M", "homepage": "http://hao-shao.com;;;http://www.ee.cuhk.edu.hk/~hsli;http://liuyu.us", "dblp": "66/3089.html;17/8467;;27/7402-1;97/2274-15", "google_scholar": "https://scholar.google.com.hk/citations?user=D_ZLR1oAAAAJ;https://scholar.google.com.hk/citations?user=HEzCWisAAAAJ;;BN2Ze-QAAAAJ;", "orcid": ";;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/robinc94;;", "or_profile": "~Hao_Shao1;~Letian_Wang1;~Ruobing_Chen1;~Hongsheng_Li3;~Yu_Liu2", "aff": "Tsinghua University;University of Toronto;Sensetime Group;The Chinese University of Hong Kong;SenseTime", "aff_domain": "tsinghua.edu.cn;utoronto.ca;sensetime.com;cuhk.edu.hk;sensetime.com", "position": "MS student;PhD student;Researcher;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nshao2022safetyenhanced,\ntitle={Safety-Enhanced Autonomous Driving Using Interpretable Sensor Fusion Transformer},\nauthor={Hao Shao and Letian Wang and Ruobing Chen and Hongsheng Li and Yu Liu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=qzMY915hCYX}\n}", "github": "https://github.com/opendilab/InterFuser", "project": "", "reviewers": "snM2;8oVA;djxH", "site": "https://openreview.net/forum?id=qzMY915hCYX", "pdf_size": 0, "rating": "4;6;10", "confidence": "", "rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 270, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14419870086106236647&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Tsinghua University;University of Toronto;SenseTime Group;Chinese University of Hong Kong;SenseTime", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.utoronto.ca;https://www.sensetime.com/;https://www.cuhk.edu.hk;https://www.sensetime.com", "aff_unique_abbr": "THU;U of T;Sensetime;CUHK;SenseTime", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Canada" }, { "id": "r-w9Wh-QVnH", "title": "Real-time Mapping of Physical Scene Properties with an Autonomous Robot Experimenter", "track": "main", "status": "Poster", "tldr": "An autonomous robot experimenter discovers dense physical scene properties by providing sparse interaction measurements to a 3D neural field.", "abstract": "Neural fields can be trained from scratch to represent the shape and appearance of 3D scenes efficiently. It has also been shown that they can densely map correlated properties such as semantics, via sparse interactions from a human labeller. In this work, we show that a robot can densely annotate a scene with arbitrary discrete or continuous physical properties via its own fully-autonomous experimental interactions, as it simultaneously scans and maps it with an RGB-D camera. 
A variety of scene interactions are possible, including poking with force sensing to determine rigidity, measuring local material type with single-pixel spectroscopy or predicting force distributions by pushing. Sparse experimental interactions are guided by entropy to enable high efficiency, with tabletop scene properties densely mapped from scratch in a few minutes from a few tens of interactions.", "keywords": "Neural field;robot experimentation;physical properties", "primary_area": "", "supplementary_material": "/attachment/4925e569fe06788e78d06501468f70c2ce569973.zip", "author": "Iain Haughton;Edgar Sucar;Andre Mouton;Edward Johns;Andrew Davison", "authorids": "~Iain_Haughton1;~Edgar_Sucar1;~Andre_Mouton3;~Edward_Johns1;~Andrew_Davison1", "gender": "M;M;M;M;M", "homepage": ";;;https://www.robot-learning.uk;http://www.doc.ic.ac.uk/~ajd/", "dblp": ";200/8624;;68/9968;d/AndrewJDavison", "google_scholar": ";https://scholar.google.co.uk/citations?user=OsDdSWgAAAAJ;https://scholar.google.co.uk/citations?user=K0V7E4MAAAAJ;https://scholar.google.co.uk/citations?user=sMIUkiQAAAAJ;https://scholar.google.co.uk/citations?user=A0ae1agAAAAJ", "orcid": ";;;0000-0002-8914-8786;", "linkedin": "iain-haughton-194321135/;;andre-mouton/;https://uk.linkedin.com/in/edward-johns-1b24845a;", "or_profile": "~Iain_Haughton1;~Edgar_Sucar1;~Andre_Mouton3;~Edward_Johns1;~Andrew_Davison1", "aff": "Imperial College London;Imperial College London;Dyson Ltd;Imperial College London;Imperial College London", "aff_domain": "ic.ac.uk;imperial.ac.uk;dyson.com;imperial.ac.uk;imperial.ac.uk", "position": "Researcher;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhaughton2022realtime,\ntitle={Real-time Mapping of Physical Scene Properties with an Autonomous Robot Experimenter},\nauthor={Iain Haughton and Edgar Sucar and Andre Mouton and Edward Johns and Andrew Davison},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=r-w9Wh-QVnH}\n}", "github": "", "project": "", "reviewers": "yfuZ;jBNQ;CQYj;beEy", "site": "https://openreview.net/forum?id=r-w9Wh-QVnH", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 9, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15602849145568368621&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Imperial College London;Dyson", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.dyson.com", "aff_unique_abbr": "ICL;Dyson", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "rJIwfTxTXvm", "title": "Learning to Correct Mistakes: Backjumping in Long-Horizon Task and Motion Planning", "track": "main", "status": "Poster", "tldr": "", "abstract": "As robots become increasingly capable of manipulation and long-term autonomy, long-horizon task and motion planning problems are becoming increasingly important. A key challenge in such problems is that early actions in the plan may make future actions infeasible. When reaching a dead-end in the search, most existing planners use backtracking, which exhaustively reevaluates motion-level actions, often resulting in inefficient planning, especially when the search depth is large. 
In this paper, we propose to learn backjumping heuristics which identify the culprit action directly using supervised learning models to guide the task-level search. Based on evaluations of two different tasks, we find that our method significantly improves planning efficiency compared to backtracking and also generalizes to problems with novel numbers of objects.", "keywords": "task and motion planning;heuristic learning;supervised learning", "primary_area": "", "supplementary_material": "/attachment/3b6668059a79069f45d79fa2669eb6986ae5c091.zip", "author": "Yoonchang Sung;Zizhao Wang;Peter Stone", "authorids": "~Yoonchang_Sung1;~Zizhao_Wang3;~Peter_Stone1", "gender": ";M;M", "homepage": "https://yoonchangsung.com/;;http://www.cs.utexas.edu/~pstone", "dblp": ";245/5008;s/PeterStone", "google_scholar": ";https://scholar.google.ca/citations?user=V4KQIWsAAAAJ;qnwjcfAAAAAJ", "orcid": ";;0000-0002-6795-420X", "linkedin": ";;", "or_profile": "~Yoonchang_Sung1;~Zizhao_Wang3;~Peter_Stone1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu;utexas.edu", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nsung2022learning,\ntitle={Learning to Correct Mistakes: Backjumping in Long-Horizon Task and Motion Planning},\nauthor={Yoonchang Sung and Zizhao Wang and Peter Stone},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=rJIwfTxTXvm}\n}", "github": "", "project": "", "reviewers": "q4VZ;MGEc;Kqwz;MW4W", "site": "https://openreview.net/forum?id=rJIwfTxTXvm", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6448217433580292227&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "rbIzq-I84i_", "title": "Graph network simulators can learn discontinuous, rigid contact dynamics", "track": "main", "status": "Poster", "tldr": "Graph network simulators can accurately and efficiently model discontinuous, rigid body contact ", "abstract": "Recent years have seen a rise in techniques for modeling discontinuous dynamics, such as rigid contact or switching motion modes, using deep learning. A common claim is that deep networks are incapable of accurately modeling rigid-body dynamics without explicit modules for handling contacts, due to the continuous nature of how deep networks are parameterized. Here we investigate this claim with experiments on established real and simulated datasets and show that general-purpose graph network simulators, with no contact-specific assumptions, can learn and predict contact discontinuities. Furthermore, contact dynamics learned by graph network simulators capture real-world cube tossing trajectories more accurately than highly engineered robotics simulators, even when provided with only 8 -- 16 trajectories. Overall, this suggests that rigid-body dynamics do not pose a fundamental challenge for deep networks with the appropriate general architecture and parameterization. 
Instead, our work opens new directions for considering when deep learning-based models might be preferable to traditional simulation environments for accurately modeling real-world contact dynamics.", "keywords": "graph networks;contacts;rigid body dynamics;simulation", "primary_area": "", "supplementary_material": "/attachment/e5a5bde7d5a295373682f0a3c1b9950105bd276a.zip", "author": "Kelsey R Allen;Tatiana Lopez Guevara;Yulia Rubanova;Kim Stachenfeld;Alvaro Sanchez-Gonzalez;Peter Battaglia;Tobias Pfaff", "authorids": "~Kelsey_R_Allen1;zepolitat@deepmind.com;~Yulia_Rubanova2;~Kim_Stachenfeld1;~Alvaro_Sanchez-Gonzalez1;~Peter_Battaglia1;~Tobias_Pfaff1", "gender": "F;;;F;M;M;M", "homepage": ";;;https://neurokim.com/;;;http://tobiaspfaff.com", "dblp": "153/9528;;;155/1888;222/1889;41/3400;67/7591", "google_scholar": "kpcjFekAAAAJ;;;jNtH2WUAAAAJ;https://scholar.google.co.uk/citations?user=d1oQ8NcAAAAJ;https://scholar.google.co.uk/citations?user=nQ7Ij30AAAAJ;3oUgDKQAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Kelsey_R_Allen1;zepolitat@deepmind.com;~Yulia_Rubanova2;~Kim_Stachenfeld1;~Alvaro_Sanchez-Gonzalez1;~Peter_Battaglia1;~Tobias_Pfaff1", "aff": "Google;;;Google DeepMind;Google DeepMind;Google DeepMind;Deepmind", "aff_domain": "deepmind.com;;;deepmind.com;google.com;google.com;google.com", "position": "Research Scientist;;;Research Scientist;Senior Research Engineer;Researcher;Research scientist", "bibtex": "@inproceedings{\nallen2022graph,\ntitle={Graph network simulators can learn discontinuous, rigid contact dynamics},\nauthor={Kelsey R Allen and Tatiana Lopez Guevara and Yulia Rubanova and Kim Stachenfeld and Alvaro Sanchez-Gonzalez and Peter Battaglia and Tobias Pfaff},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=rbIzq-I84i_}\n}", "github": "", "project": "", "reviewers": "ETfg;MGH6;ehVb;8UbN", "site": "https://openreview.net/forum?id=rbIzq-I84i_", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 9, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14738281920794786024&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://deepmind.com", "aff_unique_abbr": "Google;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "s6NEzqZKaP-", "title": "Visuotactile Affordances for Cloth Manipulation with Local Control", "track": "main", "status": "Poster", "tldr": "Visuotactile edge grasp affordances and tactile sliding for cloth manipulation", "abstract": "Cloth in the real world is often crumpled, self-occluded, or folded in on itself such that key regions, such as corners, are not directly graspable, making manipulation difficult. We propose a system that leverages visual and tactile perception to unfold the cloth via grasping and sliding on edges. Doing so, the robot is able to grasp two adjacent corners, enabling subsequent manipulation tasks like folding or hanging. We develop tactile perception networks that classify whether an edge is grasped and estimate the pose of the edge. 
We use the edge classification network to supervise a visuotactile edge grasp affordance network that can grasp edges with a 90% success rate. Once an edge is grasped, we demonstrate that the robot can slide along the cloth to the adjacent corner using tactile pose estimation/control in real time.", "keywords": "Multi-modal learning;Cloth manipulation;Tactile control", "primary_area": "", "supplementary_material": "/attachment/5c185a438a895165a9a44eebda8e0c4d9cd494f1.zip", "author": "Neha Sunil;Shaoxiong Wang;Yu She;Edward Adelson;Alberto Rodriguez Garcia", "authorids": "~Neha_Sunil1;~Shaoxiong_Wang1;shey@purdue.edu;~Edward_Adelson1;~Alberto_Rodriguez_Garcia1", "gender": "F;M;;M;M", "homepage": ";http://shaoxiongwang.com/;;http://persci.mit.edu/people/adelson;http://mcube.mit.edu/", "dblp": ";188/2766;;;", "google_scholar": "2bZ5LxEAAAAJ;NbVgjv0AAAAJ;;;AC93g9kAAAAJ", "orcid": ";;;0000-0003-2222-6775;", "linkedin": ";;;;", "or_profile": "~Neha_Sunil1;~Shaoxiong_Wang1;shey@purdue.edu;~Edward_Adelson1;~Alberto_Rodriguez_Garcia1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;;mit.edu;mit.edu", "position": "PhD student;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsunil2022visuotactile,\ntitle={Visuotactile Affordances for Cloth Manipulation with Local Control},\nauthor={Neha Sunil and Shaoxiong Wang and Yu She and Edward Adelson and Alberto Rodriguez Garcia},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=s6NEzqZKaP-}\n}", "github": "", "project": "", "reviewers": "6ikq;Bk2e;MA9Y;a2rY", "site": "https://openreview.net/forum?id=s6NEzqZKaP-", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13077121258296308627&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "sK2aWU7X9b8", "title": "Training Robots to Evaluate Robots: Example-Based Interactive Reward Functions for Policy Learning", "track": "main", "status": "Oral", "tldr": "We present LIRF, a framework for conveniently training example-based interactive robot policies to evaluate robot task policies, in order both to provide rewards to train them, and to verify their execution in partially observed tasks.", "abstract": "Physical interactions can often help reveal information that is not readily apparent. For example, we may tug at a table leg to evaluate whether it is built well, or turn a water bottle upside down to check that it is watertight. We propose to train robots to acquire such interactive behaviors automatically, for the purpose of evaluating the result of an attempted robotic skill execution. These evaluations in turn serve as \"interactive reward functions\" (IRFs) for training reinforcement learning policies to perform the target skill, such as screwing the table leg tightly. 
In addition, even after task policies are fully trained, IRFs can serve as verification mechanisms that improve online task execution. For any given task, our IRFs can be conveniently trained using only examples of successful outcomes, and no further specification is needed to train the task policy thereafter. In our evaluations on door locking and weighted block stacking in simulation, and screw tightening on a real robot, IRFs enable large performance improvements, even outperforming baselines with access to demonstrations or carefully engineered rewards.", "keywords": "Reinforcement Learning;Interactive Perception;Task Specification", "primary_area": "", "supplementary_material": "/attachment/6de0f9f7d0cf562ab61092cd5982296eaeb2ea40.zip", "author": "Kun Huang;Edward S. Hu;Dinesh Jayaraman", "authorids": "~Kun_Huang6;~Edward_S._Hu1;~Dinesh_Jayaraman2", "gender": "M;M;M", "homepage": ";https://www.seas.upenn.edu/~dineshj/;https://www.edwardshu.com", "dblp": ";145/3870;245/4627", "google_scholar": "RYLugBwAAAAJ;QxLpghAAAAAJ;", "orcid": ";0000-0002-6888-3095;", "linkedin": "kun-huang-620034171/;dinesh-jayaraman-44b31539/;", "or_profile": "~Kun_Huang6;~Dinesh_Jayaraman2;~Edward_Shichao_Hu1", "aff": "School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;upenn.edu;upenn.edu", "position": "MS student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhuang2022training,\ntitle={Training Robots to Evaluate Robots: Example-Based Interactive Reward Functions for Policy Learning},\nauthor={Kun Huang and Edward S. Hu and Dinesh Jayaraman},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=sK2aWU7X9b8}\n}", "github": "https://github.com/penn-pal-lab/interactive_reward_functions", "project": "", "reviewers": "T7V5;FXQi;2X1d;BeWt", "site": "https://openreview.net/forum?id=sK2aWU7X9b8", "pdf_size": 0, "rating": "10;10;10;10", "confidence": "", "rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 23, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12126605991600474410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "School of Engineering and Applied Science", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "sygvGP-YLfx", "title": "See, Hear, and Feel: Smart Sensory Fusion for Robotic Manipulation", "track": "main", "status": "Poster", "tldr": "We use vision, audio, and touch on two challenging robotic manipulation tasks---dense packing and pouring, and demonstrate the benefit of multisensory perception in robot learning.", "abstract": "Humans use all of their senses to accomplish different tasks in everyday activities. In contrast, existing work on robotic manipulation mostly relies on one, or occasionally two modalities, such as vision and touch. In this work, we systematically study how visual, auditory, and tactile perception can jointly help robots to solve complex manipulation tasks. We build a robot system that can see with a camera, hear with a contact microphone, and feel with a vision-based tactile sensor, with all three sensory modalities fused with a self-attention model. 
Results on two challenging tasks, dense packing and pouring, demonstrate the necessity and power of multisensory perception for robotic manipulation: vision displays the global status of the robot but can often suffer from occlusion, audio provides immediate feedback of key moments that are even invisible, and touch offers precise local geometry for decision making. Leveraging all three modalities, our robotic system significantly outperforms prior methods.", "keywords": "Multisensory Perception;Robotic Manipulation;Robot Learning", "primary_area": "", "supplementary_material": "/attachment/169a3338f0eeb6099c67f5f23325348c23ff7eff.zip", "author": "Hao Li;Yizhi Zhang;Junzhe Zhu;Shaoxiong Wang;Michelle A Lee;Huazhe Xu;Edward Adelson;Li Fei-Fei;Ruohan Gao;Jiajun Wu", "authorids": "~Hao_Li23;~Yizhi_Zhang1;~Junzhe_Zhu1;~Shaoxiong_Wang1;~Michelle_A_Lee1;~Huazhe_Xu1;~Edward_Adelson1;~Li_Fei-Fei1;~Ruohan_Gao2;~Jiajun_Wu1", "gender": "Not Specified;F;;M;;M;M;F;M;M", "homepage": "https://haolirobo.github.io;;;http://shaoxiongwang.com/;http://stanford.edu/~mishlee/;http://hxu.rocks;http://persci.mit.edu/people/adelson;https://profiles.stanford.edu/fei-fei-li;https://ruohangao.github.io/;https://jiajunwu.com", "dblp": ";https://dblp.org/rec/conf/corl/LiZZWLXA0G022.html;;188/2766;;164/9006;;79/2528;176/5787;117/4768", "google_scholar": "IDmUyTEAAAAJ;;ss3SR9YAAAAJ;NbVgjv0AAAAJ;;t9HPFawAAAAJ;;rDfyQnIAAAAJ;i02oEgMAAAAJ;2efgcS0AAAAJ", "orcid": "0000-0001-5030-457X;;;;;;0000-0003-2222-6775;;0000-0002-8346-1114;0000-0002-4176-343X", "linkedin": "hao-li-sjtu-pu/;yizhi-zhang;;;;;;fei-fei-li-4541247/;;jiajunwu/", "or_profile": "~Hao_Li23;~Yizhi_Zhang1;~Junzhe_Zhu1;~Shaoxiong_Wang1;~Michelle_A_Lee1;~Huazhe_Xu1;~Edward_Adelson1;~Li_Fei-Fei1;~Ruohan_Gao2;~Jiajun_Wu1", "aff": "Stanford University;Stanford University;Stanford University;Massachusetts Institute of Technology;New York University;Stanford University;Massachusetts Institute of Technology;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;mit.edu;nyu.edu;stanford.edu;mit.edu;stanford.edu;cs.stanford.edu;stanford.edu", "position": "MS student;MS student;MS student;PhD student;Assistant Professor;Postdoc;Full Professor;Full Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nli2022see,\ntitle={See, Hear, and Feel: Smart Sensory Fusion for Robotic Manipulation},\nauthor={Hao Li and Yizhi Zhang and Junzhe Zhu and Shaoxiong Wang and Michelle A Lee and Huazhe Xu and Edward Adelson and Li Fei-Fei and Ruohan Gao and Jiajun Wu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=sygvGP-YLfx}\n}", "github": "", "project": "", "reviewers": "2ByC;yYyH;YUYT;329W", "site": "https://openreview.net/forum?id=sygvGP-YLfx", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 10, "corr_rating_confidence": 0, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12754123598906236684&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;2;0;1;0;0;0", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu;https://www.nyu.edu", "aff_unique_abbr": "Stanford;MIT;NYU", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": 
"0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "t-IO7wCaNgH", "title": "Learning Temporally Extended Skills in Continuous Domains as Symbolic Actions for Planning", "track": "main", "status": "Oral", "tldr": "We jointly learn continuous control skills and a forward model of their effect in a symbolic abstraction, which can be leveraged for planning and plan execution", "abstract": "Problems which require both long-horizon planning and continuous control capabilities pose significant challenges to existing reinforcement learning agents. In this paper we introduce a novel hierarchical reinforcement learning agent which links temporally extended skills for continuous control with a forward model in a symbolic discrete abstraction of the environment's state for planning. We term our agent SEADS for Symbolic Effect-Aware Diverse Skills. We formulate an objective and corresponding algorithm which leads to unsupervised learning of a diverse set of skills through intrinsic motivation given a known state abstraction. The skills are jointly learned with the symbolic forward model which captures the effect of skill execution in the state abstraction. After training, we can leverage the skills as symbolic actions using the forward model for long-horizon planning and subsequently execute the plan using the learned continuous-action control skills. The proposed algorithm learns skills and forward models that can be used to solve complex tasks which require both continuous control and long-horizon planning capabilities with high success rate. It compares favorably with other flat and hierarchical reinforcement learning baseline agents and is successfully demonstrated with a real robot.", "keywords": "temporally extended skill learning;hierarchical reinforcement learning;diverse skill learning", "primary_area": "", "supplementary_material": "/attachment/c8c391f43906b538f1550b7aa9d245a4687dff92.zip", "author": "Jan Achterhold;Markus Krimmel;Joerg Stueckler", "authorids": "~Jan_Achterhold1;~Markus_Krimmel1;~Joerg_Stueckler2", "gender": ";M;M", "homepage": ";;https://is.mpg.de/employees/jstueckler", "dblp": "238/2740;324/2544;99/3327", "google_scholar": "https://scholar.google.de/citations?user=PqkeMMEAAAAJ;;https://scholar.google.de/citations?user=xrOzfucAAAAJ", "orcid": ";;", "linkedin": "https://de.linkedin.com/in/jan-achterhold;;", "or_profile": "~Jan_Achterhold1;~Markus_Krimmel1;~Joerg_Stueckler1", "aff": "University of Tuebingen;Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "uni-tuebingen.de;is.tuebingen.mpg.de;tuebingen.mpg.de", "position": "PhD student;Undergrad student;Group Leader", "bibtex": "@inproceedings{\nachterhold2022learning,\ntitle={Learning Temporally Extended Skills in Continuous Domains as Symbolic Actions for Planning},\nauthor={Jan Achterhold and Markus Krimmel and Joerg Stueckler},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=t-IO7wCaNgH}\n}", "github": "", "project": "", "reviewers": "XcfM;a18m;rgwq;w2Y9", "site": "https://openreview.net/forum?id=t-IO7wCaNgH", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1986649434104596969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1", 
"aff_unique_norm": "University of Tuebingen;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Uni T\u00fcbingen;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "tGbpgz6yOrI", "title": "R3M: A Universal Visual Representation for Robot Manipulation", "track": "main", "status": "Poster", "tldr": "Pre-training a visual representation on diverse human video datasets, that can be downloaded and used off-the-shelf to enable more data efficient robot learning in simulation and the real world. ", "abstract": "We study how visual representations pre-trained on diverse human video data can enable data-efficient learning of downstream robotic manipulation tasks. Concretely, we pre-train a visual representation using the Ego4D human video dataset using a combination of time-contrastive learning, video-language alignment, and an L1 penalty to encourage sparse and compact representations. The resulting representation, R3M, can be used as a frozen perception module for downstream policy learning. Across a suite of 12 simulated robot manipulation tasks, we find that R3M improves task success by over 20% compared to training from scratch and by over 10% compared to state-of-the-art visual representations like CLIP and MoCo. Furthermore, R3M enables a Franka Emika Panda arm to learn a range of manipulation tasks in a real, cluttered apartment given just 20 demonstrations. ", "keywords": "Visual Representation Learning;Robotic Manipulation", "primary_area": "", "supplementary_material": "", "author": "Suraj Nair;Aravind Rajeswaran;Vikash Kumar;Chelsea Finn;Abhinav Gupta", "authorids": "~Suraj_Nair1;~Aravind_Rajeswaran1;~Vikash_Kumar2;~Chelsea_Finn1;~Abhinav_Gupta1", "gender": "M;M;M;F;M", "homepage": "https://suraj-nair-1.github.io/;http://aravindr93.github.io/;http://vikashplus.github.io/;https://ai.stanford.edu/~cbfinn/;http://www.cs.cmu.edu/~abhinavg", "dblp": ";164/5778;82/7475;131/1783;36/7024-1", "google_scholar": "EHSuFcwAAAAJ;_EJrRVAAAAAJ;nu3W--sAAAAJ;vfPE6hgAAAAJ;https://scholar.google.com.tw/citations?user=bqL73OkAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Suraj_Nair1;~Aravind_Rajeswaran1;~Vikash_Kumar2;~Chelsea_Finn1;~Abhinav_Gupta1", "aff": "Meta Facebook;Meta Facebook;Meta Facebook;Google;Meta Facebook", "aff_domain": "facebook.com;meta.com;facebook.com;google.com;fb.com", "position": "Student Researcher;Research Scientist;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nnair2022rm,\ntitle={R3M: A Universal Visual Representation for Robot Manipulation},\nauthor={Suraj Nair and Aravind Rajeswaran and Vikash Kumar and Chelsea Finn and Abhinav Gupta},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=tGbpgz6yOrI}\n}", "github": "https://github.com/facebookresearch/r3m", "project": "", "reviewers": "JMfs;QzCR;QF3c", "site": "https://openreview.net/forum?id=tGbpgz6yOrI", "pdf_size": 0, "rating": "1;6;10", "confidence": "", "rating_avg": 5.666666666666667, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 615, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7791793210552863237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Meta;Google", "aff_unique_dep": 
"Meta Platforms, Inc.;Google", "aff_unique_url": "https://meta.com;https://www.google.com", "aff_unique_abbr": "Meta;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "tJE1Yyi8fUX", "title": "DexPoint: Generalizable Point Cloud Reinforcement Learning for Sim-to-Real Dexterous Manipulation", "track": "main", "status": "Poster", "tldr": "Learning generalizable dexterous manipulation with reinforcement learning using point cloud observation.", "abstract": "We propose a sim-to-real framework for dexterous manipulation which can generalize to new objects of the same category in the real world. The key of our framework is to train the manipulation policy with point cloud inputs and dexterous hands. We propose two new techniques to enable joint learning on multiple objects and sim-to-real generalization: (i) using imagined hand point clouds as augmented inputs; and (ii) designing novel contact-based rewards. We empirically evaluate our method using an Allegro Hand to grasp novel objects in both simulation and real world. To the best of our knowledge, this is the first policy learning-based framework that achieves such generalization results with dexterous hands. Our project page is available at https://yzqin.github.io/dexpoint.", "keywords": "Dexterous Manipulation;Policy Learning;Point Clouds;Sim-to-Real", "primary_area": "", "supplementary_material": "/attachment/98cf90b36589ab3dc5c3971ac1c36a4874a75d13.zip", "author": "Yuzhe Qin;Binghao Huang;Zhao-Heng Yin;Hao Su;Xiaolong Wang", "authorids": "~Yuzhe_Qin1;~Binghao_Huang1;~Zhao-Heng_Yin1;~Hao_Su1;~Xiaolong_Wang3", "gender": "M;;M;M;M", "homepage": "https://yzqin.github.io/;https://binghao-huang.github.io/;http://ai.ucsd.edu/~haosu;https://xiaolonw.github.io/;https://zhaohengyin.github.io", "dblp": "241/9337;;09/4945-1;91/952-4;264/9661", "google_scholar": "3KF3AIMAAAAJ;nqoOetAAAAAJ;1P8Zu04AAAAJ;Y8O9N_0AAAAJ;_egJxfMAAAAJ", "orcid": "0000-0002-9321-9305;;;;", "linkedin": ";;;;", "or_profile": "~Yuzhe_Qin1;~Binghao_Huang1;~Hao_Su1;~Xiaolong_Wang3;~Zhao_Heng_Yin1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego;Hong Kong University of Science and Technology", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;ust.hk", "position": "PhD student;MS student;Assistant Professor;Assistant Professor;MPhil", "bibtex": "@inproceedings{\nqin2022dexpoint,\ntitle={DexPoint: Generalizable Point Cloud Reinforcement Learning for Sim-to-Real Dexterous Manipulation},\nauthor={Yuzhe Qin and Binghao Huang and Zhao-Heng Yin and Hao Su and Xiaolong Wang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=tJE1Yyi8fUX}\n}", "github": "", "project": "", "reviewers": "6SGW;aTKU;BaTP;Cjyv", "site": "https://openreview.net/forum?id=tJE1Yyi8fUX", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7835164971948704773&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of California, San Diego;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.ust.hk", "aff_unique_abbr": 
"UCSD;HKUST", "aff_campus_unique_index": "0;0;0;0;1", "aff_campus_unique": "San Diego;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;China" }, { "id": "tVgD4METs6o", "title": "Motion Style Transfer: Modular Low-Rank Adaptation for Deep Motion Forecasting", "track": "main", "status": "Poster", "tldr": "We propose a modular low-rank adaptation method that enables fast adaptation of deep motion forecasting models to new styles with limited samples", "abstract": "Deep motion forecasting models have achieved great success when trained on a massive amount of data. Yet, they often perform poorly when training data is limited. To address this challenge, we propose a transfer learning approach for efficiently adapting pre-trained forecasting models to new domains, such as unseen agent types and scene contexts. Unlike the conventional fine-tuning approach that updates the whole encoder, our main idea is to reduce the amount of tunable parameters that can precisely account for the target domain-specific motion style. To this end, we introduce two components that exploit our prior knowledge of motion style shifts: (i) a low-rank motion style adapter that projects and adjusts the style features at a low-dimensional bottleneck; and (ii) a modular adapter strategy that disentangles the features of scene context and motion history to facilitate a fine-grained choice of adaptation layers. Through extensive experimentation, we show that our proposed adapter design, coined MoSA, outperforms prior methods on several forecasting benchmarks.", "keywords": "Motion Forecasting;Trajectory Prediction;Distribution Shifts;Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/2a3027993399b2146b7a151bfe1c2ec3d19cebca.zip", "author": "Parth Kothari;Danya Li;Yuejiang Liu;Alexandre Alahi", "authorids": "~Parth_Kothari1;~Danya_Li1;~Yuejiang_Liu1;~Alexandre_Alahi3", "gender": "M;F;;M", "homepage": "https://thedebugger811.github.io/;;https://sites.google.com/view/yuejiangliu;https://vita.epfl.ch/", "dblp": ";;202/5799;48/3455", "google_scholar": ";;https://scholar.google.com/citations?hl=en;UIhXQ64AAAAJ", "orcid": ";0000-0001-5783-0978;;", "linkedin": ";;;", "or_profile": "~Parth_Kothari1;~Danya_Li1;~Yuejiang_Liu1;~Alexandre_Alahi3", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nkothari2022motion,\ntitle={Motion Style Transfer: Modular Low-Rank Adaptation for Deep Motion Forecasting},\nauthor={Parth Kothari and Danya Li and Yuejiang Liu and Alexandre Alahi},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=tVgD4METs6o}\n}", "github": "https://github.com/vita-epfl/motion-style-transfer", "project": "", "reviewers": "Qhox;g1Er;krte;SST5;PiYP", "site": "https://openreview.net/forum?id=tVgD4METs6o", "pdf_size": 0, "rating": "4;4;6;6;6", "confidence": "", "rating_avg": 5.2, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17455089911976190321&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "teEnA3L4aRe", "title": "DiffStack: A Differentiable and Modular Control Stack for Autonomous Vehicles", "track": "main", "status": "Poster", "tldr": "We present a differentiable yet modular stack for autonomous driving with a learned prediction model and differentiable planning and control algorithms that can be trained separately or end-to-end.", "abstract": "Autonomous vehicle (AV) stacks are typically built in a modular fashion, with explicit components performing detection, tracking, prediction, planning, control, etc. While modularity improves reusability, interpretability, and generalizability, it also suffers from compounding errors, information bottlenecks, and integration challenges. To overcome these challenges, a prominent approach is to convert the AV stack into an end-to-end neural network and train it with data. While such approaches have achieved impressive results, they typically lack interpretability and reusability, and they eschew principled analytical components, such as planning and control, in favor of deep neural networks. To enable the joint optimization of AV stacks while retaining modularity, we present DiffStack, a differentiable and modular stack for prediction, planning, and control. Crucially, our model-based planning and control algorithms leverage recent advancements in differentiable optimization to produce gradients, enabling optimization of upstream components, such as prediction, via backpropagation through planning and control. Our results on the nuScenes dataset indicate that end-to-end training with DiffStack yields substantial improvements in open-loop and closed-loop planning metrics by, e.g., learning to make fewer prediction errors that would affect planning. 
Beyond these immediate benefits, DiffStack opens up new opportunities for fully data-driven yet modular and interpretable AV architectures.", "keywords": "Autonomous Vehicles;Differentiable Algorithms;Motion Forecasting;Planning;Control", "primary_area": "", "supplementary_material": "/attachment/3679b12222862ec35dd495464c8f2c4fb0e43f3e.zip", "author": "Peter Karkus;Boris Ivanovic;Shie Mannor;Marco Pavone", "authorids": "~Peter_Karkus1;~Boris_Ivanovic1;~Shie_Mannor2;~Marco_Pavone1", "gender": "M;;M;M", "homepage": "https://peterkarkus.com/;http://www.borisivanovic.com/;https://shie.net.technion.ac.il;https://web.stanford.edu/~pavone/", "dblp": "154/9692;203/8356;20/1669;91/3382-1.html", "google_scholar": "cjUid0YAAAAJ;ey9AQcEAAAAJ;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ;RhOpyXcAAAAJ", "orcid": ";0000-0002-8698-202X;;", "linkedin": ";boris-ivanovic-a3103064;;", "or_profile": "~Peter_Karkus1;~Boris_Ivanovic1;~Shie_Mannor2;~Marco_Pavone1", "aff": "NVIDIA;NVIDIA;Technion - Israel Institute of Technology, Technion;Stanford University", "aff_domain": "nvidia.com;nvidia.com;technion.il;stanford.edu", "position": "Researcher;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkarkus2022diffstack,\ntitle={DiffStack: A Differentiable and Modular Control Stack for Autonomous Vehicles},\nauthor={Peter Karkus and Boris Ivanovic and Shie Mannor and Marco Pavone},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=teEnA3L4aRe}\n}", "github": "https://sites.google.com/view/diffstack", "project": "", "reviewers": "jUVa;oNtX;67Mn;a5FX", "site": "https://openreview.net/forum?id=teEnA3L4aRe", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11938321184642768638&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "NVIDIA;Technion - Israel Institute of Technology;Stanford University", "aff_unique_dep": "NVIDIA Corporation;;", "aff_unique_url": "https://www.nvidia.com;https://www.technion.ac.il;https://www.stanford.edu", "aff_unique_abbr": "NVIDIA;Technion;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Israel" }, { "id": "th7GW868Pok", "title": "Proactive Robot Assistance via Spatio-Temporal Object Modeling", "track": "main", "status": "Poster", "tldr": "A graph translation network based model that can learn to predict robot actions by modeling future movement of objects in an environment resulting from the user\u2019s routine activities, enabling a robot to assist proactively, without being asked.", "abstract": "Proactive robot assistance enables a robot to anticipate and provide for a user's needs without being explicitly asked. We formulate proactive assistance as the problem of the robot anticipating temporal patterns of object movements associated with everyday user routines, and proactively assisting the user by placing objects to adapt the environment to their needs. We introduce a generative graph neural network to learn a unified spatio-temporal predictive model of object dynamics from temporal sequences of object arrangements. 
We additionally contribute the Household Object Movements from Everyday Routines (HOMER) dataset, which tracks household objects associated with human activities of daily living across 50+ days for five simulated households. Our model outperforms the leading baseline in predicting object movement, correctly predicting locations for 11.1% more objects and wrongly predicting locations for 11.5% fewer objects used by the human user.", "keywords": "Proactive Robot Assistance;Spatio-Temporal Object Tracking", "primary_area": "", "supplementary_material": "/attachment/8f62bfa0220c500ca9c65a27e8f3ac5400789a4d.zip", "author": "Maithili Patel;Sonia Chernova", "authorids": "~Maithili_Patel1;~Sonia_Chernova2", "gender": "F;F", "homepage": "https://maithili.github.io;https://www.cc.gatech.edu/~chernova/", "dblp": "334/4404;27/1140", "google_scholar": ";EYo_WkEAAAAJ", "orcid": "0000-0001-8730-9198;0000-0001-6320-0825", "linkedin": "maithili/;", "or_profile": "~Maithili_Patel1;~Sonia_Chernova2", "aff": "Allen Institute for Artificial Intelligence;Georgia Institute of Technology", "aff_domain": "allenai.org;gatech.edu", "position": "Research Intern;Associate Professor", "bibtex": "@inproceedings{\npatel2022proactive,\ntitle={Proactive Robot Assistance via Spatio-Temporal Object Modeling},\nauthor={Maithili Patel and Sonia Chernova},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=th7GW868Pok}\n}", "github": "https://github.com/Maithili/SpatioTemporalObjectTracking", "project": "", "reviewers": "EX28;QKuy;fzEn;KdUK", "site": "https://openreview.net/forum?id=th7GW868Pok", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1100661759957216120&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://allenai.org;https://www.gatech.edu", "aff_unique_abbr": "AI2;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "tiPHpS4eA4", "title": "Towards Scale Balanced 6-DoF Grasp Detection in Cluttered Scenes", "track": "main", "status": "Poster", "tldr": "This paper discusses the problem of scale imbalance in 6-DoF grasp detection for the first time and proposes a novel approach to address the difficulty in dealing with small-scale samples.", "abstract": "In this paper, we focus on the problem of feature learning in the presence of scale imbalance for 6-DoF grasp detection and propose a novel approach to especially address the difficulty in dealing with small-scale samples. A Multi-scale Cylinder Grouping (MsCG) module is presented to enhance local geometry representation by combining multi-scale cylinder features and global context. Moreover, a Scale Balanced Learning (SBL) loss and an Object Balanced Sampling (OBS) strategy are designed, where SBL enlarges the gradients of the samples whose scales are in low frequency by apriori weights while OBS captures more points on small-scale objects with the help of an auxiliary segmentation network. They alleviate the influence of the uneven distribution of grasp scales in training and inference respectively. 
In addition, Noisy-clean Mix (NcM) data augmentation is introduced to facilitate training, aiming to bridge the domain gap between synthetic and raw scenes in an efficient way by generating more data that mixes them into single scenes at the instance level. Extensive experiments are conducted on the GraspNet-1Billion benchmark and competitive results are reached with significant gains on small-scale cases. Besides, the performance of real-world grasping highlights its generalization ability.", "keywords": "Grasp Detection;Point-cloud Representation;Scale Balance", "primary_area": "", "supplementary_material": "/attachment/e643bc55eda1f19d051d02402597bafe7710266a.zip", "author": "Haoxiang Ma;Di Huang", "authorids": "~Haoxiang_Ma1;~Di_Huang4", "gender": ";M", "homepage": "http://mahaoxiang822.github.io/;http://irip.buaa.edu.cn/dihuang/index.html", "dblp": ";45/780-1", "google_scholar": "RC0U_o0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-2412-9330", "linkedin": ";", "or_profile": "~Haoxiang_Ma1;~Di_Huang4", "aff": "Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nma2022towards,\ntitle={Towards Scale Balanced 6-DoF Grasp Detection in Cluttered Scenes},\nauthor={Haoxiang Ma and Di Huang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=tiPHpS4eA4}\n}", "github": "https://github.com/mahaoxiang822/Scale-Balanced-Grasp", "project": "", "reviewers": "iqJc;XwLV;dLNy", "site": "https://openreview.net/forum?id=tiPHpS4eA4", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4295010405230512010&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "tyXMrpEMkDb", "title": "Detecting Incorrect Visual Demonstrations for Improved Policy Learning", "track": "main", "status": "Poster", "tldr": "A framework for detecting incorrect visual demonstration for improved imitation learning ", "abstract": "Learning tasks only from raw video demonstrations is the current state of the art in robotics visual imitation learning research. The implicit assumption here is that all video demonstrations show an optimal/sub-optimal way of performing the task. What if that is not true? What if one or more videos show a wrong way of executing the task? A task policy learned from such incorrect demonstrations can be potentially unsafe for robots and humans. It is therefore important to analyze the video demonstrations for correctness before handing them over to the policy learning algorithm. This is a challenging task, especially due to the very large state space. This paper proposes a framework to autonomously detect incorrect video demonstrations of sequential tasks consisting of several sub-tasks. We analyze the demonstration pool to identify video(s) for which task-features follow a \u2018disruptive\u2019 sequence. We analyze entropy to measure this disruption and \u2013 through solving a minmax problem \u2013 assign poor weights to incorrect videos. 
We evaluated the framework with two real-world video datasets: our custom-designed Tea-Making with a YuMi robot and the publicly available 50-Salads. Experimental results show the effectiveness of the proposed framework in detecting incorrect video demonstrations even when they make up 40% of the demonstration set. We also show that various state-of-the-art imitation learning algorithms learn a better policy when incorrect demonstrations are discarded from the training pool.", "keywords": "Imitation Learning;Visual Demonstrations;Incorrect Demonstrations", "primary_area": "", "supplementary_material": "/attachment/92e945f2eeb89b19e4e6375f00ce2fb952d20e1a.zip", "author": "Mostafa Hussein;Momotaz Begum", "authorids": "~Mostafa_Hussein1;~Momotaz_Begum1", "gender": "M;F", "homepage": "https://www.linkedin.com/in/mostafa-hussien-87ab2250/;https://carl.cs.unh.edu", "dblp": ";", "google_scholar": "pVzfRJQAAAAJ;", "orcid": ";", "linkedin": "mostafa-hussien-87ab2250/;", "or_profile": "~Mostafa_Hussein1;~Momotaz_Begum1", "aff": "computer science , University of New Hampshire;University of New Hampshire", "aff_domain": "cs.unh.edu;unh.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nhussein2022detecting,\ntitle={Detecting Incorrect Visual Demonstrations for Improved Policy Learning},\nauthor={Mostafa Hussein and Momotaz Begum},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=tyXMrpEMkDb}\n}", "github": "", "project": "", "reviewers": "K64Q;key5;JG5k;av9b", "site": "https://openreview.net/forum?id=tyXMrpEMkDb", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9263091621230860686&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of New Hampshire", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.unh.edu", "aff_unique_abbr": "UNH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "tyxyBj2w4vw", "title": "Planning with Spatial-Temporal Abstraction from Point Clouds for Deformable Object Manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Effective planning of long-horizon deformable object manipulation requires suitable abstractions at both the spatial and temporal levels. Previous methods typically either focus on short-horizon tasks or make strong assumptions that full-state information is available, which prevents their use on deformable objects. In this paper, we propose PlAnning with Spatial-Temporal Abstraction (PASTA), which incorporates both spatial abstraction (reasoning about objects and their relations to each other) and temporal abstraction (reasoning over skills instead of low-level actions). Our framework maps high-dimension 3D observations such as point clouds into a set of latent vectors and plans over skill sequences on top of the latent set representation. We show that our method can effectively perform challenging sequential deformable object manipulation tasks in the real world, which require combining multiple tool-use skills such as cutting with a knife, pushing with a pusher, and spreading dough with a roller. 
Additional materials can be found at our project website: https://sites.google.com/view/pasta-plan.", "keywords": "Long horizon planning;deformable object manipulation", "primary_area": "", "supplementary_material": "/attachment/ec47bc5baeca960aae60b0f52eb76d6ecd58b08a.zip", "author": "Xingyu Lin;Carl Qi;Yunchu Zhang;Zhiao Huang;Katerina Fragkiadaki;Yunzhu Li;Chuang Gan;David Held", "authorids": "~Xingyu_Lin1;~Carl_Qi1;~Yunchu_Zhang1;~Zhiao_Huang1;~Katerina_Fragkiadaki1;~Yunzhu_Li1;~Chuang_Gan1;~David_Held1", "gender": "M;M;M;M;F;M;M;M", "homepage": "https://xingyu-lin.github.io;https://carl-qi.github.io/;https://yunchuzhang.github.io/;;https://www.cs.cmu.edu/~katef/;https://yunzhuli.github.io/;http://people.csail.mit.edu/ganchuang/;http://davheld.github.io/", "dblp": ";;;172/1410;21/8780;182/1831;139/6993;22/11147", "google_scholar": ";CdmHB_oAAAAJ;;;FWp7728AAAAJ;WlA92lcAAAAJ;PTeSCbIAAAAJ;0QtU-NsAAAAJ", "orcid": ";;;;;;;", "linkedin": ";carlqi/;;;;;;", "or_profile": "~Xingyu_Lin1;~Carl_Qi1;~Yunchu_Zhang1;~Zhiao_Huang1;~Katerina_Fragkiadaki1;~Yunzhu_Li1;~Chuang_Gan1;~David_Held1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;University of California, San Diego, University of California, San Diego;Carnegie Mellon University;Massachusetts Institute of Technology;MIT-IBM Watson AI Lab;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;eng.ucsd.edu;cmu.edu;mit.edu;ibm.com;cmu.edu", "position": "PhD student;MS student;MS student;PhD student;Assistant Professor;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlin2022planning,\ntitle={Planning with Spatial-Temporal Abstraction from Point Clouds for Deformable Object Manipulation},\nauthor={Xingyu Lin and Carl Qi and Yunchu Zhang and Zhiao Huang and Katerina Fragkiadaki and Yunzhu Li and Chuang Gan and David Held},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=tyxyBj2w4vw}\n}", "github": "", "project": "", "reviewers": "jQsA;ZcC8;zTGC;E6Aq", "site": "https://openreview.net/forum?id=tyxyBj2w4vw", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15959237369145967760&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1;0;2;2;0", "aff_unique_norm": "Carnegie Mellon University;University of California, San Diego;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.ucsd.edu;https://web.mit.edu", "aff_unique_abbr": "CMU;UCSD;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "u4QXJbcvx8u", "title": "Laplace Approximation Based Epistemic Uncertainty Estimation in 3D Object Detection", "track": "main", "status": "Poster", "tldr": "In this work, we tailor Laplace approximation for 3D object detection, and propose solutions in Fisher approximation, Bayesian inference, and weight prior determination.", "abstract": "Understanding the uncertainty of predictions is a desirable feature for perceptual modules in critical robotic applications. 3D object detectors are neural networks with high-dimensional output space. 
They suffer from poor calibration in classification and lack reliable uncertainty estimation in regression. To provide a reliable epistemic uncertainty estimation, we tailor Laplace approximation for 3D object detectors, and propose an Uncertainty Separation and Aggregation pipeline for Bayesian inference. The proposed Laplace-approximation approach can easily convert a deterministic 3D object detector into a Bayesian neural network capable of estimating epistemic uncertainty. The experiment results on the KITTI dataset empirically validate the effectiveness of our proposed methods, and demonstrate that Laplace approximation achieves better uncertainty quality than Monte-Carlo Dropout, DeepEnsembles, and deterministic models.", "keywords": "Laplace approximation;epistemic uncertainty;3D object detection", "primary_area": "", "supplementary_material": "/attachment/976d1aa329ed0530dab4c860a714698254702aed.zip", "author": "Peng Yun;Ming Liu", "authorids": "~Peng_Yun1;~Ming_Liu11", "gender": "M;M", "homepage": "http://pyun.ram-lab.com;https://ram-lab.com", "dblp": "207/2272,;", "google_scholar": "alRGtgwAAAAJ;", "orcid": "0000-0002-8163-267X;", "linkedin": ";", "or_profile": "~Peng_Yun1;~Ming_Liu11", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;ust.hk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nyun2022laplace,\ntitle={Laplace Approximation Based Epistemic Uncertainty Estimation in 3D Object Detection},\nauthor={Peng Yun and Ming Liu},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=u4QXJbcvx8u}\n}", "github": "https://github.com/pyun-ram/OpenPCUCT", "project": "", "reviewers": "z9CZ;dWMY;mzVN;S5g8", "site": "https://openreview.net/forum?id=u4QXJbcvx8u", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "", "rating_avg": 5.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1953718961344279632&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "uCaNr6_dQB0", "title": "Modularity through Attention: Efficient Training and Transfer of Language-Conditioned Policies for Robot Manipulation", "track": "main", "status": "Poster", "tldr": "Efficient language-conditioned robot policy training and transfer through neural networks.", "abstract": "Language-conditioned policies allow robots to interpret and execute human instructions. Learning such policies requires a substantial investment with regards to time and compute resources. Still, the resulting controllers are highly device-specific and cannot easily be transferred to a robot with different morphology, capability, appearance or dynamics. In this paper, we propose a sample-efficient approach for training language-conditioned manipulation policies that allows for rapid transfer across different types of robots. 
By introducing a novel method, namely Hierarchical Modularity, and adopting supervised attention across multiple sub-modules, we bridge the divide between modular and end-to-end learning and enable the reuse of functional building blocks. In both simulated and real world robot manipulation experiments, we demonstrate that our method outperforms the current state-of-the-art methods and can transfer policies across 4 different robots in a sample-efficient manner. Finally, we show that the functionality of learned sub-modules is maintained beyond the training process and can be used to introspect the robot decision-making process.", "keywords": "Language-Conditioned Learning;Attention;Imitation;Modularity", "primary_area": "", "supplementary_material": "/attachment/8bfdcfd3b3c49e698cbf82c6f645d1c68a50d418.zip", "author": "Yifan Zhou;Shubham Sonawani;Mariano Phielipp;Simon Stepputtis;Heni Amor", "authorids": "~Yifan_Zhou4;~Shubham_Sonawani1;~Mariano_Phielipp2;~Simon_Stepputtis1;~Heni_Amor1", "gender": "M;M;M;;M", "homepage": ";;https://www.intel.com/content/www/us/en/research/researchers/mariano-phielipp.html;https://simonstepputtis.com/;https://cidse.engineering.asu.edu/directory/ben-amor-heni/", "dblp": ";;23/4518;192/7092;18/3990", "google_scholar": ";tOZs-vMAAAAJ;YArRsvEAAAAJ;WUQgzsAAAAAJ;https://scholar.google.com.tw/citations?user=u_7S7VYAAAAJ", "orcid": ";;;0009-0003-0519-3454;", "linkedin": "yifan-zhou-639974191/;;mariano-phielipp-941624;simon-stepputtis/;", "or_profile": "~Yifan_Zhou4;~Shubham_Sonawani1;~Mariano_Phielipp2;~Simon_Stepputtis1;~Heni_Amor1", "aff": "Arizona State University;Arizona State University;Intel Labs;Carnegie Mellon University;Arizona State University", "aff_domain": "asu.edu;asu.edu;intel.com;cmu.edu;asu.edu", "position": "PhD student;PhD student;Principal Researcher;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nzhou2022modularity,\ntitle={Modularity through Attention: Efficient Training and Transfer of Language-Conditioned Policies for Robot Manipulation},\nauthor={Yifan Zhou and Shubham Sonawani and Mariano Phielipp and Simon Stepputtis and Heni Amor},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=uCaNr6_dQB0}\n}", "github": "https://github.com/ir-lab/ModAttn", "project": "", "reviewers": "piKo;CVpY;fi3K", "site": "https://openreview.net/forum?id=uCaNr6_dQB0", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10831143674462851257&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Arizona State University;Intel;Carnegie Mellon University", "aff_unique_dep": ";Intel Labs;", "aff_unique_url": "https://www.asu.edu;https://www.intel.com;https://www.cmu.edu", "aff_unique_abbr": "ASU;Intel;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uPWhEXjyvoo", "title": "HTRON: Efficient Outdoor Navigation with Sparse Rewards via Heavy Tailed Adaptive Reinforce Algorithm", "track": "main", "status": "Poster", "tldr": "", "abstract": "We present a novel approach to improve the performance of deep reinforcement learning (DRL) based outdoor robot navigation systems. 
Most existing DRL methods are based on carefully designed dense reward functions that learn efficient behavior in an environment. We circumvent this issue by working only with sparse rewards (which are easy to design) and propose a novel adaptive Heavy-Tailed Reinforce algorithm for Outdoor Navigation called HTRON. Our main idea is to utilize heavy-tailed policy parametrizations which implicitly induce exploration in sparse reward settings. We evaluate the performance of HTRON against Reinforce, PPO, and TRPO algorithms in three different outdoor scenarios: goal-reaching, obstacle avoidance, and uneven terrain navigation. We observe an average increase of 34.41% in terms of success rate, a 15.15% decrease in the average time steps taken to reach the goal, and a 24.9% decrease in the elevation cost compared to the navigation policies obtained by the other methods. Further, we demonstrate that our algorithm can be transferred directly into a Clearpath Husky robot to perform outdoor terrain navigation in real-world scenarios.", "keywords": "Robot Navigation;Deep Reinforcement Learning;Heavy Tailed Policy", "primary_area": "", "supplementary_material": "/attachment/4090f28784281611cce111909d3261395b6f67a1.zip", "author": "Kasun Weerakoon;Souradip Chakraborty;Nare Karapetyan;Adarsh Jagan Sathyamoorthy;Amrit Bedi;Dinesh Manocha", "authorids": "~Kasun_Weerakoon1;~Souradip_Chakraborty1;knare@umd.edu;asathyam@umd.edu;~Amrit_Bedi1;~Dinesh_Manocha3", "gender": "M;M;;;M;M", "homepage": "https://www.kasunweerakoon.org/;https://souradip-umd.github.io/;;;https://sites.google.com/view/amritsinghbedi/home;https://www.cs.umd.edu/people/dmanocha", "dblp": ";264/5758;;;176/2707.html;m/DineshManocha", "google_scholar": "-JsBJxUAAAAJ;https://scholar.google.co.in/citations?user=pvETm1wAAAAJ;;;91WLA6QAAAAJ;X08l_4IAAAAJ", "orcid": ";;;;;0000-0001-7047-9801", "linkedin": "kasun-weerakoon/;;;;;dinesh-manocha-2311846", "or_profile": "~Kasun_Weerakoon1;~Souradip_Chakraborty1;knare@umd.edu;asathyam@umd.edu;~Amrit_Bedi1;~Dinesh_Manocha3", "aff": "University of Maryland, College Park;University of Maryland, College Park;;;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;;;umd.edu;umd.edu", "position": "PhD student;PhD student;;;Researcher;Professor", "bibtex": "@inproceedings{\nweerakoon2022htron,\ntitle={{HTRON}: Efficient Outdoor Navigation with Sparse Rewards via Heavy Tailed Adaptive Reinforce Algorithm},\nauthor={Kasun Weerakoon and Souradip Chakraborty and Nare Karapetyan and Adarsh Jagan Sathyamoorthy and Amrit Bedi and Dinesh Manocha},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=uPWhEXjyvoo}\n}", "github": "", "project": "", "reviewers": "B9Ro;skdZ;N3Ka;cxNk", "site": "https://openreview.net/forum?id=uPWhEXjyvoo", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7094146908373257651&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "uUxDTZK3o3X", "title": "Robustness 
Certification of Visual Perception Models via Camera Motion Smoothing", "track": "main", "status": "Poster", "tldr": "We study the robustness of the visual perception model under camera motion perturbations and we propose a motion smoothing technique to give a certification guarantee under camera motion perturbations for any black-box image classification models.", "abstract": "A vast literature shows that the learning-based visual perception model is sensitive to adversarial noises, but few works consider the robustness of robotic perception models under widely-existing camera motion perturbations. To this end, we study the robustness of the visual perception model under camera motion perturbations to investigate the influence of camera motion on robotic perception. Specifically, we propose a motion smoothing technique for arbitrary image classification models, whose robustness under camera motion perturbations could be certified. The proposed robustness certification framework based on camera motion smoothing provides effective and scalable robustness guarantees for visual perception modules so that they are applicable to wide robotic applications. As far as we are aware, this is the first work to provide robustness certification for the deep perception module against camera motions, which improves the trustworthiness of robotic perception. A realistic indoor robotic dataset with a dense point cloud map for the entire room, MetaRoom, is introduced for the challenging certifiable robust perception task. We conduct extensive experiments to validate the certification approach via motion smoothing against camera motion perturbations. Our framework guarantees the certified accuracy of 81.7% against camera translation perturbation along depth direction within -0.1m ~ 0.1m. We also validate the effectiveness of our method on the real-world robot by conducting hardware experiments on the robotic arm with an eye-in-hand camera. 
The code is available at https://github.com/HanjiangHu/camera-motion-smoothing.", "keywords": "Certifiable Robustness;Camera Motion Perturbation;Robotic Perception", "primary_area": "", "supplementary_material": "/attachment/b6009cb206a5084ae56aa3532969f36b08170954.zip", "author": "Hanjiang Hu;Zuxin Liu;Linyi Li;Jiacheng Zhu;Ding Zhao", "authorids": "~Hanjiang_Hu1;~Zuxin_Liu1;~Linyi_Li1;~Jiacheng_Zhu1;~Ding_Zhao1", "gender": "M;M;M;M;", "homepage": "https://cs.cmu.edu/~hanjianh;https://www.zuxin.me;http://linyil.com;https://jiachengzhuml.github.io/;https://safeai-lab.github.io", "dblp": "249/5764;227/3137;99/4340-1.html;40/10195;", "google_scholar": "https://scholar.google.com/citations?hl=en;5ApCTCoAAAAJ;-b0sk-YAAAAJ;rKUnBPgAAAAJ;z7tPc9IAAAAJ", "orcid": ";0000-0001-7412-5074;;;", "linkedin": "hanjiang-hu-54337b196/;zuxin-liu/;;;", "or_profile": "~Hanjiang_Hu1;~Zuxin_Liu1;~Linyi_Li1;~Jiacheng_Zhu1;~Ding_Zhao1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Microsoft Research;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;microsoft.com;andrew.cmu.edu;cmu.edu", "position": "PhD student;PhD student;Research Intern;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2022robustness,\ntitle={Robustness Certification of Visual Perception Models via Camera Motion Smoothing},\nauthor={Hanjiang Hu and Zuxin Liu and Linyi Li and Jiacheng Zhu and Ding Zhao},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=uUxDTZK3o3X}\n}", "github": "https://github.com/HanjiangHu/camera-motion-smoothing", "project": "", "reviewers": "BZ5K;C4we;vwtf;MJ8E", "site": "https://openreview.net/forum?id=uUxDTZK3o3X", "pdf_size": 0, "rating": "1;4;6;10", "confidence": "", "rating_avg": 5.25, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4395238513845280765&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CMU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uhIfIEIiWm_", "title": "Offline Reinforcement Learning for Visual Navigation", "track": "main", "status": "Oral", "tldr": "Coupling the offline learned value function with a topological graph, where the values provide distance estimates, is a great way to make things scale. And it works with diverse rewards, like \"stay in the sun\", or \"stay off my lawn\".\"", "abstract": "Reinforcement learning can enable robots to navigate to distant goals while optimizing user-specified reward functions, including preferences for following lanes, staying on paved paths, or avoiding freshly mowed grass. However, online learning from trial-and-error for real-world robots is logistically challenging, and methods that instead can utilize existing datasets of robotic navigation data could be significantly more scalable and enable broader generalization. In this paper, we present ReViND, the first offline RL system for robotic navigation that can leverage previously collected data to optimize user-specified reward functions in the real-world. 
We evaluate our system for off-road navigation without any additional data collection or fine-tuning, and show that it can navigate to distant goals using only offline training from this dataset, and exhibit behaviors that qualitatively differ based on the user-specified reward function.", "keywords": "offline reinforcement learning;visual navigation;motion planning", "primary_area": "", "supplementary_material": "/attachment/f84d74368542c79fdb111e6d1f24548f0cfdaa42.zip", "author": "Dhruv Shah;Arjun Bhorkar;Hrishit Leen;Ilya Kostrikov;Nicholas Rhinehart;Sergey Levine", "authorids": "~Dhruv_Shah1;~Arjun_Bhorkar1;~Hrishit_Leen1;~Ilya_Kostrikov1;~Nicholas_Rhinehart1;~Sergey_Levine1", "gender": "M;;M;M;M;M", "homepage": "http://cs.berkeley.edu/~shah;;;;https://leaf.utias.utoronto.ca/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";;;https://dblp.org/pers/k/Kostrikov:Ilya.html;153/2193;80/7594", "google_scholar": ";;;PTS2AOgAAAAJ;xUGZX_MAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;;", "linkedin": ";arjun-bhorkar-306166160/;hrish-leen-1451a71b8/;;;", "or_profile": "~Dhruv_Shah1;~Arjun_Bhorkar1;~Hrishit_Leen1;~Ilya_Kostrikov1;~Nicholas_Rhinehart1;~Sergey_Levine1", "aff": "UC Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;google.com", "position": "PhD student;Undergrad student;Undergrad student;Postdoc;Postdoc;Research Scientist", "bibtex": "@inproceedings{\nshah2022offline,\ntitle={Offline Reinforcement Learning for Visual Navigation},\nauthor={Dhruv Shah and Arjun Bhorkar and Hrishit Leen and Ilya Kostrikov and Nicholas Rhinehart and Sergey Levine},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=uhIfIEIiWm_}\n}", "github": "https://sites.google.com/view/revind", "project": "", "reviewers": "6zkp;V1xj;FZSz;qBaA", "site": "https://openreview.net/forum?id=uhIfIEIiWm_", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3624495257401006572&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;0;0;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uhhA2OryTjj", "title": "Robust Trajectory Prediction against Adversarial Attacks", "track": "main", "status": "Oral", "tldr": "We propose a new adversarial training framework for training robust trajectory prediction systems by addressing domain specific challenges.", "abstract": "Trajectory prediction using deep neural networks (DNNs) is an essential component of autonomous driving (AD) systems. However, these methods are vulnerable to adversarial attacks, leading to serious consequences such as collisions. 
In this work, we identify two key ingredients to defend trajectory prediction models against adversarial attacks including (1) designing effective adversarial training methods and (2) adding domain-specific data augmentation to mitigate the performance degradation on clean data. We demonstrate that our method is able to improve the performance by 46\\% on adversarial data and at the cost of only 3\\% performance degradation on clean data, compared to the model trained with clean data. Additionally, compared to existing robust methods, our method can improve performance by 21\\% on adversarial examples and 9\\% on clean data. Our robust model is evaluated with a planner to study its downstream impacts. We demonstrate that our model can significantly reduce the severe accident rates (e.g., collisions and off-road driving).", "keywords": "Adversarial Attack;Trajectory Prediction;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/0193f48f1af1463f51eb49b3b3b5e41ad629abc2.zip", "author": "Yulong Cao;Danfei Xu;Xinshuo Weng;Zhuoqing Mao;Anima Anandkumar;Chaowei Xiao;Marco Pavone", "authorids": "~Yulong_Cao1;~Danfei_Xu1;~Xinshuo_Weng3;~Zhuoqing_Mao1;~Anima_Anandkumar1;~Chaowei_Xiao2;~Marco_Pavone1", "gender": ";M;F;F;M;F;M", "homepage": "https://kikacaty.github.io/;https://cs.stanford.edu/~danfei/;http://www.xinshuoweng.com;https://web.eecs.umich.edu/~zmao/;https://web.stanford.edu/~pavone/;http://tensorlab.cms.caltech.edu/users/anima/;https://xiaocw11.github.io/", "dblp": "207/6576;135/8443;192/1952.html;;91/3382-1.html;;150/3317", "google_scholar": "uclqBzgAAAAJ;J5D4kcoAAAAJ;dthSEsoAAAAJ;Ba_Ci9UAAAAJ;RhOpyXcAAAAJ;bEcLezcAAAAJ;Juoqtj8AAAAJ", "orcid": ";;0000-0002-7894-4381;;;;0000-0002-7043-4926", "linkedin": ";;xinshuoweng;;;anima-anandkumar-35171b1/;", "or_profile": "~Yulong_Cao1;~Danfei_Xu1;~Xinshuo_Weng3;~Zhuoqing_Mao1;~Marco_Pavone1;~anima_anandkumar1;~chaowei_xiao1", "aff": "University of Michigan;NVIDIA;Carnegie Mellon University;University of Michigan;Stanford University;California Institute of Technology;Arizona State University", "aff_domain": "umich.edu;nvidia.com;cmu.edu;umich.edu;stanford.edu;caltech.edu;asu.edu", "position": "PhD student;Research Scientist;PhD student;Professor;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2022robust,\ntitle={Robust Trajectory Prediction against Adversarial Attacks},\nauthor={Yulong Cao and Danfei Xu and Xinshuo Weng and Zhuoqing Mao and Anima Anandkumar and Chaowei Xiao and Marco Pavone},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=uhhA2OryTjj}\n}", "github": "https://github.com/kikacaty/RobustTraj", "project": "", "reviewers": "zQGj;xfRa;N91T;Bth3", "site": "https://openreview.net/forum?id=uhhA2OryTjj", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8925323789154154946&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;0;3;4;5", "aff_unique_norm": "University of Michigan;NVIDIA;Carnegie Mellon University;Stanford University;California Institute of Technology;Arizona State University", "aff_unique_dep": ";NVIDIA Corporation;;;;", "aff_unique_url": "https://www.umich.edu;https://www.nvidia.com;https://www.cmu.edu;https://www.stanford.edu;https://www.caltech.edu;https://www.asu.edu", 
"aff_unique_abbr": "UM;NVIDIA;CMU;Stanford;Caltech;ASU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uv1nRhayFII", "title": "Contrastive Decision Transformers", "track": "main", "status": "Poster", "tldr": "We present ConDT, a neural architecture for reinforcement learning that empirically outperforms prior work with a novel approach to learning return-dependent transformations of a Decision Transformer's input embeddings.", "abstract": "Decision Transformers (DT) have drawn upon the success of Transformers by abstracting Reinforcement Learning as a target-return-conditioned, sequence modeling problem. In our work, we claim that the distribution of DT's target-returns represents a series of different tasks that agents must learn to handle. Work in multi-task learning has shown that separating the representations of input data belonging to different tasks can improve performance. We draw from this approach to construct ConDT (Contrastive Decision Transformer). ConDT leverages an enhanced contrastive loss to train a return-dependent transformation of the input embeddings, which we empirically show clusters these embeddings by their return. We find that ConDT significantly outperforms DT in Open-AI Gym domains by 10% and 39% in visually challenging Atari domains.", "keywords": "Reinforcement Learning;Decision Transformers;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/3b5e82262145442c1201d882833c1417ea8e29f7.zip", "author": "Sachin G Konan;Esmaeil Seraj;Matthew Gombolay", "authorids": "~Sachin_G_Konan1;~Esmaeil_Seraj1;~Matthew_Gombolay1", "gender": "M;M;M", "homepage": "https://sachinkonan.github.io/;https://www.linkedin.com/in/esmaeil-seraj-70590b80/;https://core-robotics.gatech.edu/", "dblp": "310/1751.html;169/3595;144/1022", "google_scholar": "dxi6F8kAAAAJ;k0yj7xUAAAAJ;Ihyz20wAAAAJ", "orcid": ";0000-0002-0147-1037;", "linkedin": "sachin-konan;esmaeil-seraj-70590b80/;", "or_profile": "~Sachin_G_Konan1;~Esmaeil_Seraj1;~Matthew_Gombolay1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;cc.gatech.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkonan2022contrastive,\ntitle={Contrastive Decision Transformers},\nauthor={Sachin G Konan and Esmaeil Seraj and Matthew Gombolay},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=uv1nRhayFII}\n}", "github": "", "project": "", "reviewers": "bC6T;hRuX;werf;JfJf", "site": "https://openreview.net/forum?id=uv1nRhayFII", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15677529299533958856&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "vyR0C7zvozS", "title": "GLSO: Grammar-guided Latent Space Optimization for Sample-efficient Robot Design Automation", "track": "main", "status": 
"Poster", "tldr": "A framework for sample-efficient robot design automation through unsupervised learning and Bayesian Optimization.", "abstract": "Robots have been used in all sorts of automation, and yet the design of robots remains mainly a manual task. We seek to provide design tools to automate the design of robots themselves. An important challenge in robot design automation is the large and complex design search space which grows exponentially with the number of components, making optimization difficult and sample inefficient. In this work, we present Grammar-guided Latent Space Optimization (GLSO), a framework that transforms design automation into a low-dimensional continuous optimization problem by training a graph variational autoencoder (VAE) to learn a mapping between the graph-structured design space and a continuous latent space. This transformation allows optimization to be conducted in a continuous latent space, where sample efficiency can be significantly boosted by applying algorithms such as Bayesian Optimization. GLSO guides training of the VAE using graph grammar rules and robot world space features, such that the learned latent space focus on valid robots and is easier for the optimization algorithm to explore. Importantly, the trained VAE can be reused to search for designs specialized to multiple different tasks without retraining. We evaluate GLSO by designing robots for a set of locomotion tasks in simulation, and demonstrate that our method outperforms related state-of-the-art robot design automation methods.", "keywords": "Robot Design Automation;Latent Optimization;Graph Grammar", "primary_area": "", "supplementary_material": "/attachment/e452497eb85eda901e25711ea462f022ab246675.zip", "author": "Jiaheng Hu;Julian Whitman;Howie Choset", "authorids": "~Jiaheng_Hu1;jwhitman@cmu.edu;~Howie_Choset1", "gender": "M;;M", "homepage": "https://jiahenghu.github.io/;;http://choset.com", "dblp": ";;c/HowieChoset", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;howie-choset-45b0b21/", "or_profile": "~Jiaheng_Hu1;jwhitman@cmu.edu;~Howie_Choset1", "aff": "Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu", "position": "MS student;;Full Professor", "bibtex": "@inproceedings{\nhu2022glso,\ntitle={{GLSO}: Grammar-guided Latent Space Optimization for Sample-efficient Robot Design Automation},\nauthor={Jiaheng Hu and Julian Whitman and Howie Choset},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=vyR0C7zvozS}\n}", "github": "https://github.com/JiahengHu/GLSO", "project": "", "reviewers": "9cM8;tfFP;4uZ5;t5Qe", "site": "https://openreview.net/forum?id=vyR0C7zvozS", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4944923200639841497&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "wyCdmAJJY1F", "title": "CausalAF: Causal Autoregressive Flow for Safety-Critical Driving Scenario Generation", "track": "main", "status": "Poster", "tldr": "We propose a causal generative model for 
safety-critical driving scenarios generation to improve the robustness of autonomous driving algorithms.", "abstract": "Generating safety-critical scenarios, which are crucial yet difficult to collect, provides an effective way to evaluate the robustness of autonomous driving systems. However, the diversity of scenarios and efficiency of generation methods are heavily restricted by the rareness and structure of safety-critical scenarios. Therefore, existing generative models that only estimate distributions from observational data are not satisfying to solve this problem. In this paper, we integrate causality as a prior into the scenario generation and propose a flow-based generative framework, Causal Autoregressive Flow (CausalAF). CausalAF encourages the generative model to uncover and follow the causal relationship among generated objects via novel causal masking operations instead of searching the sample only from observational data. By learning the cause-and-effect mechanism of how the generated scenario causes risk situations rather than just learning correlations from data, CausalAF significantly improves learning efficiency. Extensive experiments on three heterogeneous traffic scenarios illustrate that CausalAF requires much fewer optimization resources to effectively generate safety-critical scenarios. We also show that using generated scenarios as additional training samples empirically improves the robustness of autonomous driving algorithms.", "keywords": "Causal Generative Models;Scenario Generation;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/5b4a460fd3c234c2bc846973844df5a6c87b1c6a.zip", "author": "Wenhao Ding;Haohong Lin;Bo Li;Ding Zhao", "authorids": "~Wenhao_Ding1;~Haohong_Lin1;~Bo_Li19;~Ding_Zhao1", "gender": "M;M;F;", "homepage": "https://wenhao.pub;https://hhlin.info/;http://boli.cs.illinois.edu/;https://safeai-lab.github.io", "dblp": "215/3667.html;154/7972;50/3402-26;", "google_scholar": "q2aqI9sAAAAJ;;K8vJkTcAAAAJ;z7tPc9IAAAAJ", "orcid": ";;;", "linkedin": "wenhaoding/;haohong-lin-06572b1a5/;;", "or_profile": "~Wenhao_Ding1;~Haohong_Lin1;~Bo_Li19;~Ding_Zhao1", "aff": "Carnegie Mellon University;Carnegie Mellon University;University of Illinois, Urbana Champaign;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;illinois.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nding2022causalaf,\ntitle={Causal{AF}: Causal Autoregressive Flow for Safety-Critical Driving Scenario Generation},\nauthor={Wenhao Ding and Haohong Lin and Bo Li and Ding Zhao},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=wyCdmAJJY1F}\n}", "github": "", "project": "", "reviewers": "Gk4u;pK7P;aF4L;9o2A", "site": "https://openreview.net/forum?id=wyCdmAJJY1F", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ol2-MjeZqcYJ:scholar.google.com/&scioq=CausalAF:+Causal+Autoregressive+Flow+for+Safety-Critical+Driving+Scenario+Generation&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://illinois.edu", "aff_unique_abbr": "CMU;UIUC", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "x6INXlnUGro", "title": "Learning Agile Skills via Adversarial Imitation of Rough Partial Demonstrations", "track": "main", "status": "Oral", "tldr": "Wasserstein Adversarial Behavior Imitaion enables learning agile skills for quadrupeds from rough and partial demonstrations.", "abstract": "Learning agile skills is one of the main challenges in robotics. To this end, reinforcement learning approaches have achieved impressive results. These methods require explicit task information in terms of a reward function or an expert that can be queried in simulation to provide a target control output, which limits their applicability. In this work, we propose a generative adversarial method for inferring reward functions from partial and potentially physically incompatible demonstrations for successful skill acquirement where reference or expert demonstrations are not easily accessible. Moreover, we show that by using a Wasserstein GAN formulation and transitions from demonstrations with rough and partial information as input, we are able to extract policies that are robust and capable of imitating demonstrated behaviors. Finally, the obtained skills such as a backflip are tested on an agile quadruped robot called Solo 8 and present faithful replication of hand-held human demonstrations.", "keywords": "Adversarial;Imitation Learning;Legged Robots", "primary_area": "", "supplementary_material": "/attachment/a76a60f4cd821c2d9b3db7e0e135a51f47c68ed3.zip", "author": "Chenhao Li;Marin Vlastelica;Sebastian Blaes;Jonas Frey;Felix Grimminger;Georg Martius", "authorids": "~Chenhao_Li3;~Marin_Vlastelica1;~Sebastian_Blaes1;jonfrey@ethz.ch;felix.grimminger@tuebingen.mpg.de;~Georg_Martius1", "gender": ";;M;;;M", "homepage": "https://breadli428.github.io/;;https://sblaes.com;;;https://uni-tuebingen.de/de/264672", "dblp": "186/9145;;163/8117;;;47/2706", "google_scholar": "kw1-DxQAAAAJ;;https://scholar.google.de/citations?user=ftV9OHMAAAAJ;;;https://scholar.google.de/citations?user=b-JF-UIAAAAJ", "orcid": ";;;;;", "linkedin": "chenhao-li-86080b1b0/;;sebastian-blaes/;;;", "or_profile": "~Chenhao_Li3;~Marin_Vlastelica1;~Sebastian_Blaes1;jonfrey@ethz.ch;felix.grimminger@tuebingen.mpg.de;~Georg_Martius1", "aff": ";;Max Planck Institute for Intelligent Systems, Max Planck Institute for Intelligent Systems;;;Max Planck Institute for Intelligent Systems", "aff_domain": ";;is.tue.mpg.de;;;tuebingen.mpg.de", "position": ";;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nli2022learning,\ntitle={Learning Agile Skills via Adversarial Imitation of Rough Partial Demonstrations},\nauthor={Chenhao Li and Marin Vlastelica and Sebastian Blaes and Jonas Frey and Felix Grimminger and Georg Martius},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=x6INXlnUGro}\n}", "github": "", "project": "", "reviewers": "V8R4;Hona;2hjT;V4vA", "site": "https://openreview.net/forum?id=x6INXlnUGro", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11179068907957176862&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems", "aff_unique_dep": "", "aff_unique_url": 
"https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "xC-68ANJeK_", "title": "Learning Control Admissibility Models with Graph Neural Networks for Multi-Agent Navigation", "track": "main", "status": "Poster", "tldr": "Control Admissibility Model decouples the properties of goal-reaching and collision avoidance, and is compositional which enables generalization to challengingly high density of agents.", "abstract": "Deep reinforcement learning in continuous domains focuses on learning control policies that map states to distributions over actions that ideally concentrate on the optimal choices in each step. In multi-agent navigation problems, the optimal actions depend heavily on the agents' density. Their interaction patterns grow exponentially with respect to such density, making it hard for learning-based methods to generalize. We propose to switch the learning objectives from predicting the optimal actions to predicting sets of admissible actions, which we call control admissibility models (CAMs), such that they can be easily composed and used for online inference for an arbitrary number of agents. We design CAMs using graph neural networks and develop training methods that optimize the CAMs in the standard model-free setting, with the additional benefit of eliminating the need for reward engineering typically required to balance collision avoidance and goal-reaching requirements. We evaluate the proposed approach in multi-agent navigation environments. We show that the CAM models can be trained in environments with only a few agents and be easily composed for deployment in dense environments with hundreds of agents, achieving better performance than state-of-the-art methods. 
", "keywords": "Graph Neural Networks;Multi-Agent Navigation", "primary_area": "", "supplementary_material": "/attachment/8d5cc56bcf6582188040638ba60d98cc84d5771f.zip", "author": "Chenning Yu;Hongzhan Yu;Sicun Gao", "authorids": "~Chenning_Yu1;~Hongzhan_Yu1;~Sicun_Gao1", "gender": ";;M", "homepage": "https://GitHub.com/rainorangelemon;;", "dblp": "319/4367;;22/8296", "google_scholar": ";;", "orcid": ";;", "linkedin": ";hongzhan-yu/;", "or_profile": "~Chenning_Yu1;~Hongzhan_Yu1;~Sicun_Gao1", "aff": "University of California, San Diego;University of California, San Diego;", "aff_domain": "ucsd.edu;ucsd.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nyu2022learning,\ntitle={Learning Control Admissibility Models with Graph Neural Networks for Multi-Agent Navigation},\nauthor={Chenning Yu and Hongzhan Yu and Sicun Gao},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=xC-68ANJeK_}\n}", "github": "https://github.com/rainorangelemon/pytorch_geometric_multiagent", "project": "", "reviewers": "XUE1;JipF;759H", "site": "https://openreview.net/forum?id=xC-68ANJeK_", "pdf_size": 0, "rating": "4;4;6", "confidence": "", "rating_avg": 4.666666666666667, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3803297046640674050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "xK-UtqDpD7L", "title": "Learning to Grasp the Ungraspable with Emergent Extrinsic Dexterity", "track": "main", "status": "Oral", "tldr": "We build a system for the \"Occluded Grasping\" task with the combination of reinforcement learning and extrinsic dexterity that works on a real robot.", "abstract": "A simple gripper can solve more complex manipulation tasks if it can utilize the external environment such as pushing the object against the table or a vertical wall, known as \"Extrinsic Dexterity.\" Previous work in extrinsic dexterity usually has careful assumptions about contacts which impose restrictions on robot design, robot motions, and the variations of the physical parameters. In this work, we develop a system based on reinforcement learning (RL) to address these limitations. We study the task of \"Occluded Grasping\" which aims to grasp the object in configurations that are initially occluded; the robot needs to move the object into a configuration from which these grasps can be achieved. We present a system with model-free RL that successfully achieves this task using a simple gripper with extrinsic dexterity. The policy learns emergent behaviors of pushing the object against the wall to rotate and then grasp it without additional reward terms on extrinsic dexterity. We discuss important components of the system including the design of the RL problem, multi-grasp training and selection, and policy generalization with automatic curriculum. Most importantly, the policy trained in simulation is zero-shot transferred to a physical robot. 
It demonstrates dynamic and contact-rich motions with a simple gripper that generalizes across objects with various size, density, surface friction, and shape with a 78% success rate. ", "keywords": "Manipulation;Reinforcement Learning;Extrinsic Dexterity", "primary_area": "", "supplementary_material": "/attachment/e4f6d3d20ffdb5e719624b6dc977f65cd04e8340.zip", "author": "Wenxuan Zhou;David Held", "authorids": "~Wenxuan_Zhou1;~David_Held1", "gender": "F;M", "homepage": "https://wenxuan-zhou.github.io/;http://davheld.github.io/", "dblp": ";22/11147", "google_scholar": "picvdvEAAAAJ;0QtU-NsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Wenxuan_Zhou1;~David_Held1", "aff": "Meta AI;Carnegie Mellon University", "aff_domain": "meta.com;cmu.edu", "position": "Intern;Assistant Professor", "bibtex": "@inproceedings{\nzhou2022learning,\ntitle={Learning to Grasp the Ungraspable with Emergent Extrinsic Dexterity},\nauthor={Wenxuan Zhou and David Held},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=xK-UtqDpD7L}\n}", "github": "", "project": "", "reviewers": "Deoh;58SX", "site": "https://openreview.net/forum?id=xK-UtqDpD7L", "pdf_size": 0, "rating": "6;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 8, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14683434911689927019&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;Carnegie Mellon University", "aff_unique_dep": "Meta AI;", "aff_unique_url": "https://meta.com;https://www.cmu.edu", "aff_unique_abbr": "Meta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "xjTUxBfIzE", "title": "Topological Semantic Graph Memory for Image-Goal Navigation", "track": "main", "status": "Oral", "tldr": "A landmark-based topological semantic graph memory for image goal navigation (TSGM) is proposed. It significantly outperforms baselines, boosting path efficiency (SPL). The proposed method is demonstrated in real environment with a jackal robot.", "abstract": "A novel framework is proposed to incrementally collect landmark-based graph memory and use the collected memory for image goal navigation. Given a target image to search, an embodied robot utilizes semantic memory to find the target in an unknown environment.\nIn this paper, we present a topological semantic graph memory (TSGM), which consists of (1) a graph builder that takes the observed RGB-D image to construct a topological semantic graph, (2) a cross graph mixer module that takes the collected nodes to get contextual information, and (3) a memory decoder that takes the contextual memory as an input to find an action to the target. On the task of an image goal navigation, TSGM significantly outperforms competitive baselines by +5.0-9.0% on the success rate and +7.0-23.5% on SPL, which means that the TSGM finds efficient paths. 
Additionally, we demonstrate our method on a mobile robot in real-world image goal scenarios.", "keywords": "Landmark-Based Navigation;Incremental Topological Memory;Visual Navigation", "primary_area": "", "supplementary_material": "/attachment/de4b3fb34d1801859431538514cc516c5233f8a0.zip", "author": "Nuri Kim;Obin Kwon;Hwiyeon Yoo;Yunho Choi;Jeongho Park;Songhwai Oh", "authorids": "~Nuri_Kim1;~Obin_Kwon1;~Hwiyeon_Yoo1;~Yunho_Choi1;~Jeongho_Park1;~Songhwai_Oh1", "gender": "F;;M;M;M;", "homepage": "https://bareblackfoot.github.io;;https://hwiyeon.github.io/;http://rllab.snu.ac.kr/people/yunho-choi/yunho-choi;http://rllab.snu.ac.kr;https://rllab.snu.ac.kr/", "dblp": ";281/6818;198/0737;;87/4354;17/3173", "google_scholar": "https://scholar.google.co.kr/citations?user=N5uw2S0AAAAJ;lYyI3QcAAAAJ;https://scholar.google.co.kr/citations?user=Tf2kXrcAAAAJ;https://scholar.google.co.kr/citations?user=Tpy8jhgAAAAJ;;VEzNY_oAAAAJ", "orcid": ";;;;;0000-0002-9781-2018", "linkedin": "nuri-kim/;;hwiyeon-yoo-a24604159/;;;", "or_profile": "~Nuri_Kim1;~Obin_Kwon1;~Hwiyeon_Yoo1;~Yunho_Choi1;~Jeongho_Park1;~Songhwai_Oh1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2022topological,\ntitle={Topological Semantic Graph Memory for Image-Goal Navigation},\nauthor={Nuri Kim and Obin Kwon and Hwiyeon Yoo and Yunho Choi and Jeongho Park and Songhwai Oh},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=xjTUxBfIzE}\n}", "github": "https://github.com/rllab-snu/TopologicalSemanticGraphMemory", "project": "", "reviewers": "XjPf;T5E1;dNHx;apkv", "site": "https://openreview.net/forum?id=xjTUxBfIzE", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12064956701915569287&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "y1t0c5eUk2w", "title": "Fusing Priori and Posteriori Metrics for Automatic Dataset Annotation of Planar Grasping", "track": "main", "status": "Poster", "tldr": "", "abstract": "Grasp detection based on deep learning has been a research hot spot in recent years. The performance of grasping detection models relies on high-quality, large-scale grasp datasets. Taking comprehensive consideration of quality, extendability, and annotation cost, metric-based simulation methodology is the most promising way to generate grasp annotation. As experts in grasping, human intuitively tends to make grasp decision based both on priori and posteriori knowledge. Inspired by that, a combination of priori and posteriori grasp metrics is intuitively helpful to improve annotation quality. 
In this paper, we build a hybrid metric group involving both priori and posteriori metrics and propose a grasp evaluator to merge those metrics to approximate human grasp decision capability. Centered on the evaluator, we have constructed an automatic grasp annotation framework, through which a large-scale, high-quality, low annotation cost planar grasp dataset GMD is automatically generated.", "keywords": "Grasp Detection;Grasp Metric;Grasp Dataset;Automatic Annotation", "primary_area": "", "supplementary_material": "/attachment/412b53b91a1d8233d497424a958a26001dde6599.zip", "author": "Hao Sha;Lai Qianen;Hongxiang Yu;Rong Xiong;Yue Wang", "authorids": "~Hao_Sha3;~Lai_Qianen1;~Hongxiang_Yu1;~Rong_Xiong1;~Yue_Wang1", "gender": "M;M;M;;M", "homepage": "https://stevehao74.github.io/;;;;https://ywang-zju.github.io/", "dblp": ";;;;", "google_scholar": ";;;1hI9bqUAAAAJ;", "orcid": ";;0000-0003-1417-0476;;", "linkedin": ";http://linkedin.com/in/\u4e7e\u6069-\u8d56-413293201;;;", "or_profile": "~Hao_Sha3;~Lai_Qianen1;~Hongxiang_Yu1;~Rong_Xiong1;~Yue_Wang1", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn", "position": "MS student;MS student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsha2022fusing,\ntitle={Fusing Priori and Posteriori Metrics for Automatic Dataset Annotation of Planar Grasping},\nauthor={Hao Sha and Lai Qianen and Hongxiang Yu and Rong Xiong and Yue Wang},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=y1t0c5eUk2w}\n}", "github": "https://github.com/SteveHao74/GMD", "project": "", "reviewers": "MbMM;Vr9d;5FZ9;Bsku;Apob", "site": "https://openreview.net/forum?id=y1t0c5eUk2w", "pdf_size": 0, "rating": "6;6;6;6;10", "confidence": "", "rating_avg": 6.8, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9uKM-GTPmuoJ:scholar.google.com/&scioq=Fusing+Priori+and+Posteriori+Metrics+for+Automatic+Dataset+Annotation+of+Planar+Grasping&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "yPJ9A0GWLg0", "title": "Embodied Concept Learner: Self-supervised Learning of Concepts and Mapping through Instruction Following", "track": "main", "status": "Poster", "tldr": "We propose ECL, which can ground visual concepts, build semantic maps and plan actions to complete tasks by learning purely from human demonstrations and language instructions.", "abstract": "Humans, even at a very early age, can learn visual concepts and understand geometry and layout through active interaction with the environment, and generalize their compositions to complete tasks described by natural languages in novel scenes. To mimic such capability, we propose Embodied Concept Learner (ECL) in an interactive 3D environment. Specifically, a robot agent can ground visual concepts, build semantic maps and plan actions to complete tasks by learning purely from human demonstrations and language instructions, without access to ground-truth semantic and depth supervision from simulations. 
ECL consists of: (i) an instruction parser that translates the natural languages into executable programs; (ii) an embodied concept learner that grounds visual concepts based on language descriptions; (iii) a map constructor that estimates depth and constructs semantic maps by leveraging the learned concepts; and (iv) a program executor with deterministic policies to execute each program. ECL has several appealing benefits thanks to its modularized design. Firstly, it enables the robotic agent to learn semantics and depth unsupervisedly acting like babies, e.g., ground concepts through active interaction and perceive depth by disparities when moving forward. Secondly, ECL is fully transparent and step-by-step interpretable in long-term planning. Thirdly, ECL could be beneficial for the embodied instruction following (EIF), outperforming previous works on the ALFRED benchmark when the semantic label is not provided. Also, the learned concept can be reused for other downstream tasks, such as reasoning of object states.", "keywords": "Embodied AI;Embodied Concept Learner;Instruction Following", "primary_area": "", "supplementary_material": "/attachment/751b76bd34d36d70ba4da4f0f499d7a939e1865b.zip", "author": "Mingyu Ding;Yan Xu;Zhenfang Chen;David Daniel Cox;Ping Luo;Joshua B. Tenenbaum;Chuang Gan", "authorids": "~Mingyu_Ding1;~Yan_Xu8;~Zhenfang_Chen1;~David_Daniel_Cox1;~Ping_Luo2;~Joshua_B._Tenenbaum1;~Chuang_Gan1", "gender": "M;M;M;;;M;", "homepage": "https://dingmyu.github.io/;https://decayale.github.io/;https://zfchenunique.github.io;;;http://people.csail.mit.edu/ganchuang/;http://luoping.me/", "dblp": "188/5243;;207/5321;48/7659;t/JoshuaBTenenbaum;139/6993;54/4989-2.html", "google_scholar": "w4yTWwoAAAAJ;https://scholar.google.com/citations?pli=1;QSRdIzAAAAAJ;;;PTeSCbIAAAAJ;https://scholar.google.com.hk/citations?hl=en", "orcid": "0000-0001-6556-8359;0000-0002-3462-7931;;;;;0000-0002-6685-7950", "linkedin": "dingmyu/;samuel-yan-xu;\u632f\u65b9-\u9648-512011bb/;;;;", "or_profile": "~Mingyu_Ding1;~Yan_Xu8;~Zhenfang_Chen1;~David_Daniel_Cox1;~Joshua_B._Tenenbaum1;~Chuang_Gan1;~Luo_Ping2", "aff": "University of Hong Kong;The Chinese University of Hong Kong;MIT-IBM Watson AI lab;International Business Machines;Massachusetts Institute of Technology;MIT-IBM Watson AI Lab;The University of Hong Kong", "aff_domain": "hku.hk;cuhk.edu.hk;ibm.com;ibm.com;mit.edu;ibm.com;hku.hk", "position": "PhD student;PhD student;Researcher;IBM Director, MIT-IBM Watson AI Lab;Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nding2022embodied,\ntitle={Embodied Concept Learner: Self-supervised Learning of Concepts and Mapping through Instruction Following},\nauthor={Mingyu Ding and Yan Xu and Zhenfang Chen and David Daniel Cox and Ping Luo and Joshua B. 
Tenenbaum and Chuang Gan},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=yPJ9A0GWLg0}\n}", "github": "https://github.com/dingmyu/ECL", "project": "", "reviewers": "XMVt;dHJH;Zyvv;TjRQ", "site": "https://openreview.net/forum?id=yPJ9A0GWLg0", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10209238517261770275&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;2;2;0", "aff_unique_norm": "University of Hong Kong;Chinese University of Hong Kong;Massachusetts Institute of Technology;International Business Machines Corporation", "aff_unique_dep": ";;IBM Watson AI lab;", "aff_unique_url": "https://www.hku.hk;https://www.cuhk.edu.hk;https://www.mitibmwatsonailab.org;https://www.ibm.com", "aff_unique_abbr": "HKU;CUHK;MIT-IBM AI Lab;IBM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;1;1;1;0", "aff_country_unique": "China;United States" }, { "id": "zNB_UVj5oKQ", "title": "Meta-Learning Priors for Safe Bayesian Optimization", "track": "main", "status": "Oral", "tldr": "Choosing informative but safe priors for safe Bayesian optimization", "abstract": "In robotics, optimizing controller parameters under safety constraints is an important challenge. Safe Bayesian optimization (BO) quantifies uncertainty in the objective and constraints to safely guide exploration in such settings. Hand-designing a suitable probabilistic model can be challenging, however. In the presence of unknown safety constraints, it is crucial to choose reliable model hyper-parameters to avoid safety violations. Here, we propose a data-driven approach to this problem by meta-learning priors for safe BO from offline data.\nWe build on a meta-learning algorithm, F-PACOH, capable of providing reliable uncertainty quantification in settings of data scarcity. As a core contribution, we develop a novel framework for choosing safety-compliant priors in a data-driven manner via empirical uncertainty metrics and a frontier search algorithm.
On benchmark functions and a high-precision motion system, we demonstrate that our meta-learnt priors accelerate convergence of safe BO approaches while maintaining safety.\n", "keywords": "Safety;Meta-Learning;Bayesian Optimization;Gaussian Processes", "primary_area": "", "supplementary_material": "/attachment/5618207a60d014af75982a035a1d916069e080e4.zip", "author": "Jonas Rothfuss;Christopher Koenig;Alisa Rupenyan;Andreas Krause", "authorids": "~Jonas_Rothfuss1;koenig@inspire.ethz.ch;~Alisa_Rupenyan2;~Andreas_Krause1", "gender": "M;;F;M", "homepage": "https://las.inf.ethz.ch/people/jonas-rothfuss;;https://alisa-rupenyan.github.io;https://las.inf.ethz.ch/krausea", "dblp": "213/7319.html;;247/9348;87/1831-1.html", "google_scholar": "EfLpX8QAAAAJ;;8jUSNmsAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";;0000-0002-2170-8564;0000-0001-7260-9673", "linkedin": ";;arupenyan/;krausea/", "or_profile": "~Jonas_Rothfuss1;koenig@inspire.ethz.ch;~Alisa_Rupenyan2;~Andreas_Krause1", "aff": "Swiss Federal Institute of Technology;;Swiss Federal Institute of Technology;ETH Zurich", "aff_domain": "ethz.ch;;ethz.ch;ethz.ch", "position": "PhD student;;Senior scientist;Full Professor", "bibtex": "@inproceedings{\nrothfuss2022metalearning,\ntitle={Meta-Learning Priors for Safe Bayesian Optimization},\nauthor={Jonas Rothfuss and Christopher Koenig and Alisa Rupenyan and Andreas Krause},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=zNB_UVj5oKQ}\n}", "github": "", "project": "", "reviewers": "wnHy;jWPh;iFMD;SuCp", "site": "https://openreview.net/forum?id=zNB_UVj5oKQ", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4508549981224049124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "z_hPo2Fu9A3", "title": "RAP: Risk-Aware Prediction for Robust Planning", "track": "main", "status": "Oral", "tldr": "a framework to perform risk-aware prediction, which facilitates robust planning", "abstract": "Robust planning in interactive scenarios requires predicting the uncertain future to make risk-aware decisions. Unfortunately, due to long-tail safety-critical events, the risk is often under-estimated by finite-sampling approximations of probabilistic motion forecasts. This can lead to overconfident and unsafe robot behavior, even with robust planners. Instead of assuming full prediction coverage that robust planners require, we propose to make prediction itself risk-aware. We introduce a new prediction objective to learn a risk-biased distribution over trajectories, so that risk evaluation simplifies to an expected cost estimation under this biased distribution. This reduces sample complexity of the risk estimation during online planning, which is needed for safe real-time performance. Evaluation results in a didactic simulation environment and on a real-world dataset demonstrate the effectiveness of our approach. 
The code and a demo are available.", "keywords": "Risk Measures;Forecasting;Safety;Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/642cfd8557c51dc4e5c0676c4f86fcb200240305.zip", "author": "Haruki Nishimura;Jean Mercat;Blake Wulfe;Rowan Thomas McAllister;Adrien Gaidon", "authorids": "~Haruki_Nishimura2;~Jean_Mercat1;~Blake_Wulfe1;~Rowan_Thomas_McAllister1;~Adrien_Gaidon1", "gender": ";M;;M;", "homepage": ";http://jean-mercat.netlify.app;;https://rowanmcallister.github.io/;https://adriengaidon.com/", "dblp": ";248/2886;;123/6416;06/7548.html", "google_scholar": ";https://scholar.google.com/citations?hl=fr;;https://scholar.google.co.uk/citations?user=6uIhh6MAAAAJ;https://scholar.google.fr/citations?user=2StUgf4AAAAJ", "orcid": ";0000-0002-4012-9082;;0000-0002-9519-2345;", "linkedin": ";;;rowantmcallister;adrien-gaidon-63ab2358/", "or_profile": "~Haruki_Nishimura2;~Jean_Mercat1;~Blake_Wulfe1;~Rowan_Thomas_McAllister1;~Adrien_Gaidon1", "aff": ";Toyota Research Institute;;Toyota Research Institute;Toyota Research Institute (TRI)", "aff_domain": ";tri.global;;tri.global;tri.global", "position": ";Researcher;;Machine Learning Scientist;Head of ML", "bibtex": "@inproceedings{\nnishimura2022rap,\ntitle={{RAP}: Risk-Aware Prediction for Robust Planning},\nauthor={Haruki Nishimura and Jean Mercat and Blake Wulfe and Rowan Thomas McAllister and Adrien Gaidon},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=z_hPo2Fu9A3}\n}", "github": "https://github.com/TRI-ML/RAP", "project": "", "reviewers": "zkWD;vmFo;QxWd;UeEy", "site": "https://openreview.net/forum?id=z_hPo2Fu9A3", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2504513808794684290&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Toyota Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.tri.global", "aff_unique_abbr": "TRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "zldI4UpuG7v", "title": "Deep Whole-Body Control: Learning a Unified Policy for Manipulation and Locomotion", "track": "main", "status": "Oral", "tldr": "Learning a unified policy for whole-body control of both the arm and legs of a custom-built low-cost quadruped mobile manipulator", "abstract": "An attached arm can significantly increase the applicability of legged robots to several mobile manipulation tasks that are not possible for the wheeled or tracked counterparts. The standard modular control pipeline for such legged manipulators is to decouple the controller into that of manipulation and locomotion. However, this is ineffective. It requires immense engineering to support coordination between the arm and legs, and errors can propagate across modules, causing non-smooth, unnatural motions. It is also biologically implausible given evidence for strong motor synergies across limbs. In this work, we propose to learn a unified policy for whole-body control of a legged manipulator using reinforcement learning.
We propose Regularized Online Adaptation to bridge the Sim2Real gap for high-DoF control, and Advantage Mixing exploiting the causal dependency in the action space to overcome local minima during training the whole-body system. We also present a simple design for a low-cost legged manipulator, and find that our unified policy can demonstrate dynamic and agile behaviors across several task setups. Videos are at https://maniploco.github.io", "keywords": "Mobile Manipulation;Whole-Body Control;Legged Locomotion", "primary_area": "", "supplementary_material": "/attachment/00ca8ef0d6b236b61142b10e26434692fd29801a.zip", "author": "Zipeng Fu;Xuxin Cheng;Deepak Pathak", "authorids": "~Zipeng_Fu1;~Xuxin_Cheng2;~Deepak_Pathak1", "gender": "M;M;M", "homepage": "https://zipengfu.github.io;https://chengxuxin.github.io;https://www.cs.cmu.edu/~dpathak/", "dblp": "245/1504;;155/9860", "google_scholar": "wMcPTbEAAAAJ;Z8vhOxYAAAAJ;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ", "orcid": ";;", "linkedin": "zipengfu;;pathak22/", "or_profile": "~Zipeng_Fu1;~Xuxin_Cheng2;~Deepak_Pathak1", "aff": "Stanford University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "stanford.edu;cs.cmu.edu;cmu.edu", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nfu2022deep,\ntitle={Deep Whole-Body Control: Learning a Unified Policy for Manipulation and Locomotion},\nauthor={Zipeng Fu and Xuxin Cheng and Deepak Pathak},\nbooktitle={6th Annual Conference on Robot Learning},\nyear={2022},\nurl={https://openreview.net/forum?id=zldI4UpuG7v}\n}", "github": "", "project": "", "reviewers": "KgX5;bkQw;Hcpy;3ot8", "site": "https://openreview.net/forum?id=zldI4UpuG7v", "pdf_size": 0, "rating": "10;10;10;10", "confidence": "", "rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 165, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8008467424756829296&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.cmu.edu", "aff_unique_abbr": "Stanford;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" } ]