[ { "id": "-JwmfQC6IRt", "title": "Guided Imitation of Task and Motion Planning", "track": "main", "status": "Oral", "tldr": "", "abstract": "While modern policy optimization methods can do complex manipulation from sensory data, they struggle on problems with extended time horizons and multiple sub-goals. On the other hand, task and motion planning (TAMP) methods scale to long horizons but they are computationally expensive and need to precisely track world state. We propose a method that draws on the strength of both methods: we train a policy to imitate a TAMP solver's output. This produces a feed-forward policy that can accomplish multi-step tasks from sensory data. First, we build an asynchronous distributed TAMP solver that can produce supervision data fast enough for imitation learning. Then, we propose a hierarchical policy architecture that lets us use partially trained control policies to speed up the TAMP solver. In robotic manipulation tasks with 7-DoF joint control, the partially trained policies reduce the time needed for planning by a factor of up to 2.6. 
Among these tasks, we can learn a policy that solves the RoboSuite 4-object pick-place task 88% of the time from object pose observations and a policy that solves the RoboDesk 9-goal benchmark 79% of the time from RGB images (averaged across the 9 disparate tasks).", "keywords": "task and motion planning;mobile manipulation;imitation learning", "primary_area": "", "supplementary_material": "/attachment/95e191b992887b9f7695dc4203dd0393b87fb03f.zip", "author": "Michael James McDonald;Dylan Hadfield-Menell", "authorids": "~Michael_James_McDonald1;~Dylan_Hadfield-Menell2", "gender": "M;M", "homepage": ";http://people.csail.mit.edu/dhm/", "dblp": ";135/8332", "google_scholar": ";4mVPFQ8AAAAJ", "orcid": ";0000-0002-6168-4763", "linkedin": "http://www.linkedin.com/in/michael-mcdonald-604998214;", "or_profile": "~Michael_James_McDonald1;~Dylan_Hadfield-Menell2", "aff": "University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu", "position": "MS student;PhD student", "bibtex": "@inproceedings{\nmcdonald2021guided,\ntitle={Guided Imitation of Task and Motion Planning},\nauthor={Michael James McDonald and Dylan Hadfield-Menell},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=-JwmfQC6IRt}\n}", "github": "", "project": "", "reviewers": "KfVB;1SbR;i7Gj", "site": "https://openreview.net/forum?id=-JwmfQC6IRt", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4944001434705318670&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", 
"aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "-QJ__aPUTN2", "title": "Guiding Multi-Step Rearrangement Tasks with Natural Language Instructions", "track": "main", "status": "Poster", "tldr": "", "abstract": " Enabling human operators to interact with robotic agents using natural language would allow non-experts to intuitively instruct these agents. Towards this goal, we propose a novel Transformer-based model which enables a user to guide a robot arm through a 3D multi-step manipulation task with natural language commands. Our system maps images and commands to masks over grasp or place locations, grounding the language directly in perceptual space. In a suite of block rearrangement tasks, we show that these masks can be combined with an existing manipulation framework without re-training, greatly improving learning efficiency. Our masking model is several orders of magnitude more sample efficient than typical Transformer models, operating with hundreds, not millions, of examples. Our modular design allows us to leverage supervised and reinforcement learning, providing an easy interface for experimentation with different architectures. Our model completes block manipulation tasks with synthetic commands $530\\%$ more often than a UNet-based baseline, and learns to localize actions correctly while creating a mapping of symbols to perceptual input that supports compositional reasoning. We provide a valuable resource for 3D manipulation instruction following research by porting an existing 3D block dataset with crowdsourced language to a simulated environment. Our method's $25.3\\%$ absolute improvement in identifying the correct block on the ported dataset demonstrates its ability to handle syntactic and lexical variation. 
", "keywords": "Instruction following;object grasping and manipulation;multimodal fusion;computer vision for robotic applications", "primary_area": "", "supplementary_material": "/attachment/3095e3dc8f248df80d690889f2a6414cdd0a6ec1.zip", "author": "Elias Stengel-Eskin;Andrew Hundt;Zhuohong He;Aditya Murali;Nakul Gopalan;Matthew Gombolay;Gregory D. Hager", "authorids": "~Elias_Stengel-Eskin1;~Andrew_Hundt1;~Zhuohong_He1;~Aditya_Murali1;~Nakul_Gopalan1;~Matthew_Gombolay1;~Gregory_D._Hager1", "gender": "M;M;M;M;;M;M", "homepage": "https://esteng.github.io;https://ahundt.github.io/;https://zooeyhe.com/;http://www.github.com/adit98;http://nakulgopalan.github.io/;https://core-robotics.gatech.edu/;http://www.cs.jhu.edu/~hager/", "dblp": "212/6138;190/7614;;;135/8173;144/1022;12/5814", "google_scholar": "gr_ZVSQAAAAJ;N0JJHwkAAAAJ;;yN5fTGEAAAAJ;dPsQR14AAAAJ;Ihyz20wAAAAJ;https://scholar.google.com.tw/citations?user=ivApfKcAAAAJ", "orcid": "0000-0002-6689-505X;0000-0003-2023-1810;;;;;", "linkedin": ";;zhuohonghe/;;;;gregory-hager-11a1056/", "or_profile": "~Elias_Stengel-Eskin1;~Andrew_Hundt1;~Zhuohong_He1;~Aditya_Murali1;~Nakul_Gopalan1;~Matthew_Gombolay1;~Gregory_D._Hager1", "aff": "Microsoft Research;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Georgia Institute of Technology;Georgia Institute of Technology;Johns Hopkins University", "aff_domain": "research.microsoft.com;jhu.edu;jhu.edu;jhu.edu;gatech.edu;cc.gatech.edu;jhu.edu", "position": "Intern;PhD student;MS student;MS student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nstengel-eskin2021guiding,\ntitle={Guiding Multi-Step Rearrangement Tasks with Natural Language Instructions},\nauthor={Elias Stengel-Eskin and Andrew Hundt and Zhuohong He and Aditya Murali and Nakul Gopalan and Matthew Gombolay and Gregory D. 
Hager},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=-QJ__aPUTN2}\n}", "github": "", "project": "", "reviewers": "kWqo;gQ8o;h8PP;1smN", "site": "https://openreview.net/forum?id=-QJ__aPUTN2", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 24, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9668876258610123899&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;1;1;2;2;1", "aff_unique_norm": "Microsoft;Johns Hopkins University;Georgia Institute of Technology", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.jhu.edu;https://www.gatech.edu", "aff_unique_abbr": "MSR;JHU;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "0CE82_hBPzA", "title": "Learning Eye-in-Hand Camera Calibration from a Single Image", "track": "main", "status": "Poster", "tldr": "", "abstract": "Eye-in-hand camera calibration is a fundamental and long-studied problem in robotics. We present a study on using learning-based methods for solving this problem online from a single RGB image, whilst training our models with entirely synthetic data. We study three main approaches: one direct regression model that directly predicts the extrinsic matrix from an image, one sparse correspondence model that regresses 2D keypoints and then uses PnP, and one dense correspondence model that uses regressed depth and segmentation maps to enable ICP pose estimation. 
In our experiments, we benchmark these methods against each other and against well-established classical methods, to find the surprising result that direct regression outperforms other approaches, and we perform noise-sensitivity analysis to gain further insights into these results.", "keywords": "Camera Calibration;Robot Manipulation;Sim-to-Real", "primary_area": "", "supplementary_material": "/attachment/29d23b2333d10af2171888281a42231b49c6bae5.zip", "author": "Eugene Valassakis;Kamil Dreczkowski;Edward Johns", "authorids": "~Eugene_Valassakis1;kamil.dreczkowski15@imperial.ac.uk;~Edward_Johns1", "gender": ";;M", "homepage": "https://eugval.com/;;https://www.robot-learning.uk", "dblp": "272/8780;;68/9968", "google_scholar": "aqrNn7wAAAAJ;;https://scholar.google.co.uk/citations?user=sMIUkiQAAAAJ", "orcid": ";;0000-0002-8914-8786", "linkedin": ";;https://uk.linkedin.com/in/edward-johns-1b24845a", "or_profile": "~Eugene_Valassakis1;kamil.dreczkowski15@imperial.ac.uk;~Edward_Johns1", "aff": "Imperial College London;;Imperial College London", "aff_domain": "imperial.ac.uk;;imperial.ac.uk", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nvalassakis2021learning,\ntitle={Learning Eye-in-Hand Camera Calibration from a Single Image},\nauthor={Eugene Valassakis and Kamil Dreczkowski and Edward Johns},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=0CE82_hBPzA}\n}", "github": "", "project": "", "reviewers": "NfZb;SvQJ;VdxP;mbiM", "site": "https://openreview.net/forum?id=0CE82_hBPzA", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3258191454621873071&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", 
"aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "0QJeE5hkyFZ", "title": "FlingBot: The Unreasonable Effectiveness of Dynamic Manipulation for Cloth Unfolding", "track": "main", "status": "Oral", "tldr": "", "abstract": "High-velocity dynamic actions (e.g., fling or throw) play a crucial role in our everyday interaction with deformable objects by improving our efficiency and effectively expanding our physical reach range. Yet, most prior works have tackled cloth manipulation using exclusively single-arm quasi-static actions, which requires a large number of interactions for challenging initial cloth configurations and strictly limits the maximum cloth size by the robot's reach range. In this work, we demonstrate the effectiveness of dynamic flinging actions for cloth unfolding with our proposed self-supervised learning framework, FlingBot.\nOur approach learns how to unfold a piece of fabric from arbitrary initial configurations using a pick, stretch, and fling primitive for a dual-arm setup from visual observations. The final system achieves over 80% coverage within 3 actions on novel cloths, can unfold cloths larger than the system's reach range, and generalizes to T-shirts despite being trained on only rectangular cloths.\nWe also finetuned FlingBot on a real-world dual-arm robot platform, where it increased the cloth coverage over 4 times more than the quasi-static baseline did. 
The simplicity of FlingBot combined with its superior performance over quasi-static baselines demonstrates the effectiveness of dynamic actions for deformable object manipulation.", "keywords": "Dynamic manipulation;Cloth manipulation;Self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/9a0eb38aa299b22b1e7a449558960b045df5fdfc.zip", "author": "Huy Ha;Shuran Song", "authorids": "~Huy_Ha1;~Shuran_Song3", "gender": "M;F", "homepage": "https://www.cs.columbia.edu/~huy/;https://shurans.github.io/", "dblp": "277/9554;", "google_scholar": "-3-f_8YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Huy_Ha1;~Shuran_Song3", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;cs.columbia.edu", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nha2021flingbot,\ntitle={FlingBot: The Unreasonable Effectiveness of Dynamic Manipulation for Cloth Unfolding},\nauthor={Huy Ha and Shuran Song},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=0QJeE5hkyFZ}\n}", "github": "", "project": "", "reviewers": "X3S4;BAzn;LQc9;fiDj", "site": "https://openreview.net/forum?id=0QJeE5hkyFZ", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 181, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6764857693775066807&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0WDtVJVwBcf", "title": "Learning Backchanneling Behaviors for a Social Robot via Data Augmentation 
from Human-Human Conversations", "track": "main", "status": "Poster", "tldr": "", "abstract": "Backchanneling behaviors on a robot, such as nodding, can make talking to a robot feel more natural and engaging by giving a sense that the robot is actively listening. For backchanneling to be effective, it is important that the timing of such cues is appropriate given the humans' conversational behaviors. Recent progress has shown that these behaviors can be learned from datasets of human-human conversations. However, recent data-driven methods tend to overfit to the human speakers that are seen in training data and fail to generalize well to previously unseen speakers. In this paper, we explore the use of data augmentation for effective nodding behavior in a robot. We show that, by augmenting the input speech and visual features, we can produce data-driven models that are more robust to unseen features without collecting additional data. We analyze the efficacy of data-driven backchanneling in a realistic human-robot conversational setting with a user study, showing that users perceived the data-driven model to be better at listening as compared to rule-based and random baselines.", "keywords": "social robots;backchanneling;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Michael Murray;Nick Walker;Amal Nanavati;Patricia Alves-Oliveira;Nikita Filippov;Allison Sauppe;Bilge Mutlu;Maya Cakmak", "authorids": "~Michael_Murray2;~Nick_Walker1;amaln@cs.washington.edu;patri@cs.washington.edu;nikitaf@cs.washington.edu;~Allison_Sauppe1;~Bilge_Mutlu2;~Maya_Cakmak1", "gender": ";M;;;;;;", "homepage": "https://mmurray.com;https://nickwalker.us;;;;;http://bmutlu.github.io;", "dblp": ";14/1613-1;;;;;;", "google_scholar": ";JYaJjE8AAAAJ;;;;;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0001-7711-0003;;;;;;", "linkedin": ";;;;;;;", "or_profile": 
"~Michael_Murray2;~Nick_Walker1;amaln@cs.washington.edu;patri@cs.washington.edu;nikitaf@cs.washington.edu;~Allison_Sauppe1;~Bilge_Mutlu2;~Maya_Cakmak1", "aff": "University of Washington;University of Washington;;;;;University of Wisconsin-Madison;", "aff_domain": "washington.edu;washington.edu;;;;;wisc.edu;", "position": "PhD student;PhD student;;;;;Full Professor;", "bibtex": "@inproceedings{\nmurray2021learning,\ntitle={Learning Backchanneling Behaviors for a Social Robot via Data Augmentation from Human-Human Conversations},\nauthor={Michael Murray and Nick Walker and Amal Nanavati and Patricia Alves-Oliveira and Nikita Filippov and Allison Sauppe and Bilge Mutlu and Maya Cakmak},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=0WDtVJVwBcf}\n}", "github": "", "project": "", "reviewers": "uPNX;kruZ;Uy2q", "site": "https://openreview.net/forum?id=0WDtVJVwBcf", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3499635710547570009&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Washington;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0f7gUXVAcE9", "title": "RICE: Refining Instance Masks in Cluttered Environments with Graph Neural Networks", "track": "main", "status": "Poster", "tldr": "", "abstract": "Segmenting unseen object instances in cluttered environments is an important capability that robots need when functioning in unstructured environments. 
While previous methods have exhibited promising results, they still tend to provide incorrect results in highly cluttered scenes. We postulate that a network architecture that encodes relations between objects at a high-level can be beneficial. Thus, in this work, we propose a novel framework that refines the output of such methods by utilizing a graph-based representation of instance masks. We train deep networks capable of sampling smart perturbations to the segmentations, and a graph neural network, which can encode relations between objects, to evaluate the perturbed segmentations. Our proposed method is orthogonal to previous works and achieves state-of-the-art performance when combined with them. We demonstrate an application that uses uncertainty estimates generated by our method to guide a manipulator, leading to efficient understanding of cluttered scenes. Code, models, and video can be found at https://github.com/chrisdxie/rice.", "keywords": "Unseen Object Instance Segmentation;Graph Neural Network", "primary_area": "", "supplementary_material": "/attachment/692e1b3e48d97535fb9bd3c2a17ff0b9ece1dd5e.zip", "author": "Chris Xie;Arsalan Mousavian;Yu Xiang;Dieter Fox", "authorids": "~Chris_Xie1;~Arsalan_Mousavian1;~Yu_Xiang3;~Dieter_Fox1", "gender": "M;M;M;M", "homepage": "https://cs.gmu.edu/~amousavi/;https://homes.cs.washington.edu/~fox/;https://chrisdxie.github.io/;https://yuxng.github.io/", "dblp": "164/8572;f/DieterFox;164/8466;00/6716-1", "google_scholar": "fcA9m88AAAAJ;DqXsbPAAAAAJ;iJuDBhEAAAAJ;", "orcid": ";;;0000-0001-9431-5131", "linkedin": ";;;", "or_profile": "~Arsalan_Mousavian1;~Dieter_Fox1;~Christopher_Xie1;~Yu_Xiang1", "aff": "NVIDIA;Department of Computer Science;Department of Computer Science, University of Washington;NVIDIA", "aff_domain": "nvidia.com;cs.washington.edu;cs.washington.edu;nvidia.com", "position": "Research Scientist;Full Professor;PhD student;Research Scientist", "bibtex": "@inproceedings{\nxie2021rice,\ntitle={{RICE}: 
Refining Instance Masks in Cluttered Environments with Graph Neural Networks},\nauthor={Chris Xie and Arsalan Mousavian and Yu Xiang and Dieter Fox},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=0f7gUXVAcE9}\n}", "github": "", "project": "", "reviewers": "QM46;cMBw;5yWX;1fhJ", "site": "https://openreview.net/forum?id=0f7gUXVAcE9", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12770724340669694172&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "NVIDIA;Unknown Institution;University of Washington", "aff_unique_dep": "NVIDIA Corporation;Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;;https://www.washington.edu", "aff_unique_abbr": "NVIDIA;;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "1GNV9SW95eJ", "title": "Learning Off-Policy with Online Planning", "track": "main", "status": "Oral", "tldr": "", "abstract": "Reinforcement learning (RL) in low-data and risk-sensitive domains requires performant and flexible deployment policies that can readily incorporate constraints during deployment. One such class of policies are the semi-parametric H-step lookahead policies, which select actions using trajectory optimization over a dynamics model for a fixed horizon with a terminal value function. In this work, we investigate a novel instantiation of H-step lookahead with a learned model and a terminal value function learned by a model-free off-policy algorithm, named Learning Off-Policy with Online Planning (LOOP). 
We provide a theoretical analysis of this method, suggesting a tradeoff between model errors and value function errors, and empirically demonstrate this tradeoff to be beneficial in deep reinforcement learning. Furthermore, we identify the \"Actor Divergence\" issue in this framework and propose Actor Regularized Control (ARC), a modified trajectory optimization procedure. We evaluate our method on a set of robotic tasks for Offline and Online RL and demonstrate improved performance. We also show the flexibility of LOOP to incorporate safety constraints during deployment with a set of navigation environments. We demonstrate that LOOP is a desirable framework for robotics applications based on its strong performance in various important RL settings.\n", "keywords": "Reinforcement Learning;Trajectory Optimization;Safety", "primary_area": "", "supplementary_material": "/attachment/909098bb223c7969b003e8fc58199d4877b85cf2.zip", "author": "Harshit Sikchi;Wenxuan Zhou;David Held", "authorids": "~Harshit_Sikchi1;~Wenxuan_Zhou1;~David_Held1", "gender": "M;F;M", "homepage": "https://hari-sikchi.github.io/;https://wenxuan-zhou.github.io/;http://davheld.github.io/", "dblp": "271/4663;;22/11147", "google_scholar": "jFOPZE0AAAAJ;picvdvEAAAAJ;0QtU-NsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Harshit_Sikchi1;~Wenxuan_Zhou1;~David_Held1", "aff": "University of Texas, Austin;Google DeepMind;Carnegie Mellon University", "aff_domain": "utexas.edu;deepmind.com;cmu.edu", "position": "PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nsikchi2021learning,\ntitle={Learning Off-Policy with Online Planning},\nauthor={Harshit Sikchi and Wenxuan Zhou and David Held},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=1GNV9SW95eJ}\n}", "github": "", "project": "", "reviewers": "ztVy;Sv1h;PXsM", "site": "https://openreview.net/forum?id=1GNV9SW95eJ", "pdf_size": 0, "rating": "10;10;10", "confidence": "", 
"rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9239100575492782026&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;Google;Carnegie Mellon University", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.utexas.edu;https://deepmind.com;https://www.cmu.edu", "aff_unique_abbr": "UT Austin;DeepMind;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "1mDC24WX8Yh", "title": "Tactile Image-to-Image Disentanglement of Contact Geometry from Motion-Induced Shear", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robotic touch, particularly when using soft optical tactile sensors, suffers from distortion caused by motion-dependent shear. The manner in which the sensor contacts a stimulus is entangled with the tactile information about the stimulus geometry. In this work, we propose a supervised convolutional deep neural network model that learns to disentangle, in the latent space, the components of sensor deformations caused by contact geometry from those due to sliding-induced shear. The approach is validated by showing a close match between the unsheared images reconstructed from sheared images and their vertical tap (non-sheared) counterparts. In addition, the unsheared tactile images faithfully reconstruct the contact geometry masked in sheared data, and allow robust estimation of the contact pose of use for sliding exploration of various planar shapes. Overall, the contact geometry reconstruction in conjunction with sliding exploration were used for faithful full object reconstruction of various planar shapes. 
The methods have broad applicability to deep learning models for robots with a shear-sensitive sense of touch.", "keywords": "Robotic Touch;Disentanglement;Shear;Object Reconstruction", "primary_area": "", "supplementary_material": "/attachment/32648752d5099d7207d055d5629b1e9d92795b23.zip", "author": "Anupam K. Gupta;Laurence Aitchison;Nathan F. Lepora", "authorids": "~Anupam_K._Gupta1;~Laurence_Aitchison1;~Nathan_F._Lepora1", "gender": ";;", "homepage": ";http://www.gatsby.ucl.ac.uk/~laurence/;https://www.lepora.com", "dblp": ";155/1918.html;76/10010", "google_scholar": "https://scholar.google.com/citations?hl=en;;fb2WiJgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Anupam_K._Gupta1;~Laurence_Aitchison1;~Nathan_F._Lepora1", "aff": "University of Bristol;University of Bristol;University of Bristol", "aff_domain": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "position": "Postdoctoral Research Associate;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngupta2021tactile,\ntitle={Tactile Image-to-Image Disentanglement of Contact Geometry from Motion-Induced Shear},\nauthor={Anupam K. Gupta and Laurence Aitchison and Nathan F. 
Lepora},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=1mDC24WX8Yh}\n}", "github": "", "project": "", "reviewers": "JxBB;xr1d;oHk8;gcAq", "site": "https://openreview.net/forum?id=1mDC24WX8Yh", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2090397975461335258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "1yj7yMTtna", "title": "Towards Real Robot Learning in the Wild: A Case Study in Bipedal Locomotion", "track": "main", "status": "Poster", "tldr": "", "abstract": "Algorithms for self-learning systems have made considerable progress in recent years, yet safety concerns and the need for additional instrumentation have so far largely limited learning experiments with real robots to well controlled lab settings. In this paper, we demonstrate how a small bipedal robot can autonomously learn to walk with minimal human intervention and with minimal instrumentation of the environment. We employ data-efficient off-policy deep reinforcement learning to learn to walk end-to-end, directly on hardware, using rewards that are computed exclusively from proprioceptive sensing. To allow the robot to autonomously adapt its behaviour to its environment, we additionally provide the agent with raw RGB camera images as input. 
By deploying two robots in different geographic locations while sharing data in a distributed learning setup, we achieve higher throughput and greater diversity of the training data.\nOur learning experiments constitute a step towards the long-term vision of learning \"in the wild\" for legged robots, and, to our knowledge, represent the first demonstration of learning a deep neural network controller for bipedal locomotion directly on hardware.", "keywords": "Legged Locomotion;Reinforcement Learning;Vision", "primary_area": "", "supplementary_material": "/attachment/7e87645eb15a2a81cf249544f35e990ad75f9762.zip", "author": "Michael Bloesch;Jan Humplik;Viorica Patraucean;Roland Hafner;Tuomas Haarnoja;Arunkumar Byravan;Noah Yamamoto Siegel;Saran Tunyasuvunakool;Federico Casarini;Nathan Batchelor;Francesco Romano;Stefano Saliceti;Martin Riedmiller;S. M. Ali Eslami;Nicolas Heess", "authorids": "~Michael_Bloesch1;~Jan_Humplik1;~Viorica_Patraucean1;~Roland_Hafner1;~Tuomas_Haarnoja1;~Arunkumar_Byravan1;~Noah_Yamamoto_Siegel1;~Saran_Tunyasuvunakool1;fcasarini@google.com;batchelor@google.com;fraromano@google.com;~Stefano_Saliceti1;~Martin_Riedmiller1;~S._M._Ali_Eslami1;~Nicolas_Heess1", "gender": ";M;F;Not Specified;M;M;;;;;;M;M;M;", "homepage": ";;;;;https://homes.cs.washington.edu/~barun/;;;;;;;https://www.riedmiller.me/;http://arkitus.com/research;", "dblp": "40/8368;215/9213;21/8618;19/765;80/9963;151/9400;259/1484;;;;;;;117/4847;76/9181", "google_scholar": "fn6GhgoAAAAJ;YE9w2BsAAAAJ;https://scholar.google.fr/citations?user=hWzXZUMAAAAJ;;VT7peyEAAAAJ;obYwWiMAAAAJ;l2E0LR4AAAAJ;;;;;UKrS1_IAAAAJ;1gVfqpcAAAAJ;skyUvycAAAAJ;79k7bGEAAAAJ", "orcid": ";;;;;;0000-0002-5746-117X;;;;;;;;", "linkedin": ";;;;tuomas-haarnoja;;noah-y-siegel-8751925b;;;;;stefanosaliceti/;;;", "or_profile": 
"~Michael_Bloesch1;~Jan_Humplik1;~Viorica_Patraucean1;~Roland_Hafner1;~Tuomas_Haarnoja1;~Arunkumar_Byravan1;~Noah_Yamamoto_Siegel1;~Saran_Tunyasuvunakool1;fcasarini@google.com;batchelor@google.com;fraromano@google.com;~Stefano_Saliceti1;~Martin_Riedmiller1;~S._M._Ali_Eslami1;~Nicolas_Heess1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google;Google DeepMind;;;;;Google DeepMind;;Google;Google DeepMind", "aff_domain": "google.com;google.com;google.com;deepmind.com;deepmind.com;google.com;deepmind.com;;;;;deepmind.com;;google.com;google.com", "position": "Research Scientist;Research scientist;Research scientist;Researcher;Research Scientist;Research Scientist;Researcher;;;;;Mechanical Research Engineer;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nbloesch2021towards,\ntitle={Towards Real Robot Learning in the Wild: A Case Study in Bipedal Locomotion},\nauthor={Michael Bloesch and Jan Humplik and Viorica Patraucean and Roland Hafner and Tuomas Haarnoja and Arunkumar Byravan and Noah Yamamoto Siegel and Saran Tunyasuvunakool and Federico Casarini and Nathan Batchelor and Francesco Romano and Stefano Saliceti and Martin Riedmiller and S. M. 
Ali Eslami and Nicolas Heess},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=1yj7yMTtna}\n}", "github": "", "project": "", "reviewers": "Mhfu;tDwM;n7gp", "site": "https://openreview.net/forum?id=1yj7yMTtna", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 15, "corr_rating_confidence": 0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1957584146345661033&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "2NcPgLa7yqD", "title": "Tactile Sim-to-Real Policy Transfer via Real-to-Sim Image Translation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Simulation has recently become key for deep reinforcement learning to safely and efficiently acquire general and complex control policies from visual and proprioceptive inputs. Tactile information is not usually considered despite its direct relation to environment interaction. In this work, we present a suite of simulated environments tailored towards tactile robotics and reinforcement learning. A simple and fast method of simulating optical tactile sensors is provided, where high-resolution contact geometry is represented as depth images. Proximal Policy Optimisation (PPO) is used to learn successful policies across all considered tasks. A data-driven approach enables translation of the current state of a real tactile sensor to corresponding simulated depth images. 
This policy is implemented within a real-time control loop on a physical robot to demonstrate zero-shot sim-to-real policy transfer on several physically-interactive tasks requiring a sense of touch. ", "keywords": "Tactile Robotics;Sim2Real;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/60c85df9d10a1ca67407d85ff02ef61c1cbd6b0e.zip", "author": "Alex Church;John Lloyd;raia hadsell;Nathan F. Lepora", "authorids": "~Alex_Church1;~John_Lloyd1;~raia_hadsell1;~Nathan_F._Lepora1", "gender": ";M;F;", "homepage": "https://github.com/ac-93/;https://www.researchgate.net/profile/John_Lloyd30;http://www.raiahadsell.com;https://www.lepora.com", "dblp": ";;http://dblp.uni-trier.de/pers/hd/h/Hadsell:Raia;76/10010", "google_scholar": ";;EWQnacoAAAAJ;fb2WiJgAAAAJ", "orcid": ";0000-0002-5630-683X;;", "linkedin": "ac93/;john-l-840b16a6/;;", "or_profile": "~Alex_Church1;~John_Lloyd1;~raia_hadsell1;~Nathan_F._Lepora1", "aff": "University of Bristol;;Google DeepMind;University of Bristol", "aff_domain": "bristol.ac.uk;;deepmind.com;bristol.ac.uk", "position": "PhD student;;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nchurch2021tactile,\ntitle={Tactile Sim-to-Real Policy Transfer via Real-to-Sim Image Translation},\nauthor={Alex Church and John Lloyd and raia hadsell and Nathan F. 
Lepora},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=2NcPgLa7yqD}\n}", "github": "", "project": "", "reviewers": "3knc;7RPs;Cj9a;3URa", "site": "https://openreview.net/forum?id=2NcPgLa7yqD", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5839734596965518062&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Bristol;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.bristol.ac.uk;https://deepmind.com", "aff_unique_abbr": "Bristol;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "2WivNtnaFzx", "title": "SimNet: Enabling Robust Unknown Object Manipulation from Pure Synthetic Data via Stereo", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robot manipulation of unknown objects in unstructured environments is a challenging problem due to the variety of shapes, materials, arrangements and lighting conditions. Even with large-scale real-world data collection, robust perception and manipulation of transparent and reflective objects across various lighting conditions remains challenging. To address these challenges we propose an approach to performing sim-to-real transfer of robotic perception. The underlying model, SimNet, is trained as a single multi-headed neural network using simulated stereo data as input and simulated object segmentation masks, 3D oriented bounding boxes (OBBs), object keypoints and disparity as output. A key component of SimNet is the incorporation of a learned stereo sub-network that predicts disparity. 
SimNet is evaluated on unknown object detection and deformable object keypoint detection and significantly outperforms a baseline that uses a structured light RGB-D sensor. By inferring grasp positions using the OBB and keypoint predictions, SimNet can be used to perform end-to-end manipulation of unknown objects across our fleet of Toyota HSR robots. In object grasping experiments, SimNet significantly outperforms the RBG-D baseline on optically challenging objects, suggesting that SimNet can enable robust manipulation of unknown objects, including transparent objects, in novel environments.", "keywords": "Sim-to-Real;Computer Vision;Manipulation", "primary_area": "", "supplementary_material": "/attachment/20b490ed5c639307c6f9f195f5707b73ce73360d.zip", "author": "Mike Laskey;Brijen Thananjeyan;Kevin Stone;Thomas Kollar;Mark Tjersland", "authorids": "~Mike_Laskey1;~Brijen_Thananjeyan1;kevin.stone@tri.global;thomas.kollar@tri.global;mark.tjersland@tri.global", "gender": ";M;;;", "homepage": ";http://bthananjeyan.github.io/;;;", "dblp": ";203/5466;;;", "google_scholar": "qwc_1bsAAAAJ;fftO_HsAAAAJ;;;", "orcid": ";;;;", "linkedin": "michael-laskey-4b087ba2/;;;;", "or_profile": "~Mike_Laskey1;~Brijen_Thananjeyan1;kevin.stone@tri.global;thomas.kollar@tri.global;mark.tjersland@tri.global", "aff": "Toyota Research Institute;University of California, Berkeley;;;", "aff_domain": "tri.global;berkeley.edu;;;", "position": "Researcher;PhD student;;;", "bibtex": "@inproceedings{\nlaskey2021simnet,\ntitle={SimNet: Enabling Robust Unknown Object Manipulation from Pure Synthetic Data via Stereo},\nauthor={Mike Laskey and Brijen Thananjeyan and Kevin Stone and Thomas Kollar and Mark Tjersland},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=2WivNtnaFzx}\n}", "github": "", "project": "", "reviewers": "z3Gb;m12X;oTrx;o8ha", "site": "https://openreview.net/forum?id=2WivNtnaFzx", "pdf_size": 0, "rating": "4;6;6;10", 
"confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12795070191304488465&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Toyota Research Institute;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.tri.global;https://www.berkeley.edu", "aff_unique_abbr": "TRI;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "2g_c8okBd_2", "title": "Hierarchically Integrated Models: Learning to Navigate from Heterogeneous Robots", "track": "main", "status": "Poster", "tldr": "", "abstract": "Deep reinforcement learning algorithms require large and diverse datasets in order to learn successful policies for perception-based mobile navigation. However, gathering such datasets with a single robot can be prohibitively expensive. Collecting data with multiple different robotic platforms with possibly different dynamics is a more scalable approach to large-scale data collection. But how can deep reinforcement learning algorithms leverage such heterogeneous datasets? In this work, we propose a deep reinforcement learning algorithm with hierarchically integrated models (HInt). At training time, HInt learns separate perception and dynamics models, and at test time, HInt integrates the two models in a hierarchical manner and plans actions with the integrated model. This method of planning with hierarchically integrated models allows the algorithm to train on datasets gathered by a variety of different platforms, while respecting the physical capabilities of the deployment robot at test time. 
Our mobile navigation experiments show that HInt outperforms conventional hierarchical policies and single-source approaches.", "keywords": "deep reinforcement learning;multi-robot learning;mobile navigation", "primary_area": "", "supplementary_material": "/attachment/2e1b62b7569d24d12333a857e4912aeab71a2ed0.zip", "author": "Katie Kang;Gregory Kahn;Sergey Levine", "authorids": "~Katie_Kang1;gkahn@berkeley.edu;~Sergey_Levine1", "gender": "F;;M", "homepage": "http://katiekang.com/;;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "236/5055;;80/7594", "google_scholar": ";;8R35rCwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Katie_Kang1;gkahn@berkeley.edu;~Sergey_Levine1", "aff": ";;Google", "aff_domain": ";;google.com", "position": ";;Research Scientist", "bibtex": "@inproceedings{\nkang2021hierarchically,\ntitle={Hierarchically Integrated Models: Learning to Navigate from Heterogeneous Robots},\nauthor={Katie Kang and Gregory Kahn and Sergey Levine},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=2g_c8okBd_2}\n}", "github": "", "project": "", "reviewers": "cpmD;JJFZ;DdRP;KezA", "site": "https://openreview.net/forum?id=2g_c8okBd_2", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11833133492756614244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "2uGN5jNJROR", "title": "iGibson 2.0: Object-Centric Simulation for Robot Learning of Everyday Household Tasks", "track": "main", "status": "Poster", 
"tldr": "", "abstract": "Recent research in embodied AI has been boosted by the use of simulation environments to develop and train robot learning approaches. However, the use of simulation has skewed the attention to tasks that only require what robotics simulators can simulate: motion and physical contact. We present iGibson 2.0, an open-source simulation environment that supports the simulation of a more diverse set of household tasks through three key innovations. First, iGibson 2.0 supports object states, including temperature, wetness level, cleanliness level, and toggled and sliced states, necessary to cover a wider range of tasks. Second, iGibson 2.0 implements a set of predicate logic functions that map the simulator states to logic states like Cooked or Soaked. Additionally, given a logic state, iGibson 2.0 can sample valid physical states that satisfy it. This functionality can generate potentially infinite instances of tasks with minimal effort from the users. The sampling mechanism allows our scenes to be more densely populated with small objects in semantically meaningful locations. Third, iGibson 2.0 includes a virtual reality (VR) interface to immerse humans in its scenes to collect demonstrations. As a result, we can collect demonstrations from humans on these new types of tasks, and use them for imitation learning. We evaluate the new capabilities of iGibson 2.0 to enable robot learning of novel tasks, in the hope of demonstrating the potential of this new simulator to support new research in embodied AI. 
iGibson 2.0 and its new dataset are publicly available at http://svl.stanford.edu/igibson/.", "keywords": "Simulation Environment;Embodied AI;Virtual Reality Interface", "primary_area": "", "supplementary_material": "/attachment/d23207ec9f5fb837593edacb235aa79aa358394e.zip", "author": "Chengshu Li;Fei Xia;Roberto Mart\u00edn-Mart\u00edn;Michael Lingelbach;Sanjana Srivastava;Bokui Shen;Kent Elliott Vainio;Cem Gokmen;Gokul Dharan;Tanish Jain;Andrey Kurenkov;Karen Liu;Hyowon Gweon;Jiajun Wu;Li Fei-Fei;Silvio Savarese", "authorids": "~Chengshu_Li1;~Fei_Xia1;~Roberto_Mart\u00edn-Mart\u00edn1;~Michael_Lingelbach1;~Sanjana_Srivastava2;~Bokui_Shen1;~Kent_Elliott_Vainio1;~Cem_Gokmen1;~Gokul_Dharan1;~Tanish_Jain1;~Andrey_Kurenkov1;~Karen_Liu1;~Hyowon_Gweon1;~Jiajun_Wu1;~Li_Fei-Fei1;~Silvio_Savarese1", "gender": "M;M;M;M;;;M;;;M;;;M;F;M;M", "homepage": "https://www.chengshuli.me/;;https://robertomartinmartin.com/;;;;https://www.cemgokmen.com;https://scholar.google.ca/citations?user=aUwQK-QAAAAJ&hl=en;;https://www.andreykurenkov.com;https://cs.stanford.edu/~karenliu;http://sll.stanford.edu;https://jiajunwu.com;https://profiles.stanford.edu/fei-fei-li;;https://cs.stanford.edu/~bshen88", "dblp": "63/6091-2;;153/7670;;;;220/3187;;299/1771;;;;117/4768;79/2528;50/3578;280/3036", "google_scholar": "yay_v9EAAAAJ;pqP5_PgAAAAJ;XOJE8OEAAAAJ;d4xUjL8AAAAJ;sqTh_dwAAAAJ;;wCiI8oUAAAAJ;https://scholar.google.ca/citations?user=aUwQK-QAAAAJ;dz0p-88AAAAJ;mmiHOS4AAAAJ;i28fU0MAAAAJ;;2efgcS0AAAAJ;rDfyQnIAAAAJ;ImpbxLsAAAAJ;mOMChFIAAAAJ", "orcid": "0000-0002-9027-8617;0000-0003-4343-1444;0000-0002-9586-2759;;;;0000-0001-9446-6052;;;;0000-0001-5926-0905;;0000-0002-4176-343X;;;0000-0002-8183-3607", "linkedin": "chengshu/;;;;sanjana-srivastava5/;kent-vainio-4749b0145/;cgokmen/;;tanish-jain/;;;;jiajunwu/;fei-fei-li-4541247/;;", "or_profile": 
"~Chengshu_Li1;~Fei_Xia1;~Roberto_Mart\u00edn-Mart\u00edn1;~Michael_Lingelbach1;~Sanjana_Srivastava2;~Kent_Elliott_Vainio1;~Cem_Gokmen1;~Gokul_Dharan1;~Tanish_Jain1;~Andrey_Kurenkov1;~Karen_Liu1;~Hyowon_Gweon1;~Jiajun_Wu1;~Li_Fei-Fei1;~Silvio_Savarese1;~William_B._Shen1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;;Stanford University;Stanford University;Stanford University;Stanford University;NVIDIA", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu;stanford.edu;nvidia.com", "position": "PhD student;PhD student;Postdoc;PhD student;PhD student;MS student;MS student;MS student;MS student;PhD student;;Associate Professor;Assistant Professor;Full Professor;Associate professor;Researcher", "bibtex": "@inproceedings{\nli2021igibson,\ntitle={iGibson 2.0: Object-Centric Simulation for Robot Learning of Everyday Household Tasks},\nauthor={Chengshu Li and Fei Xia and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n and Michael Lingelbach and Sanjana Srivastava and Bokui Shen and Kent Elliott Vainio and Cem Gokmen and Gokul Dharan and Tanish Jain and Andrey Kurenkov and Karen Liu and Hyowon Gweon and Jiajun Wu and Li Fei-Fei and Silvio Savarese},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=2uGN5jNJROR}\n}", "github": "", "project": "", "reviewers": "adeL;kM4j;WuTK;zX2o", "site": "https://openreview.net/forum?id=2uGN5jNJROR", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 16, "corr_rating_confidence": 0, "gs_citation": 268, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3972248145516374543&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;1", "aff_unique_norm": "Stanford University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.nvidia.com", "aff_unique_abbr": "Stanford;NVIDIA", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3HZLte8gMYS", "title": "Social Interactions as Recursive MDPs", "track": "main", "status": "Poster", "tldr": "", "abstract": "While machines and robots must interact with humans, providing them with social skills has been a largely overlooked topic. This is mostly a consequence of the fact that tasks such as navigation, command following, and even game playing are well-defined, while social reasoning still mostly remains a pre-theoretic problem. We demonstrate how social interactions can be effectively incorporated into MDPs by reasoning recursively about the goals of other agents. In essence, our method extends the reward function to include a combination of physical goals (something agents want to accomplish in the configuration space, a traditional MDP) and social goals (something agents want to accomplish relative to the goals of other agents). Our Social MDPs allow specifying reward functions in terms of the estimated reward functions of other agents, modeling interactions such as helping or hindering another agent (by maximizing or minimizing the other agent's reward) while balancing this with the actual physical goals of each agent. Our formulation allows for an arbitrary function of another agent's estimated reward structure and physical goals, enabling more complex behaviors such as politely hindering another agent or aggressively helping them. Extending Social MDPs in the same manner as I-POMDPs extension would enable interactions such as convincing another agent that something is true. 
To what extent the Social MDPs presented here and their potential Social POMDPs variant account for all possible social interactions is unknown, but having a precise mathematical model to guide questions about social interactions has both practical value (we demonstrate how to make zero-shot social inferences and one could imagine chatbots and robots guided by Social MDPs) and theoretical value by bringing the tools of MDP that have so successfully organized research around navigation to hopefully shed light on what social interactions really are given their extreme importance to human well-being and human civilization.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/0a407c769ee9828b760f337c8932e4d0fd9d2b5b.zip", "author": "Ravi Tejwani;Yen-Ling Kuo;Tianmin Shu;Boris Katz;Andrei Barbu", "authorids": "~Ravi_Tejwani1;~Yen-Ling_Kuo1;~Tianmin_Shu1;~Boris_Katz1;~Andrei_Barbu3", "gender": "M;F;;M;M", "homepage": ";http://yenlingkuo.com;;http://people.csail.mit.edu/boris/boris.html;https://0xab.com", "dblp": "180/9380;120/3172;163/2175.html;k/BorisKatz;58/8365", "google_scholar": ";pNkyRs4AAAAJ;YT_ffdwAAAAJ;FdNuUb8AAAAJ;t1rjgHgAAAAJ", "orcid": ";;;;", "linkedin": ";;;;andrei-barbu-1166131", "or_profile": "~Ravi_Tejwani1;~Yen-Ling_Kuo1;~Tianmin_Shu1;~Boris_Katz1;~Andrei_Barbu3", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Postdoc;Principal Research Scientist;Researcher", "bibtex": "@inproceedings{\ntejwani2021social,\ntitle={Social Interactions as Recursive {MDP}s},\nauthor={Ravi Tejwani and Yen-Ling Kuo and Tianmin Shu and Boris Katz and Andrei Barbu},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=3HZLte8gMYS}\n}", "github": "", 
"project": "", "reviewers": "jXz2;BjXW;N7Wd", "site": "https://openreview.net/forum?id=3HZLte8gMYS", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 28, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16348491412547191894&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4u25M570Iji", "title": "Motion Forecasting with Unlikelihood Training in Continuous Space", "track": "main", "status": "Oral", "tldr": "", "abstract": "Motion forecasting is essential for making safe and intelligent decisions in robotic applications such as autonomous driving. Existing methods often formulate it as a sequence-to-sequence prediction problem, solved in an encoder-decoder framework with a maximum likelihood estimation objective. State-of-the-art models leverage contextual information including the map and states of surrounding agents. However, we observe that they still assign a high probability to unlikely trajectories resulting in unsafe behaviors including road boundary violations. Orthogonally, we propose a new objective, unlikelihood training, which forces predicted trajectories that conflict with contextual information to be assigned a lower probability. 
We demonstrate that our method can improve state-of-art models' performance on challenging real-world trajectory forecasting datasets (nuScenes and Argoverse) by avoiding up to 56% context-violated prediction and improving up to 9% prediction accuracy.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/24922476b86b8b83fe3d285de823ff0eafacfb25.zip", "author": "Deyao Zhu;Mohamed Zahran;Li Erran Li;Mohamed Elhoseiny", "authorids": "~Deyao_Zhu1;~Mohamed_Zahran1;~Li_Erran_Li1;~Mohamed_Elhoseiny1", "gender": "M;M;;M", "homepage": "https://tsutikgiau.github.io/;;http://www.cs.columbia.edu/~lierranli/;http://www.mohamed-elhoseiny.com", "dblp": "251/6017;;l/ErranLLi.html;125/2894", "google_scholar": "dENNKrsAAAAJ;https://scholar.google.com.eg/citations?user=Wdv4WLYAAAAJ;GkMfzy4AAAAJ;iRBUTOAAAAAJ", "orcid": ";0000-0002-4082-814X;;0000-0001-9659-1551", "linkedin": "deyao-zhu-205774154/;mzahran001/;;mohamed-elhoseiny-8a836215/", "or_profile": "~Deyao_Zhu1;~Mohamed_Zahran1;~Li_Erran_Li1;~Mohamed_Elhoseiny1", "aff": "KAUST;Udacity;Columbia University;KAUST", "aff_domain": "kaust.edu.sa;udacity.com;columbia.edu;kaust.edu.sa", "position": "PhD student;Program Experience Manager;Adjunct Professor;Associate Professor", "bibtex": "@inproceedings{\nzhu2021motion,\ntitle={Motion Forecasting with Unlikelihood Training in Continuous Space},\nauthor={Deyao Zhu and Mohamed Zahran and Li Erran Li and Mohamed Elhoseiny},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=4u25M570Iji}\n}", "github": "", "project": "", "reviewers": "nXVp;Fbgh;XGcM", "site": "https://openreview.net/forum?id=4u25M570Iji", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9904438431792183217&as_sdt=2005&sciodt=0,5&hl=en", 
"gs_version_total": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "King Abdullah University of Science and Technology;Udacity;Columbia University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaust.edu.sa;https://www.udacity.com;https://www.columbia.edu", "aff_unique_abbr": "KAUST;Udacity;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Saudi Arabia;United States" }, { "id": "50523z0PALg", "title": "Using Physics Knowledge for Learning Rigid-body Forward Dynamics with Gaussian Process Force Priors", "track": "main", "status": "Poster", "tldr": "", "abstract": "If a robot's dynamics are difficult to model solely through analytical mechanics, it is an attractive option to directly learn it from data. Yet, solely data-driven approaches require considerable amounts of data for training and do not extrapolate well to unseen regions of the system's state space. In this work, we emphasize that when a robot's links are sufficiently rigid, many analytical functions such as kinematics, inertia functions, and surface constraints encode informative prior knowledge on its dynamics. To this effect, we propose a framework for learning probabilistic forward dynamics that combines physics knowledge with Gaussian processes utilizing automatic differentiation with GPU acceleration. Compared to solely data-driven modeling, the model's data efficiency improves while the model also respects physical constraints. We illustrate the proposed structured model on a seven joint robot arm in PyBullet. 
Our implementation of the proposed framework can be found here: https://git.io/JP4Fs", "keywords": "Machine Learning;Robotics;Analytical mechanics", "primary_area": "", "supplementary_material": "/attachment/82a064da4dcced70ab3baac69be142cb66465776.zip", "author": "Lucas Rath;Andreas Ren\u00e9 Geist;Sebastian Trimpe", "authorids": "~Lucas_Rath1;~Andreas_Ren\u00e9_Geist1;~Sebastian_Trimpe1", "gender": "M;M;M", "homepage": ";https://andregeist.github.io/;https://www.dsme.rwth-aachen.de/trimpe", "dblp": ";;15/8135", "google_scholar": "ke9vYmYAAAAJ;JDBDDEgAAAAJ;https://scholar.google.de/citations?user=9kzHZssAAAAJ", "orcid": ";0000-0003-2551-2419;0000-0002-2785-2487", "linkedin": "lucas-rath/;andreas-rene-geist/;sebastian-trimpe-2472a0a3/", "or_profile": "~Lucas_Rath1;~Andreas_Ren\u00e9_Geist1;~Sebastian_Trimpe1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max-Planck-Institute for Intelligent Systems;RWTH Aachen University", "aff_domain": "is.mpg.de;is.mpg.de;rwth-aachen.de", "position": "Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nrath2021using,\ntitle={Using Physics Knowledge for Learning Rigid-body Forward Dynamics with Gaussian Process Force Priors},\nauthor={Lucas Rath and Andreas Ren{\\'e} Geist and Sebastian Trimpe},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=50523z0PALg}\n}", "github": "", "project": "", "reviewers": "ZmdF;Ahf1;2B5H;HDXR", "site": "https://openreview.net/forum?id=50523z0PALg", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17239843571126147375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;RWTH Aachen University", "aff_unique_dep": 
"Intelligent Systems;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.rwth-aachen.de", "aff_unique_abbr": "MPI-IS;RWTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Aachen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "59aUaAbVfMA", "title": "Neural Posterior Domain Randomization", "track": "main", "status": "Poster", "tldr": "", "abstract": "Combining domain randomization and reinforcement learning is a widely used approach to obtain control policies that can bridge the gap between simulation and reality. However, existing methods make limiting assumptions on the form of the domain parameter distribution which prevents them from utilizing the full power of domain randomization. Typically, a restricted family of probability distributions (e.g., normal or uniform) is chosen a priori for every parameter. Furthermore, straightforward approaches based on deep learning require differentiable simulators, which are either not available or can only simulate a limited class of systems. Such rigid assumptions diminish the applicability of domain randomization in robotics. Building upon recently proposed neural likelihood-free inference methods, we introduce Neural Posterior Domain Randomization (NPDR), an algorithm that alternates between learning a policy from a randomized simulator and adapting the posterior distribution over the simulator\u2019s parameters in a Bayesian fashion. Our approach only requires a parameterized simulator, coarse prior ranges, a policy (optionally with optimization routine), and a small set of real-world observations. Most importantly, the domain parameter distribution is not restricted to a specific family, parameters can be correlated, and the simulator does not have to be differentiable. We show that the presented method is able to efficiently adapt the posterior over the domain parameters to closer match the observed dynamics. 
Moreover, we demonstrate that NPDR can learn transferable policies using fewer real-world rollouts than comparable algorithms.", "keywords": "sim-to-real;domain randomization;likelihood-free inference", "primary_area": "", "supplementary_material": "/attachment/e41305929981345fd044aacf833b793d4a3135ec.zip", "author": "Fabio Muratore;Theo Gruner;Florian Wiese;Boris Belousov;Michael Gienger;Jan Peters", "authorids": "~Fabio_Muratore1;theosunao.gruner@stud.tu-darmstadt.de;wiese@stud.tu-darmstadt.de;~Boris_Belousov1;michael.gienger@honda-ri.de;~Jan_Peters3", "gender": ";;;;;M", "homepage": ";;;;;https://www.jan-peters.net", "dblp": ";;;;;p/JanPeters1", "google_scholar": "https://scholar.google.de/citations?user=cZAcOyUAAAAJ;;;;;https://scholar.google.de/citations?user=-kIVAcAAAAAJ", "orcid": "0000-0001-8600-2610;;;;;0000-0002-5266-8091", "linkedin": ";;;;;janrpeters/", "or_profile": "~Fabio_Muratore1;theosunao.gruner@stud.tu-darmstadt.de;wiese@stud.tu-darmstadt.de;~Boris_Belousov1;michael.gienger@honda-ri.de;~Jan_Peters3", "aff": ";;;;;Max Planck Institute for Intelligent Systems", "aff_domain": ";;;;;tue.mpg.de", "position": ";;;;;Researcher", "bibtex": "@inproceedings{\nmuratore2021neural,\ntitle={Neural Posterior Domain Randomization},\nauthor={Fabio Muratore and Theo Gruner and Florian Wiese and Boris Belousov and Michael Gienger and Jan Peters},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=59aUaAbVfMA}\n}", "github": "", "project": "", "reviewers": "tnJY;FjeG;wTxL;KMoW", "site": "https://openreview.net/forum?id=59aUaAbVfMA", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8041811915821830931&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "Max Planck Institute 
for Intelligent Systems", "aff_unique_dep": "Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "id": "5DjX89Wyhk-", "title": "Predicting Stable Configurations for Semantic Placement of Novel Objects", "track": "main", "status": "Poster", "tldr": "", "abstract": "Human environments contain numerous objects configured in a variety of arrangements. Our goal is to enable robots to repose previously unseen objects according to learned semantic relationships in novel environments. We break this problem down into two parts: (1) finding physically valid locations for the objects and (2) determining if those poses satisfy learned, high-level semantic relationships.\nWe build our models and training from the ground up to be tightly integrated with our proposed planning algorithm for semantic placement of unknown objects. We train our models purely in simulation, with no fine-tuning needed for use in the real world.\nOur approach enables motion planning for semantic rearrangement of unknown objects in scenes with varying geometry from only RGB-D sensing. Our experiments through a set of simulated ablations demonstrate that using a relational classifier alone is not sufficient for reliable planning. 
We further demonstrate the ability of our planner to generate and execute diverse manipulation plans through a set of real-world experiments with a variety of objects.\n", "keywords": "Deep learning for robotic manipulation;learning for motion planning;semantic manipulation", "primary_area": "", "supplementary_material": "/attachment/2a9f2642aba3d42d2f5acbbbeb63554485bba899.zip", "author": "Chris Paxton;Chris Xie;Tucker Hermans;Dieter Fox", "authorids": "~Chris_Paxton1;~Chris_Xie1;~Tucker_Hermans2;~Dieter_Fox1", "gender": "M;M;M;M", "homepage": "https://cpaxton.github.io/;https://robot-learning.cs.utah.edu;https://homes.cs.washington.edu/~fox/;https://chrisdxie.github.io/", "dblp": ";https://dblp.uni-trier.de/pid/67/4241;f/DieterFox;164/8466", "google_scholar": "I1mOQpAAAAAJ;G5_VFfkAAAAJ;DqXsbPAAAAAJ;iJuDBhEAAAAJ", "orcid": ";0000-0003-2496-2768;;", "linkedin": ";;;", "or_profile": "~Chris_Paxton1;~Tucker_Hermans2;~Dieter_Fox1;~Christopher_Xie1", "aff": "NVIDIA;University of Utah;Department of Computer Science;Department of Computer Science, University of Washington", "aff_domain": "nvidia.com;utah.edu;cs.washington.edu;cs.washington.edu", "position": "Researcher;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\npaxton2021predicting,\ntitle={Predicting Stable Configurations for Semantic Placement of Novel Objects},\nauthor={Chris Paxton and Chris Xie and Tucker Hermans and Dieter Fox},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=5DjX89Wyhk-}\n}", "github": "", "project": "", "reviewers": "Tchz;f3r9;54xG", "site": "https://openreview.net/forum?id=5DjX89Wyhk-", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10144308155715900984&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, 
"aff_unique_index": "0;1;2;3", "aff_unique_norm": "NVIDIA;University of Utah;Unknown Institution;University of Washington", "aff_unique_dep": "NVIDIA Corporation;;Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;https://www.utah.edu;;https://www.washington.edu", "aff_unique_abbr": "NVIDIA;Utah;;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "5P_3bRWiRsF", "title": "RoCUS: Robot Controller Understanding via Sampling", "track": "main", "status": "Poster", "tldr": "", "abstract": "As robots are deployed in complex situations, engineers and end users must develop a holistic understanding of their behaviors, capabilities, and limitations. Some behaviors are directly optimized by the objective function. They often include success rate, completion time or energy consumption. Other behaviors -- e.g., collision avoidance, trajectory smoothness or motion legibility -- are typically emergent but equally important for safe and trustworthy deployment. Designing an objective which optimizes every aspect of robot behavior is hard. In this paper, we advocate for systematic analysis of a wide array of behaviors for holistic understanding of robot controllers and, to this end, propose a framework, RoCUS, which uses Bayesian posterior sampling to find situations where the robot controller exhibits user-specified behaviors, such as highly jerky motions. We use RoCUS to analyze three controller classes (deep learning models, rapidly exploring random trees and dynamical system formulations) on two domains (2D navigation and a 7 degree-of-freedom arm reaching), and uncover insights to further our understanding of these controllers and ultimately improve their designs. 
", "keywords": "debugging and evaluation;algorithmic transparency", "primary_area": "", "supplementary_material": "/attachment/96367a67d1ea3600aab18b5b59ed73f9bf841861.zip", "author": "Yilun Zhou;Serena Booth;Nadia Figueroa;Julie Shah", "authorids": "~Yilun_Zhou2;~Serena_Booth1;~Nadia_Figueroa1;~Julie_Shah2", "gender": "M;F;F;F", "homepage": "https://yilunzhou.github.io/;http://www.slbooth.com;https://nbfigueroa.github.io/;https://interactive.mit.edu", "dblp": "163/2243.html;;116/8822;", "google_scholar": "3DGRp2oAAAAJ;sf3ROEUAAAAJ;1NQRXHQAAAAJ;", "orcid": ";;0000-0002-6873-4671;", "linkedin": ";;nadiabarbara/;", "or_profile": "~Yilun_Zhou2;~Serena_Booth1;~Nadia_Figueroa1;~Julie_Shah2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nzhou2021rocus,\ntitle={Ro{CUS}: Robot Controller Understanding via Sampling},\nauthor={Yilun Zhou and Serena Booth and Nadia Figueroa and Julie Shah},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=5P_3bRWiRsF}\n}", "github": "", "project": "", "reviewers": "UK3k;YUw7;U7Po;h5ZA", "site": "https://openreview.net/forum?id=5P_3bRWiRsF", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12309707908889688975&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "7uSBJDoP7tY", "title": "A System for General In-Hand Object Re-Orientation", "track": "main", "status": "Oral", "tldr": "", "abstract": "In-hand object reorientation has been a challenging problem in robotics due to high dimensional actuation space and the frequent change in contact state between the fingers and the objects. We present a simple model-free framework that can learn to reorient objects with both the hand facing upwards and downwards. We demonstrate the capability of reorienting over $2000$ geometrically different objects in both cases. The learned policies show strong zero-shot transfer performance on new objects. We provide evidence that these policies are amenable to real-world operation by distilling them to use observations easily available in the real world. The videos of the learned policies are available at: https://taochenshh.github.io/projects/in-hand-reorientation.", "keywords": "In-hand manipulation;dexterous manipulation;object reorientation", "primary_area": "", "supplementary_material": "/attachment/19c89f7d9f3634b80cce5152f1fcdc6122628cf3.zip", "author": "Tao Chen;Jie Xu;Pulkit Agrawal", "authorids": "~Tao_Chen1;~Jie_Xu7;~Pulkit_Agrawal1", "gender": "M;M;M", "homepage": "https://taochenshh.github.io;https://people.csail.mit.edu/jiex;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": ";37/5126-28;149/2672", "google_scholar": "gdUv1PIAAAAJ;3Tj5lWEAAAAJ;UpZmJI0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tao_Chen1;~Jie_Xu7;~Pulkit_Agrawal1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2021a,\ntitle={A System for General In-Hand Object Re-Orientation},\nauthor={Tao Chen and Jie Xu and Pulkit Agrawal},\nbooktitle={5th Annual Conference on Robot Learning 
},\nyear={2021},\nurl={https://openreview.net/forum?id=7uSBJDoP7tY}\n}", "github": "", "project": "", "reviewers": "uUVE;Cfuq;UVfU", "site": "https://openreview.net/forum?id=7uSBJDoP7tY", "pdf_size": 0, "rating": "10;10;10", "confidence": "", "rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 281, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4819332599265025217&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "87_OJU4sw3V", "title": "ReSkin: versatile, replaceable, lasting tactile skins", "track": "main", "status": "Oral", "tldr": "", "abstract": "Soft sensors have continued growing interest in robotics, due to their ability to enable both passive conformal contact from the material properties and active contact data from the sensor properties. However, the same properties of conformal contact result in faster deterioration of soft sensors and larger variations in their response characteristics over time and across samples, inhibiting their ability to be long-lasting and replaceable. ReSkin is a tactile soft sensor that leverages machine learning and magnetic sensing to offer a low-cost, diverse and compact solution for long-term use. Magnetic sensing separates the electronic circuitry from the passive interface, making it easier to replace interfaces as they wear out while allowing for a wide variety of form factors. 
Machine learning allows us to learn sensor response models that are robust to variations across fabrication and time, and our self-supervised learning algorithm enables finer performance enhancement with small, inexpensive data collection procedures. We believe that ReSkin opens the door to more versatile, scalable and inexpensive tactile sensation modules than existing alternatives. https://reskin.dev", "keywords": "Tactile Skin;Self-supervised Learning;Magnetic Sensing;Soft Sensors", "primary_area": "", "supplementary_material": "/attachment/1385c535ca5948e00d5023f6f92856a43874443f.zip", "author": "Raunaq Bhirangi;Tess Hellebrekers;Carmel Majidi;Abhinav Gupta", "authorids": "~Raunaq_Bhirangi1;~Tess_Hellebrekers1;~Carmel_Majidi1;~Abhinav_Gupta1", "gender": "M;;;M", "homepage": "https://raunaqbhirangi.github.io;;;http://www.cs.cmu.edu/~abhinavg", "dblp": "266/4528;;;36/7024-1", "google_scholar": "LUy4hkcAAAAJ;PE_lI3kAAAAJ;1LyndUsAAAAJ;https://scholar.google.com.tw/citations?user=bqL73OkAAAAJ", "orcid": ";;;", "linkedin": "raunaq-bhirangi/;;;", "or_profile": "~Raunaq_Bhirangi1;~Tess_Hellebrekers1;~Carmel_Majidi1;~Abhinav_Gupta1", "aff": "Carnegie Mellon University;Meta Facebook;Carnegie Mellon University;Meta Facebook", "aff_domain": "cmu.edu;facebook.com;cmu.edu;fb.com", "position": "PhD student;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nbhirangi2021reskin,\ntitle={ReSkin: versatile, replaceable, lasting tactile skins},\nauthor={Raunaq Bhirangi and Tess Hellebrekers and Carmel Majidi and Abhinav Gupta},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=87_OJU4sw3V}\n}", "github": "", "project": "", "reviewers": "fsSv;2SmQ;51C5;VwcU", "site": "https://openreview.net/forum?id=87_OJU4sw3V", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 100, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=4376570632271114035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8K5kisAnb_p", "title": "Safe Nonlinear Control Using Robust Neural Lyapunov-Barrier Functions", "track": "main", "status": "Poster", "tldr": "", "abstract": "Safety and stability are common requirements for robotic control systems; however, designing safe, stable controllers remains difficult for nonlinear and uncertain models. We develop a model-based learning approach to synthesize robust feedback controllers with safety and stability guarantees. We take inspiration from robust convex optimization and Lyapunov theory to define robust control Lyapunov barrier functions that generalize despite model uncertainty. We demonstrate our approach in simulation on problems including car trajectory tracking, nonlinear control with obstacle avoidance, satellite rendezvous with safety constraints, and flight control with a learned ground effect model. Simulation results show that our approach yields controllers that match or exceed the capabilities of robust MPC while reducing computational costs by an order of magnitude. 
We provide source code at github.com/dawsonc/neural_clbf/.", "keywords": "Certified control;learning for control", "primary_area": "", "supplementary_material": "/attachment/96326f27f75319bde2747d82ef2b586c3e7d5c9c.zip", "author": "Charles Dawson;Zengyi Qin;Sicun Gao;Chuchu Fan", "authorids": "~Charles_Dawson1;~Zengyi_Qin1;~Sicun_Gao1;~Chuchu_Fan2", "gender": "M;M;M;F", "homepage": "https://dawson.mit.edu;;;https://chuchu.mit.edu", "dblp": "39/1246;230/7736;22/8296;127/1756", "google_scholar": "FkDdz9gAAAAJ;;;J-dq_8EAAAAJ", "orcid": "0000-0002-8371-5313;;;", "linkedin": "c6d5;;;chuchu-fan/", "or_profile": "~Charles_Dawson1;~Zengyi_Qin1;~Sicun_Gao1;~Chuchu_Fan2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;;mit.edu", "position": "MS student;Graduate student;;Assistant Professor", "bibtex": "@inproceedings{\ndawson2021safe,\ntitle={Safe Nonlinear Control Using Robust Neural Lyapunov-Barrier Functions},\nauthor={Charles Dawson and Zengyi Qin and Sicun Gao and Chuchu Fan},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=8K5kisAnb_p}\n}", "github": "", "project": "", "reviewers": "mZgd;JDXV;o7Dq", "site": "https://openreview.net/forum?id=8K5kisAnb_p", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 210, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11873176761623805123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": 
"8ZL7Jh1r8WX", "title": "Task-Driven Out-of-Distribution Detection with Statistical Guarantees for Robot Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Our goal is to perform out-of-distribution (OOD) detection, i.e., to detect when a robot is operating in environments that are drawn from a different distribution than the environments used to train the robot. We leverage Probably Approximately Correct (PAC)-Bayes theory in order to train a policy with a guaranteed bound on performance on the training distribution. Our key idea for OOD detection then relies on the following intuition: violation of the performance bound on test environments provides evidence that the robot is operating OOD. We formalize this via statistical techniques based on p-values and concentration inequalities. The resulting approach (i) provides guaranteed confidence bounds on OOD detection, and (ii) is task-driven and sensitive only to changes that impact the robot\u2019s performance. We demonstrate our approach on a simulated example of grasping objects with unfamiliar poses or shapes. We also present both simulation and hardware experiments for a drone performing vision-based obstacle avoidance in unfamiliar environments (including wind disturbances and different obstacle densities). Our examples demonstrate that we can perform task-driven OOD detection within just a handful of trials. 
Comparisons with baselines also demonstrate the advantages of our approach in terms of providing statistical guarantees and being insensitive to task-irrelevant distribution shifts.", "keywords": "Out-of-distribution detection;generalization;PAC-Bayes", "primary_area": "", "supplementary_material": "/attachment/f71fc02e294ea4b16945c89d1a1fa2d60451b60f.zip", "author": "Alec Farid;Sushant Veer;Anirudha Majumdar", "authorids": "~Alec_Farid1;~Sushant_Veer1;~Anirudha_Majumdar1", "gender": ";M;M", "homepage": ";;https://irom-lab.princeton.edu/majumdar/", "dblp": ";173/5950;116/6436", "google_scholar": ";1FiIlQsAAAAJ;ibu3FwsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alec_Farid1;~Sushant_Veer1;~Anirudha_Majumdar1", "aff": ";Princeton University;Princeton University", "aff_domain": ";princeton.edu;princeton.edu", "position": ";Postdoc;Associate Professor", "bibtex": "@inproceedings{\nfarid2021taskdriven,\ntitle={Task-Driven Out-of-Distribution Detection with Statistical Guarantees for Robot Learning},\nauthor={Alec Farid and Sushant Veer and Anirudha Majumdar},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=8ZL7Jh1r8WX}\n}", "github": "", "project": "", "reviewers": "ev1V;PvzK;orAU", "site": "https://openreview.net/forum?id=8ZL7Jh1r8WX", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16849488672761264795&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8kbp23tSGYv", "title": "BC-Z: 
Zero-Shot Task Generalization with Robotic Imitation Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this paper, we study the problem of enabling a vision-based robotic manipulation system to generalize to novel tasks, a long-standing challenge in robot learning. We approach the challenge from an imitation learning perspective, aiming to study how scaling and broadening the data collected can facilitate such generalization. To that end, we develop an interactive and flexible imitation learning system that can learn from both demonstrations and interventions and can be conditioned on different forms of information that convey the task, including pre-trained embeddings of natural language or videos of humans performing the task. When scaling data collection on a real robot to more than 100 distinct tasks, we find that this system can perform 24 unseen manipulation tasks with an average success rate of 44%, without any robot demonstrations for those tasks.", "keywords": "Zero-Shot Imitation Learning;One-Shot Imitation Learning;Multi-Task Imitation;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/24931f47ec0759d76efffbff3abb17ba6ee85133.zip", "author": "Eric Jang;Alex Irpan;Mohi Khansari;Daniel Kappler;Frederik Ebert;Corey Lynch;Sergey Levine;Chelsea Finn", "authorids": "~Eric_Jang1;~Alex_Irpan1;~Mohi_Khansari1;~Daniel_Kappler1;~Frederik_Ebert1;~Corey_Lynch1;~Sergey_Levine1;~Chelsea_Finn1", "gender": "M;M;;M;M;M;M;F", "homepage": "http://evjang.com;http://www.alexirpan.com;https://cs.stanford.edu/people/khansari/;https://am.is.tuebingen.mpg.de/person/dkappler;;https://coreylynch.github.io/;https://people.eecs.berkeley.edu/~svlevine/;https://ai.stanford.edu/~cbfinn/", "dblp": "190/7794;202/2063;;31/10333;;155/3141;80/7594;131/1783", "google_scholar": "Izhkp4YAAAAJ;;Z3dxz9IAAAAJ;https://scholar.google.de/citations?user=_WLInT0AAAAJ;;CYWO-oAAAAAJ;8R35rCwAAAAJ;vfPE6hgAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", 
"or_profile": "~Eric_Jang1;~Alex_Irpan1;~Mohi_Khansari1;~Daniel_Kappler1;~Frederik_Ebert1;~Corey_Lynch1;~Sergey_Levine1;~Chelsea_Finn1", "aff": "Google;Google DeepMind;Google;;University of California, Berkeley;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;;berkeley.edu;google.com;google.com;google.com", "position": "Researcher;Researcher;Sr. Roboticist;;PhD student;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\njang2021bcz,\ntitle={{BC}-Z: Zero-Shot Task Generalization with Robotic Imitation Learning},\nauthor={Eric Jang and Alex Irpan and Mohi Khansari and Daniel Kappler and Frederik Ebert and Corey Lynch and Sergey Levine and Chelsea Finn},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=8kbp23tSGYv}\n}", "github": "", "project": "", "reviewers": "KYDU;7L8N;aF8M", "site": "https://openreview.net/forum?id=8kbp23tSGYv", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 32, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 594, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6558326575413813557&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": "Google;UC Berkeley", "aff_campus_unique_index": "0;0;2;0;0;0", "aff_campus_unique": "Mountain View;;Berkeley", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "8xC5NNej-l_", "title": "S4RL: Surprisingly Simple Self-Supervision for Offline Reinforcement Learning in Robotics", "track": "main", "status": "Poster", "tldr": "", "abstract": "Offline reinforcement learning proposes to learn policies from large collected datasets without 
interacting with the physical environment. These algorithms have made it possible to learn useful skills from data that can then be deployed in the environment in real-world settings where interactions may be costly or dangerous, such as autonomous driving or factories. However, offline agents are unable to access the environment to collect new data, and therefore are trained on a static dataset. In this paper, we study the effectiveness of performing data augmentations on the state space, and study 7 different augmentation schemes and how they behave with existing offline RL algorithms. We then combine the best data performing augmentation scheme with a state-of-the-art Q-learning technique, and improve the function approximation of the Q-networks by smoothening out the learned state-action space. We experimentally show that using this Surprisingly Simple Self-Supervision technique in RL (S4RL), we significantly improve over the current state-of-the-art algorithms on offline robot learning environments such as MetaWorld [1] and RoboSuite [2,3], and benchmark datasets such as D4RL [4].", "keywords": "Offline Reinforcement Learning;Data Augmentation;Self-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/f7e2fba0598ae59765c8573c3f2ddacc9bd97521.zip", "author": "Samarth Sinha;Ajay Mandlekar;Animesh Garg", "authorids": "~Samarth_Sinha1;~Ajay_Mandlekar1;~Animesh_Garg1", "gender": "M;M;M", "homepage": "https://samsinha.me;https://ai.stanford.edu/~amandlek/;http://animesh.garg.tech", "dblp": ";https://dblp.uni-trier.de/pers/hd/m/Mandlekar:Ajay;123/5728", "google_scholar": "https://scholar.google.ca/citations?user=lnCKs0AAAAAJ;MEz23joAAAAJ;zp8V7ZMAAAAJ", "orcid": ";;0000-0003-0482-4296", "linkedin": ";;animeshgarg/", "or_profile": "~Samarth_Sinha1;~Ajay_Mandlekar1;~Animesh_Garg1", "aff": "University of Toronto, Toronto University;Stanford University;University of Toronto", "aff_domain": "ece.utoronto.ca;stanford.edu;toronto.edu", "position": 
"Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsinha2021srl,\ntitle={S4{RL}: Surprisingly Simple Self-Supervision for Offline Reinforcement Learning in Robotics},\nauthor={Samarth Sinha and Ajay Mandlekar and Animesh Garg},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=8xC5NNej-l_}\n}", "github": "", "project": "", "reviewers": "TxK8;oh9V;vBpT;bi2U", "site": "https://openreview.net/forum?id=8xC5NNej-l_", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 30, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 139, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5209455648931314612&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Toronto;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.stanford.edu", "aff_unique_abbr": "U of T;Stanford", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Toronto;Stanford;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "9aVCUv3nKBg", "title": "Adversarially Robust Imitation Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Modern imitation learning (IL) utilizes deep neural networks (DNNs) as function approximators to mimic the policy of the expert demonstrations. However, DNNs can be easily fooled by subtle noise added to the input, which is even non-detectable by humans. This makes the learned agent vulnerable to attacks, especially in IL where agents can struggle to recover from the errors. In such light, we propose a sound Adversarially Robust Imitation Learning (ARIL) method. In our setting, an agent and an adversary are trained alternatively. 
The former with adversarially attacked input at each timestep mimics the behavior of an online expert and the latter learns to add perturbations on the states by forcing the learned agent to fail on choosing the right decisions. We theoretically prove that ARIL can achieve adversarial robustness and evaluate ARIL on multiple benchmarks from DM Control Suite. The result reveals that our method (ARIL) achieves better robustness compare with other imitation learning methods under both sensory attack and physical attack.", "keywords": "Imitation Learning;Adversarial Learning", "primary_area": "", "supplementary_material": "/attachment/408ed0f1e24ffda6b8bbc4b5cb4f15f03fdfaddc.zip", "author": "Jianren Wang;Ziwen Zhuang;Yuyang Wang;Hang Zhao", "authorids": "~Jianren_Wang2;~Ziwen_Zhuang1;~Yuyang_Wang3;~Hang_Zhao1", "gender": "M;M;;M", "homepage": "https://www.jianrenw.com/;https://ziwenzhuang.github.io;https://yuyangw.github.io/;http://www.mit.edu/~hangzhao/", "dblp": "34/8491;;43/8355-5;", "google_scholar": "qR4O45oAAAAJ;GE8fpdwAAAAJ;6eWGKEsAAAAJ;DmahiOYAAAAJ", "orcid": ";;0000-0003-0723-6246;", "linkedin": ";leozhuang;;", "or_profile": "~Jianren_Wang2;~Ziwen_Zhuang1;~Yuyang_Wang3;~Hang_Zhao1", "aff": "Carnegie Mellon University;ShanghaiTech University;Carnegie Mellon University;Tsinghua University", "aff_domain": "cmu.edu;shanghaitech.edu.cn;andrew.cmu.edu;tsinghua.edu.cn", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2021adversarially,\ntitle={Adversarially Robust Imitation Learning},\nauthor={Jianren Wang and Ziwen Zhuang and Yuyang Wang and Hang Zhao},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=9aVCUv3nKBg}\n}", "github": "", "project": "", "reviewers": "uSZo;bbzz;1bf5;Bpno", "site": "https://openreview.net/forum?id=9aVCUv3nKBg", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 6, 
"authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11995478402120154004&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Carnegie Mellon University;ShanghaiTech University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.shanghaitech.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CMU;ShanghaiTech;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;China" }, { "id": "9uFiX_HRsIL", "title": "CLIPort: What and Where Pathways for Robotic Manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "How can we imbue robots with the ability to manipulate objects precisely but also to reason about them in terms of abstract concepts? Recent works in manipulation have shown that end-to-end networks can learn dexterous skills that require precise spatial reasoning, but these methods often fail to generalize to new goals or quickly learn transferable concepts across tasks. In parallel, there has been great progress in learning generalizable semantic representations for vision and language by training on large-scale internet data, however these representations lack the spatial understanding necessary for fine-grained manipulation. To this end, we propose a framework that combines the best of both worlds: a two-stream architecture with semantic and spatial pathways for vision-based manipulation. Specifically, we present CLIPort, a language-conditioned imitation-learning agent that combines the broad semantic understanding (what) of CLIP [1] with the spatial precision (where) of Transporter [2]. 
Our end-to-end framework is capable of solving a variety of language-specified tabletop tasks from packing unseen objects to folding cloths, all without any explicit representations of object poses, instance segmentations, memory, symbolic states, or syntactic structures. Experiments in simulated and real-world settings show that our approach is data efficient in few-shot settings and generalizes effectively to seen and unseen semantic concepts. We even learn one multi-task policy for 10 simulated and 9 real-world tasks that is better or comparable to single-task policies.", "keywords": "Manipulation;Pre-trained Models;Vision Language Grounding;CLIP", "primary_area": "", "supplementary_material": "/attachment/3154856ad15da8c20f93fe49b2afbc5d58e204cf.zip", "author": "Mohit Shridhar;Lucas Manuelli;Dieter Fox", "authorids": "~Mohit_Shridhar1;~Lucas_Manuelli1;~Dieter_Fox1", "gender": "M;M;M", "homepage": "http://mohitshridhar.com/;http://lucasmanuelli.com;https://homes.cs.washington.edu/~fox/", "dblp": "203/8577.html;;f/DieterFox", "google_scholar": "CrfsfFSiS0kC;0pxg5ssAAAAJ;DqXsbPAAAAAJ", "orcid": "0000-0001-7382-763X;;", "linkedin": ";;", "or_profile": "~Mohit_Shridhar1;~Lucas_Manuelli1;~Dieter_Fox1", "aff": "NVIDIA;NVIDIA;Department of Computer Science", "aff_domain": "nvidia.com;nvidia.com;cs.washington.edu", "position": "NVIDIA;Researcher;Full Professor", "bibtex": "@inproceedings{\nshridhar2021cliport,\ntitle={{CLIP}ort: What and Where Pathways for Robotic Manipulation},\nauthor={Mohit Shridhar and Lucas Manuelli and Dieter Fox},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=9uFiX_HRsIL}\n}", "github": "", "project": "", "reviewers": "Hq8X;HUjp;WZS2", "site": "https://openreview.net/forum?id=9uFiX_HRsIL", "pdf_size": 0, "rating": "4;6;10", "confidence": "", "rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 728, 
"gs_cited_by_link": "https://scholar.google.com/scholar?cites=626168040060357114&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1", "aff_unique_norm": "NVIDIA;Unknown Institution", "aff_unique_dep": "NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;", "aff_unique_abbr": "NVIDIA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "id": "A9P78VBwYKM", "title": "ObjectFolder: A Dataset of Objects with Implicit Visual, Auditory, and Tactile Representations", "track": "main", "status": "Poster", "tldr": "", "abstract": "Multisensory object-centric perception, reasoning, and interaction have been a key research topic in recent years. However, the progress in these directions is limited by the small set of objects available---synthetic objects are not realistic enough and are mostly centered around geometry, while real object datasets such as YCB are often practically challenging and unstable to acquire due to international shipping, inventory, and financial cost. We present ObjectFolder, a dataset of 100 virtualized objects that addresses both challenges with two key innovations. First, ObjectFolder encodes the visual, auditory, and tactile sensory data for all objects, enabling a number of multisensory object recognition tasks, beyond existing datasets that focus purely on object geometry. Second, ObjectFolder employs a uniform, object-centric, and implicit representation for each object\u2019s visual textures, acoustic simulations, and tactile readings, making the dataset flexible to use and easy to share. 
We demonstrate the usefulness of our dataset as a testbed for multisensory perception and control by evaluating it on a variety of benchmark tasks, including instance recognition, cross-sensory retrieval, 3D reconstruction, and robotic grasping.", "keywords": "object dataset;multisensory learning;implicit representations", "primary_area": "", "supplementary_material": "/attachment/0a8a71cb8e8b53690722bb4b7b7c10ba4a0093d6.zip", "author": "Ruohan Gao;Yen-Yu Chang;Shivani Mall;Li Fei-Fei;Jiajun Wu", "authorids": "~Ruohan_Gao2;~Yen-Yu_Chang1;~Shivani_Mall1;~Li_Fei-Fei1;~Jiajun_Wu1", "gender": "M;M;F;F;M", "homepage": "https://ruohangao.github.io/;https://yuyuchang.github.io/;;https://profiles.stanford.edu/fei-fei-li;https://jiajunwu.com", "dblp": "176/5787;;;79/2528;117/4768", "google_scholar": "i02oEgMAAAAJ;;;rDfyQnIAAAAJ;2efgcS0AAAAJ", "orcid": "0000-0002-8346-1114;;0000-0002-1341-5702;;0000-0002-4176-343X", "linkedin": ";;shivanimall/;fei-fei-li-4541247/;jiajunwu/", "or_profile": "~Ruohan_Gao2;~Yen-Yu_Chang1;~Shivani_Mall1;~Li_Fei-Fei1;~Jiajun_Wu1", "aff": "University of Texas, Austin;Stanford University;University of Oxford;Stanford University;Stanford University", "aff_domain": "cs.utexas.edu;stanford.edu;robots.ox.ac.uk;stanford.edu;stanford.edu", "position": "PhD student;MS student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2021objectfolder,\ntitle={ObjectFolder: A Dataset of Objects with Implicit Visual, Auditory, and Tactile Representations},\nauthor={Ruohan Gao and Yen-Yu Chang and Shivani Mall and Li Fei-Fei and Jiajun Wu},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=A9P78VBwYKM}\n}", "github": "", "project": "", "reviewers": "BbKQ;pXzD;arko", "site": "https://openreview.net/forum?id=A9P78VBwYKM", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, 
"corr_rating_confidence": 0, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16189569569766554169&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "University of Texas at Austin;Stanford University;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.stanford.edu;https://www.ox.ac.uk", "aff_unique_abbr": "UT Austin;Stanford;Oxford", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Austin;Stanford;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "AL4FPs84YdQ", "title": "Semantic Terrain Classification for Off-Road Autonomous Driving", "track": "main", "status": "Poster", "tldr": "", "abstract": "Producing dense and accurate traversability maps is crucial for autonomous off-road navigation. In this paper, we focus on the problem of classifying terrains into 4 cost classes (free, low-cost, medium-cost, obstacle) for traversability assessment. This requires a robot to reason about both semantics (what objects are present?) and geometric properties (where are the objects located?) of the environment. To achieve this goal, we develop a novel Bird's Eye View Network (BEVNet), a deep neural network that directly predicts a local map encoding terrain classes from sparse LiDAR inputs. BEVNet processes both geometric and semantic information in a temporally consistent fashion. More importantly, it uses learned prior and history to predict terrain classes in unseen space and into the future, allowing a robot to better appraise its situation. 
We quantitatively evaluate BEVNet on both on-road and off-road scenarios and show that it outperforms a variety of strong baselines.", "keywords": "Off-road Driving;Autonomous Driving;Deep Learning;Perception", "primary_area": "", "supplementary_material": "/attachment/667e62e94b0908a1d853654b05bdf1e2c0955b2d.zip", "author": "Amirreza Shaban;Xiangyun Meng;JoonHo Lee;Byron Boots;Dieter Fox", "authorids": "~Amirreza_Shaban1;~Xiangyun_Meng1;~JoonHo_Lee3;~Byron_Boots1;~Dieter_Fox1", "gender": "Unspecified;;;;M", "homepage": ";https://homes.cs.washington.edu/~xiangyun;https://github.com/JHLee0513;;https://homes.cs.washington.edu/~fox/", "dblp": "99/9987;169/3352;;;f/DieterFox", "google_scholar": "6Q6TCkkAAAAJ;;RIshnY8AAAAJ;;DqXsbPAAAAAJ", "orcid": ";;;;", "linkedin": ";;jhl0513/;;", "or_profile": "~Amirreza_Shaban1;~Xiangyun_Meng1;~JoonHo_Lee3;~Byron_Boots1;~Dieter_Fox1", "aff": "University of Washington, Seattle;University of Washington;University of Washington, Seattle;;Department of Computer Science", "aff_domain": "uw.edu;washington.edu;uw.edu;;cs.washington.edu", "position": "Postdoc;PhD student;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nshaban2021semantic,\ntitle={Semantic Terrain Classification for Off-Road Autonomous Driving},\nauthor={Amirreza Shaban and Xiangyun Meng and JoonHo Lee and Byron Boots and Dieter Fox},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=AL4FPs84YdQ}\n}", "github": "", "project": "", "reviewers": "2yv4;dzpe;u6cx;vtKE", "site": "https://openreview.net/forum?id=AL4FPs84YdQ", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5715514676024114265&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of 
Washington;Unknown Institution", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.washington.edu;", "aff_unique_abbr": "UW;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "AlJXhEI6J5W", "title": "Offline-to-Online Reinforcement Learning via Balanced Replay and Pessimistic Q-Ensemble", "track": "main", "status": "Poster", "tldr": "", "abstract": "Recent advance in deep offline reinforcement learning (RL) has made it possible to train strong robotic agents from offline datasets. However, depending on the quality of the trained agents and the application being considered, it is often desirable to fine-tune such agents via further online interactions. In this paper, we observe that state-action distribution shift may lead to severe bootstrap error during fine-tuning, which destroys the good initial policy obtained via offline RL. To address this issue, we first propose a balanced replay scheme that prioritizes samples encountered online while also encouraging the use of near-on-policy samples from the offline dataset. Furthermore, we leverage multiple Q-functions trained pessimistically offline, thereby preventing overoptimism concerning unfamiliar actions at novel states during the initial training phase. We show that the proposed method improves sample-efficiency and final performance of the fine-tuned robotic agents on various locomotion and manipulation tasks. 
Our code is available at: https://github.com/shlee94/Off2OnRL.", "keywords": "Deep Reinforcement Learning;Offline RL;Fine-tuning", "primary_area": "", "supplementary_material": "/attachment/1e54b0bf653f00e6542f4422d2e815349672cef7.zip", "author": "Seunghyun Lee;Younggyo Seo;Kimin Lee;Pieter Abbeel;Jinwoo Shin", "authorids": "~Seunghyun_Lee2;~Younggyo_Seo1;~Kimin_Lee1;~Pieter_Abbeel2;~Jinwoo_Shin1", "gender": "M;M;M;M;M", "homepage": "https://sites.google.com/view/seunghyun-lee/home;https://younggyo.me/;https://sites.google.com/view/kiminlee;https://people.eecs.berkeley.edu/~pabbeel/;https://sites.google.com/site/mijirim/", "dblp": "23/774;265/5586;183/6849;;31/7062", "google_scholar": "NOJNXdAAAAAJ;tI1-YwIAAAAJ;92M8xv4AAAAJ;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Seunghyun_Lee2;~Younggyo_Seo1;~Kimin_Lee1;~Pieter_Abbeel2;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Microsoft Research Asia;University of California, Berkeley;Covariant;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;microsoft.com;berkeley.edu;covariant.ai;kaist.ac.kr", "position": "MS student;Intern;Postdoc;Founder;Associate Professor", "bibtex": "@inproceedings{\nlee2021offlinetoonline,\ntitle={Offline-to-Online Reinforcement Learning via Balanced Replay and Pessimistic Q-Ensemble},\nauthor={Seunghyun Lee and Younggyo Seo and Kimin Lee and Pieter Abbeel and Jinwoo Shin},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=AlJXhEI6J5W}\n}", "github": "", "project": "", "reviewers": "yjtU;Jt9W;rPXZ;SrrQ", "site": "https://openreview.net/forum?id=AlJXhEI6J5W", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 239, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=39584001947157597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Microsoft;University of California, Berkeley;Covariant", "aff_unique_dep": ";Research;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.microsoft.com/en-us/research/group/asia;https://www.berkeley.edu;", "aff_unique_abbr": "KAIST;MSR Asia;UC Berkeley;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Berkeley", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "South Korea;China;United States;" }, { "id": "CGn3XKSf7vf", "title": "Single-Shot Scene Reconstruction", "track": "main", "status": "Poster", "tldr": "", "abstract": "We introduce a novel scene reconstruction method to infer a fully editable and re-renderable model of a 3D road scene from a single image. We represent movable objects separately from the immovable background, and recover a full 3D model of each distinct object as well as their spatial relations in the scene. We leverage transformer-based detectors and neural implicit 3D representations and we build a Scene Decomposition Network (SDN) that reconstructs the scene in 3D. Furthermore, we show that this reconstruction can be used in an analysis-by-synthesis setting via differentiable rendering. Trained only on simulated road scenes, our method generalizes well to real data in the same class without any adaptation thanks to its strong inductive priors. 
Experiments on two synthetic-real dataset pairs (PD-DDAD and VKITTI-KITTI) show that our method can robustly recover scene geometry and appearance, as well as reconstruct and re-render the scene from novel viewpoints.", "keywords": "scene reconstruction;differentiable rendering", "primary_area": "", "supplementary_material": "/attachment/7595f24e811d98f3929aeeff1800f7ffd053e8af.zip", "author": "Sergey Zakharov;Rares Andrei Ambrus;Vitor Campagnolo Guizilini;Dennis Park;Wadim Kehl;Fredo Durand;Joshua B. Tenenbaum;Vincent Sitzmann;Jiajun Wu;Adrien Gaidon", "authorids": "~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Vitor_Campagnolo_Guizilini2;~Dennis_Park1;~Wadim_Kehl2;~Fredo_Durand1;~Joshua_B._Tenenbaum1;~Vincent_Sitzmann1;~Jiajun_Wu1;~Adrien_Gaidon1", "gender": "M;M;M;;M;M;;M;M;", "homepage": "https://zakharos.github.io/;http://www.csc.kth.se/~raambrus/;;;http://www.wadimkehl.com;http://people.csail.mit.edu/fredo/;;https://vsitzmann.github.io;https://jiajunwu.com;https://adriengaidon.com/", "dblp": "195/5832;25/76;;92/8610;157/3597;87/2617;t/JoshuaBTenenbaum;192/1958;117/4768;06/7548.html", "google_scholar": "https://scholar.google.de/citations?user=3DK3I-8AAAAJ;2xjjS3oAAAAJ;UH9tP6QAAAAJ;;https://scholar.google.de/citations?user=KVRo4HUAAAAJ;https://scholar.google.com.tw/citations?user=NJ9c4ygAAAAJ;;X44QVV4AAAAJ;2efgcS0AAAAJ;https://scholar.google.fr/citations?user=2StUgf4AAAAJ", "orcid": ";0000-0002-3111-3812;;;0000-0002-2914-8557;0000-0001-9919-069X;;0000-0002-0107-5704;0000-0002-4176-343X;", "linkedin": ";rare%C8%99-ambru%C8%99-b04812125/;vitorguizilini/;;wadim-kehl-b11870b0/;;;vincentsitzmann/;jiajunwu/;adrien-gaidon-63ab2358/", "or_profile": "~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Vitor_Campagnolo_Guizilini2;~Dennis_Park1;~Wadim_Kehl2;~Fredo_Durand1;~Joshua_B._Tenenbaum1;~Vincent_Sitzmann1;~Jiajun_Wu1;~Adrien_Gaidon1", "aff": "Toyota Research Institute;Toyota Research Institute;Toyota Research Institute;Toyota Research Institute;Woven Planet Holdings;Massachusetts 
Institute of Technology;Massachusetts Institute of Technology;Preferred Networks, Inc.;Stanford University;Toyota Research Institute (TRI)", "aff_domain": "tri.global;tri.global;tri.global;tri.global;woven-planet.global;mit.edu;mit.edu;preferred.jp;stanford.edu;tri.global", "position": "Researcher;Researcher;Staff Research Scientist;Sr. Research Scientist;Researcher;Full Professor;Professor;Academic Advisor;Assistant Professor;Head of ML", "bibtex": "@inproceedings{\nzakharov2021singleshot,\ntitle={Single-Shot Scene Reconstruction},\nauthor={Sergey Zakharov and Rares Andrei Ambrus and Vitor Campagnolo Guizilini and Dennis Park and Wadim Kehl and Fredo Durand and Joshua B. Tenenbaum and Vincent Sitzmann and Jiajun Wu and Adrien Gaidon},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=CGn3XKSf7vf}\n}", "github": "", "project": "", "reviewers": "jaEn;JgCa;pv1C;ky9j", "site": "https://openreview.net/forum?id=CGn3XKSf7vf", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 10, "corr_rating_confidence": 0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13972920292556995686&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;1;2;2;3;4;0", "aff_unique_norm": "Toyota Research Institute;Woven Planet Holdings;Massachusetts Institute of Technology;Preferred Networks, Inc.;Stanford University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tri.global;https://www.wovenplanet.honda.com;https://web.mit.edu;https://www.preferred-networks.com;https://www.stanford.edu", "aff_unique_abbr": "TRI;WPH;MIT;PFN;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;1;0;0;1;0;0", "aff_country_unique": "United States;Japan" }, { "id": "CPbn4N3a2zC", "title": "Structured deep generative models for sampling on constraint 
manifolds in sequential manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Sampling efficiently on constraint manifolds is a core problem in robotics. We propose Deep Generative Constraint Sampling (DGCS), which combines a deep generative model for sampling close to a constraint manifold with nonlinear constrained optimization to project to the constraint manifold. The generative model is conditioned on the problem instance, taking a scene image as input, and it is trained with a dataset of solutions and a novel analytic constraint term. To further improve the precision and diversity of samples, we extend the approach to exploit a factorization of the constrained problem. We evaluate our approach in two problems of robotic sequential manipulation in cluttered environments. Experimental results demonstrate that our deep generative model produces diverse and precise samples and outperforms heuristic warmstart initialization. ", "keywords": "Generative Models;Nonlinear Optimization;Constraint Graph;Robotic Sequential Manipulation", "primary_area": "", "supplementary_material": "/attachment/9611cde8863b9ddf65bdf0093d7d3539480ab360.zip", "author": "Joaquim Ortiz-Haro;Jung-Su Ha;Danny Driess;Marc Toussaint", "authorids": "~Joaquim_Ortiz-Haro1;~Jung-Su_Ha1;~Danny_Driess1;~Marc_Toussaint3", "gender": "M;;M;", "homepage": "https://sites.google.com/view/jung-su-ha;https://dannydriess.github.io/;https://www.user.tu-berlin.de/mtoussai/;https://quimortiz.github.io/", "dblp": ";;t/MarcToussaint;", "google_scholar": "cabvCW8AAAAJ;https://scholar.google.de/citations?user=wxnzyjwAAAAJ;t2X4Mg8AAAAJ;", "orcid": ";;0000-0002-5487-6767;", "linkedin": ";;marctoussaint/;", "or_profile": "~Jung-Su_Ha1;~Danny_Driess1;~Marc_Toussaint3;~Joaquim_Ortiz_de_Haro1", "aff": "TU Berlin;Universit\u00e4t Stuttgart;TU Berlin;Tu Berlin", "aff_domain": "tu-berlin.de;uni-stuttgart.de;tu-berlin.de;tu-berlin.de", "position": "Postdoc;PhD student;Full Professor;PhD student", 
"bibtex": "@inproceedings{\nortiz-haro2021structured,\ntitle={Structured deep generative models for sampling on constraint manifolds in sequential manipulation},\nauthor={Joaquim Ortiz-Haro and Jung-Su Ha and Danny Driess and Marc Toussaint},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=CPbn4N3a2zC}\n}", "github": "", "project": "", "reviewers": "8mJB;yooP;2JcL", "site": "https://openreview.net/forum?id=CPbn4N3a2zC", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13375685682004063316&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Technische Universit\u00e4t Berlin;University of Stuttgart;Technical University of Berlin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tu-berlin.de;https://www.uni-stuttgart.de;https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin;Uni Stuttgart;TU Berlin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berlin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "DCfV0wDvtoV", "title": "Generalised Task Planning with First-Order Function Approximation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Real world robotics often operates in uncertain and dynamic environments where generalisation over different scenarios is of practical interest. In the absence of a model, value-based reinforcement learning can be used to learn a goal-directed policy. Typically, the interaction between robots and the objects in the environment exhibit a first-order structure. We introduce first-order, or relational, features to represent an approximation of the Q-function so that it can induce a generalised policy. 
Empirical results for a service robot domain show that our online relational reinforcement learning method is scalable to large scale problems and enables transfer learning between different problems and simulation environments with dissimilar transition dynamics.", "keywords": "task planning;relational reinforcement learning;transfer learning", "primary_area": "", "supplementary_material": "/attachment/318aa5fa529086a057d3f1887e6516d46d8108b4.zip", "author": "Jun Hao Alvin Ng;Ron Petrick", "authorids": "~Jun_Hao_Alvin_Ng1;~Ron_Petrick1", "gender": "M;", "homepage": ";http://petrick.uk/", "dblp": ";53/6768", "google_scholar": ";https://scholar.google.co.uk/citations?user=xbfb8YgAAAAJ", "orcid": ";0000-0002-3386-9568", "linkedin": ";", "or_profile": "~Jun_Hao_Alvin_Ng1;~Ron_Petrick1", "aff": "Heriot-Watt University;Heriot-Watt University", "aff_domain": "hw.ac.uk;hw.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nng2021generalised,\ntitle={Generalised Task Planning with First-Order Function Approximation},\nauthor={Jun Hao Alvin Ng and Ron Petrick},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=DCfV0wDvtoV}\n}", "github": "", "project": "", "reviewers": "rhBR;jVmG;uKVc;dCnP", "site": "https://openreview.net/forum?id=DCfV0wDvtoV", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17960398433063056141&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Heriot-Watt University", "aff_unique_dep": "", "aff_unique_url": "https://www.hw.ac.uk", "aff_unique_abbr": "HWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "DgCWxJyERoQ", "title": "The Boombox: 
Visual Reconstruction from Acoustic Vibrations", "track": "main", "status": "Poster", "tldr": "", "abstract": "Interacting with bins and containers is a fundamental task in robotics, making state estimation of the objects inside the bin critical. \nWhile robots often use cameras for state estimation, the visual modality is not always ideal due to occlusions and poor illumination. We introduce The Boombox, a container that uses sound to estimate the state of the contents inside a box. Based on the observation that the collision between objects and its containers will cause an acoustic vibration, we present a convolutional network for learning to reconstruct visual scenes. Although we use low-cost and low-power contact microphones to detect the vibrations, our results show that learning from multimodal data enables state estimation from affordable audio sensors. Due to the many ways that robots use containers, we believe the box will have a number of applications in robotics.", "keywords": "Multimodal Perception;Object State Estimation;Audio", "primary_area": "", "supplementary_material": "/attachment/e2839c37e1875086f968d92c0352dfe24d67c984.zip", "author": "Boyuan Chen;Mia Chiquier;Hod Lipson;Carl Vondrick", "authorids": "~Boyuan_Chen1;~Mia_Chiquier1;~Hod_Lipson1;~Carl_Vondrick2", "gender": "Not Specified;F;M;M", "homepage": "http://boyuanchen.com/;http://www.cs.columbia.edu/~mia.chiquier/;https://www.hodlipson.com/;http://www.cs.columbia.edu/~vondrick/", "dblp": "193/7174-1;;l/HodLipson;26/8610", "google_scholar": "5DBpY6EAAAAJ;;https://scholar.google.com/citations?hl=en;3MzhkFIAAAAJ", "orcid": ";;0000-0003-0769-4618;", "linkedin": "boyuan-chen-b30854a0/;;hod-lipson-4018189/;", "or_profile": "~Boyuan_Chen1;~Mia_Chiquier1;~Hod_Lipson1;~Carl_Vondrick2", "aff": "Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": "cs.columbia.edu;columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;PhD student;Full 
Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2021the,\ntitle={The Boombox: Visual Reconstruction from Acoustic Vibrations},\nauthor={Boyuan Chen and Mia Chiquier and Hod Lipson and Carl Vondrick},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=DgCWxJyERoQ}\n}", "github": "", "project": "", "reviewers": "5sJt;XY9t;Ezuu;aA4p", "site": "https://openreview.net/forum?id=DgCWxJyERoQ", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4565175384892861048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "EdmeHU4WVjJ", "title": "Advancing Self-supervised Monocular Depth Learning with Sparse LiDAR", "track": "main", "status": "Poster", "tldr": "", "abstract": "Self-supervised monocular depth prediction provides a cost-effective solution to obtain the 3D location of each pixel. However, the existing approaches usually lead to unsatisfactory accuracy, which is critical for autonomous robots. In this paper, we propose FusionDepth, a novel two-stage network to advance the self-supervised monocular dense depth learning by leveraging low-cost sparse (e.g. 4-beam) LiDAR. Unlike the existing methods that use sparse LiDAR mainly in a manner of time-consuming iterative post-processing, our model fuses monocular image features and sparse LiDAR features to predict initial depth maps. 
Then, an efficient feed-forward refine network is further designed to correct the errors in these initial depth maps in pseudo-3D space with real-time performance. Extensive experiments show that our proposed model significantly outperforms all the state-of-the-art self-supervised methods, as well as the sparse-LiDAR-based methods on both self-supervised monocular depth prediction and completion tasks. With the accurate dense depth prediction, our model outperforms the state-of-the-art sparse-LiDAR-based method (Pseudo-LiDAR++) by more than 68% for the downstream task monocular 3D object detection on the KITTI Leaderboard.", "keywords": "Self-supervised;Monocular;Depth Prediction;Sparse LiDAR", "primary_area": "", "supplementary_material": "/attachment/7702272009d9d840e2b41fa8622f65a6af37daf0.zip", "author": "Ziyue Feng;Longlong Jing;Peng Yin;Yingli Tian;Bing Li", "authorids": "~Ziyue_Feng1;~Longlong_Jing1;~Peng_Yin3;~Yingli_Tian1;~Bing_Li4", "gender": "M;M;F;M;M", "homepage": "https://ziyue.cool/;https://longlong-jing.github.io/;https://www.ccny.cuny.edu/profiles/yingli-tian;http://cecas.clemson.edu/bingli;https://maxtomcmu.github.io/", "dblp": "228/4607.html;214/9050;54/8250;13/2692-8;50/5864", "google_scholar": "8Zb1V70AAAAJ;lhdhi5wAAAAJ;https://scholar.google.com.tw/citations?user=aAWeB4wAAAAJ;yysOczkAAAAJ;_q-aMvgAAAAJ", "orcid": "0000-0002-0037-3697;;;0000-0003-4987-6129;", "linkedin": "ziyue-feng/;;yingli-tian-43a86413/;;maxtom", "or_profile": "~Ziyue_Feng1;~Longlong_Jing1;~Yingli_Tian1;~Bing_Li4;~Peng-Yeng_Yin1", "aff": "Clemson University;The City University of New York;CUNY Graduate Center;Clemson University;Carnegie Mellon University", "aff_domain": "clemson.edu;cuny.edu;ccny.cuny.edu;clemson.edu;cmu.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nfeng2021advancing,\ntitle={Advancing Self-supervised Monocular Depth Learning with Sparse Li{DAR}},\nauthor={Ziyue Feng and Longlong Jing and 
Peng Yin and Yingli Tian and Bing Li},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=EdmeHU4WVjJ}\n}", "github": "", "project": "", "reviewers": "SZdB;y8K7;HJoT;vPeH", "site": "https://openreview.net/forum?id=EdmeHU4WVjJ", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12543744596610220203&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Clemson University;City University of New York;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.clemson.edu;https://www.cuny.edu;https://www.cmu.edu", "aff_unique_abbr": "Clemson;CUNY;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Graduate Center", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ei3MOY2rDHB", "title": "My House, My Rules: Learning Tidying Preferences with Graph Neural Networks", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robots that arrange household objects should do so according to the user's preferences, which are inherently subjective and difficult to model. We present NeatNet: a novel Variational Autoencoder architecture using Graph Neural Network layers, which can extract a low-dimensional latent preference vector from a user by observing how they arrange scenes. Given any set of objects, this vector can then be used to generate an arrangement which is tailored to that user's spatial preferences, with word embeddings used for generalisation to new objects. 
We develop a tidying simulator to gather rearrangement examples from 75 users, and demonstrate empirically that our method consistently produces neat and personalised arrangements across a variety of rearrangement scenarios.", "keywords": "graph neural networks;preference learning;rearrangement tasks", "primary_area": "", "supplementary_material": "/attachment/240559c92e0b06520512d7914431cdcb82d9c33d.zip", "author": "Ivan Kapelyukh;Edward Johns", "authorids": "~Ivan_Kapelyukh1;~Edward_Johns1", "gender": "M;M", "homepage": "https://ivankapelyukh.com/;https://www.robot-learning.uk", "dblp": "305/7084;68/9968", "google_scholar": "DkNQTkoAAAAJ;https://scholar.google.co.uk/citations?user=sMIUkiQAAAAJ", "orcid": ";0000-0002-8914-8786", "linkedin": "ivan-kapelyukh/;https://uk.linkedin.com/in/edward-johns-1b24845a", "or_profile": "~Ivan_Kapelyukh1;~Edward_Johns1", "aff": "Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;imperial.ac.uk", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nkapelyukh2021my,\ntitle={My House, My Rules: Learning Tidying Preferences with Graph Neural Networks},\nauthor={Ivan Kapelyukh and Edward Johns},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=Ei3MOY2rDHB}\n}", "github": "", "project": "", "reviewers": "8rCv;vXaq;Ftgd", "site": "https://openreview.net/forum?id=Ei3MOY2rDHB", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17462096414903751486&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "EougVeukEH9", "title": "O2O-Afford: Annotation-Free Large-Scale Object-Object Affordance Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Contrary to the vast literature in modeling, perceiving, and understanding agent-object (e.g., human-object, hand-object, robot-object) interaction in computer vision and robotics, very few past works have studied the task of object-object interaction, which also plays an important role in robotic manipulation and planning tasks. There is a rich space of object-object interaction scenarios in our daily life, such as placing an object on a messy tabletop, fitting an object inside a drawer, pushing an object using a tool, etc. In this paper, we propose a unified affordance learning framework to learn object-object interaction for various tasks. By constructing four object-object interaction task environments using physical simulation (SAPIEN) and thousands of ShapeNet models with rich geometric diversity, we are able to conduct large-scale object-object affordance learning without the need for human annotations or demonstrations. At the core of technical contribution, we propose an object-kernel point convolution network to reason about detailed interaction between two objects. 
Experiments on large-scale synthetic data and real-world data prove the effectiveness of the proposed approach.", "keywords": "Object-object Affordance;Vision for Robotics;Large-scale Learning", "primary_area": "", "supplementary_material": "/attachment/db1f4a5dc32d8ecaa0ad2dab5d160f050794efef.zip", "author": "Kaichun Mo;Yuzhe Qin;Fanbo Xiang;Hao Su;Leonidas Guibas", "authorids": "~Kaichun_Mo1;~Yuzhe_Qin1;~Fanbo_Xiang1;~Hao_Su1;~Leonidas_Guibas1", "gender": "M;M;;M;M", "homepage": "https://cs.stanford.edu/~kaichun/;https://yzqin.github.io/;https://www.fbxiang.com;http://ai.ucsd.edu/~haosu;http://geometry.stanford.edu/", "dblp": "172/1283;241/9337;261/3562;09/4945-1;g/LeonidasJGuibas", "google_scholar": "pL7JsOsAAAAJ;3KF3AIMAAAAJ;oLIh5qYAAAAJ;1P8Zu04AAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ", "orcid": ";0000-0002-9321-9305;;;", "linkedin": ";;;;", "or_profile": "~Kaichun_Mo1;~Yuzhe_Qin1;~Fanbo_Xiang1;~Hao_Su1;~Leonidas_Guibas1", "aff": "Stanford University;University of California, San Diego;University of California, San Diego;University of California, San Diego;Stanford University", "aff_domain": "stanford.edu;ucsd.edu;ucsd.edu;ucsd.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmo2021ooafford,\ntitle={O2O-Afford: Annotation-Free Large-Scale Object-Object Affordance Learning},\nauthor={Kaichun Mo and Yuzhe Qin and Fanbo Xiang and Hao Su and Leonidas Guibas},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=EougVeukEH9}\n}", "github": "", "project": "", "reviewers": "4Nfi;dU8R;esX6", "site": "https://openreview.net/forum?id=EougVeukEH9", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 73, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1092985086825726339&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Stanford University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.ucsd.edu", "aff_unique_abbr": "Stanford;UCSD", "aff_campus_unique_index": "0;1;1;1;0", "aff_campus_unique": "Stanford;San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Esw0Wh6Stt5", "title": "Fully Autonomous Real-World Reinforcement Learning with Applications to Mobile Manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this paper, we study how robots can autonomously learn skills that require a combination of navigation and grasping. Learning robotic skills in the real world remains challenging without large scale data collection and supervision. Our aim is to devise a robotic reinforcement learning system for learning navigation and manipulation together, in an autonomous way without human intervention, enabling continual learning under realistic assumptions. Specifically, our system, ReLMM, can learn continuously on a real-world platform without any environment instrumentation, without human intervention, and without access to privileged information, such as maps, objects positions, or a global view of the environment. Our method employs a modularized policy with components for manipulation and navigation, where uncertainty over the manipulation success drives exploration for the navigation controller, and the manipulation module provides rewards for navigation. We evaluate our method on a room cleanup task, where the robot must navigate to and pick up items of scattered on the floor. 
After a grasp curriculum training phase, ReLMM can learn navigation and grasping together fully automatically, in around 40 hours of real-world training.", "keywords": "mobile manipulation;reinforcement learning;Reset-Free", "primary_area": "", "supplementary_material": "/attachment/f5af3e4f265c9186f0deb11f3d1a6211b1de7ab4.zip", "author": "Charles Sun;J\u0119drzej Orbik;Coline Manon Devin;Brian H Yang;Abhishek Gupta;Glen Berseth;Sergey Levine", "authorids": "~Charles_Sun1;~J\u0119drzej_Orbik1;~Coline_Manon_Devin1;brianhyang@berkeley.edu;~Abhishek_Gupta1;~Glen_Berseth1;~Sergey_Levine1", "gender": "M;M;;;M;M;M", "homepage": "https://charlesjsun.github.io/;;;;https://homes.cs.washington.edu/~abhgupta/;http://fracturedplane.com/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";;;;18/6404-4;147/5478;80/7594", "google_scholar": ";;;;1wLVDP4AAAAJ;https://scholar.google.ca/citations?user=-WZcuuwAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;;0000-0001-7351-8028;", "linkedin": ";jedrzejorbik/;;;;glen-berseth-0523278b?trk=hp-identity-name;", "or_profile": "~Charles_Sun1;~J\u0119drzej_Orbik1;~Coline_Manon_Devin1;brianhyang@berkeley.edu;~Abhishek_Gupta1;~Glen_Berseth1;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;;;University of California, Berkeley;University of California, Berkeley;Google", "aff_domain": "berkeley.edu;berkeley.edu;;;berkeley.edu;berkeley.edu;google.com", "position": "Undergrad student;Research Engineer;;;PhD student;Postdoc;Research Scientist", "bibtex": "@inproceedings{\nsun2021fully,\ntitle={Fully Autonomous Real-World Reinforcement Learning with Applications to Mobile Manipulation},\nauthor={Charles Sun and J{\\k{e}}drzej Orbik and Coline Manon Devin and Brian H Yang and Abhishek Gupta and Glen Berseth and Sergey Levine},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=Esw0Wh6Stt5}\n}", "github": "", "project": "", "reviewers": "W8H3;aRW7;rFXj;m31K", 
"site": "https://openreview.net/forum?id=Esw0Wh6Stt5", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 29, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4808135172864911306&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;0;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "FCoh4OLZ1Gg", "title": "Embodied Semantic Scene Graph Generation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Semantic scene graph provides an effective way for intelligent agents to better understand the environment and it has been extensively used in many robotic applications. Existing work mainly focuses on generating the scene graph from the sensory information collected from a pre-defined path, while the environment should be exhaustively explored with a carefully designed path in order to obtain a comprehensive semantic scene graph efficiently. In this paper, we propose a new task of Embodied Semantic Scene Graph Generation, which exploits the embodiment of the intelligent agent to autonomously generate an appropriate path to explore the environment for scene graph generation. To this end, a learning framework with the paradigms of imitation learning and reinforcement learning is proposed to help the agent generate proper actions to explore the environment and the scene graph is incrementally constructed. The proposed method is evaluated on the AI2Thor environment using both the quantitative and qualitative performance indexes. 
Additionally, we implement the proposed method on a streaming video captioning task and promising experimental results are achieved.", "keywords": "Semantic Scene Graph;Embodied Exploration;Learning for Visual Navigation", "primary_area": "", "supplementary_material": "/attachment/86fc37ace9db38d774ed4a65ea97ff57d93eebd4.zip", "author": "Xinghang Li;Di Guo;Huaping Liu;Fuchun Sun", "authorids": "~Xinghang_Li1;~Di_Guo1;~Huaping_Liu3;~Fuchun_Sun4", "gender": "M;F;M;M", "homepage": ";;https://sites.google.com/site/thuliuhuaping/;https://sites.google.com/site/thuliuhuaping/", "dblp": "304/4145;;69/1097-1;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;OCauNHUAAAAJ;https://scholar.google.com.hk/citations?user=HXnkIkwAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xinghang_Li1;~Di_Guo1;~Huaping_Liu3;~Fuchun_Sun4", "aff": "Tsinghua University;;Tsinghua University;", "aff_domain": "mail.tsinghua.edu.cn;;tsinghua.edu.cn;", "position": "PhD student;;Full Professor;", "bibtex": "@inproceedings{\nli2021embodied,\ntitle={Embodied Semantic Scene Graph Generation},\nauthor={Xinghang Li and Di Guo and Huaping Liu and Fuchun Sun},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=FCoh4OLZ1Gg}\n}", "github": "", "project": "", "reviewers": "vave;bACY;nnax;iKq5", "site": "https://openreview.net/forum?id=FCoh4OLZ1Gg", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4544625370944993455&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", 
"aff_country_unique": "China" }, { "id": "FS30JeiGG3h", "title": "Learning Models as Functionals of Signed-Distance Fields for Manipulation Planning", "track": "main", "status": "Poster", "tldr": "", "abstract": "This work proposes an optimization-based manipulation planning framework where the objectives are learned functionals of signed-distance fields that represent objects in the scene. Most manipulation planning approaches rely on analytical models and carefully chosen abstractions/state-spaces to be effective. A central question is how models can be obtained from data that are not primarily accurate in their predictions, but, more importantly, enable efficient reasoning within a planning framework, while at the same time being closely coupled to perception spaces. We show that representing objects as signed-distance fields not only enables to learn and represent a variety of models with higher accuracy compared to point-cloud and occupancy measure representations, but also that SDF-based models are suitable for optimization-based planning. To demonstrate the versatility of our approach, we learn both kinematic and dynamic models to solve tasks that involve hanging mugs on hooks and pushing objects on a table. We can unify these quite different tasks within one framework, since SDFs are the common object representation. 
\nVideo: https://youtu.be/ga8Wlkss7co", "keywords": "Manipulation Planning;Signed Distance Fields;Model Learning;Implicit Models;Dynamic Model Learning;Implicit Object Representations", "primary_area": "", "supplementary_material": "/attachment/5c087bc80eb75120a82209b190f5969440aeccc0.zip", "author": "Danny Driess;Jung-Su Ha;Marc Toussaint;Russ Tedrake", "authorids": "~Danny_Driess1;~Jung-Su_Ha1;~Marc_Toussaint3;~Russ_Tedrake1", "gender": ";M;M;M", "homepage": "https://dannydriess.github.io/;https://sites.google.com/view/jung-su-ha;https://www.user.tu-berlin.de/mtoussai/;http://people.csail.mit.edu/russt", "dblp": ";;t/MarcToussaint;73/1296", "google_scholar": "https://scholar.google.de/citations?user=wxnzyjwAAAAJ;cabvCW8AAAAJ;t2X4Mg8AAAAJ;nxNkEiYAAAAJ", "orcid": ";;0000-0002-5487-6767;", "linkedin": ";;marctoussaint/;", "or_profile": "~Danny_Driess1;~Jung-Su_Ha1;~Marc_Toussaint3;~Russ_Tedrake1", "aff": "Universit\u00e4t Stuttgart;TU Berlin;TU Berlin;Massachusetts Institute of Technology", "aff_domain": "uni-stuttgart.de;tu-berlin.de;tu-berlin.de;mit.edu", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndriess2021learning,\ntitle={Learning Models as Functionals of Signed-Distance Fields for Manipulation Planning},\nauthor={Danny Driess and Jung-Su Ha and Marc Toussaint and Russ Tedrake},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=FS30JeiGG3h}\n}", "github": "", "project": "", "reviewers": "NU21;SZWq;5kbt", "site": "https://openreview.net/forum?id=FS30JeiGG3h", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10485599482780000298&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Stuttgart;Technische 
Universit\u00e4t Berlin;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-stuttgart.de;https://www.tu-berlin.de;https://web.mit.edu", "aff_unique_abbr": "Uni Stuttgart;TU Berlin;MIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Germany;United States" }, { "id": "FzMHiDesj0I", "title": "TANDEM: Tracking and Dense Mapping in Real-time using Deep Multi-view Stereo", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this paper, we present TANDEM a real-time monocular tracking and dense mapping framework. For pose estimation, TANDEM performs photometric bundle adjustment based on a sliding window of keyframes. To increase the robustness, we propose a novel tracking front-end that performs dense direct image alignment using depth maps rendered from a global model that is built incrementally from dense depth predictions. To predict the dense depth maps, we propose Cascade View-Aggregation MVSNet (CVA-MVSNet) that utilizes the entire active keyframe window by hierarchically constructing 3D cost volumes with adaptive view aggregation to balance the different stereo baselines between the keyframes. Finally, the predicted depth maps are fused into a consistent global map represented as a truncated signed distance function (TSDF) voxel grid. Our experimental results show that TANDEM outperforms other state-of-the-art traditional and learning-based monocular visual odometry (VO) methods in terms of camera tracking. Moreover, TANDEM shows state-of-the-art real-time 3D reconstruction performance. 
Webpage: https://go.vision.in.tum.de/tandem", "keywords": "SLAM;Dense Mapping;Multi-view Stereo;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/6703e26d8578e905c6b6bde5c1f499a6db84b8d8.zip", "author": "Lukas Koestler;Nan Yang;Niclas Zeller;Daniel Cremers", "authorids": "~Lukas_Koestler1;~Nan_Yang3;~Niclas_Zeller1;~Daniel_Cremers1", "gender": "M;M;M;M", "homepage": "https://lukaskoestler.com;http://www.nan-yang.me;https://www.niclas-zeller.de/;https://vision.in.tum.de/members/cremers", "dblp": "276/1000;51/1629-7;172/2143;c/DanielCremers", "google_scholar": "vepdJTQAAAAJ;https://scholar.google.de/citations?user=pUj2ffwAAAAJ;https://scholar.google.de/citations?user=-tegRu0AAAAJ;cXQciMEAAAAJ", "orcid": ";0000-0002-1497-9630;0000-0001-7865-1944;", "linkedin": ";nan-yang-089aa8aa/;niclas-zeller/;", "or_profile": "~Lukas_Koestler1;~Nan_Yang3;~Niclas_Zeller1;~Daniel_Cremers1", "aff": "Technical University Munich;Artisense;Artisense;Technical University Munich", "aff_domain": "tum.de;artisense.ai;artisense.ai;tum.de", "position": "PhD student;PhD student;Senior CV & AI Engineer;Full Professor", "bibtex": "@inproceedings{\nkoestler2021tandem,\ntitle={{TANDEM}: Tracking and Dense Mapping in Real-time using Deep Multi-view Stereo},\nauthor={Lukas Koestler and Nan Yang and Niclas Zeller and Daniel Cremers},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=FzMHiDesj0I}\n}", "github": "", "project": "", "reviewers": "xCmq;mvF2;XeDS;W394", "site": "https://openreview.net/forum?id=FzMHiDesj0I", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12036126780542329221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Technical University of 
Munich;Artisense", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;", "aff_unique_abbr": "TUM;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany;" }, { "id": "GIgsuWifgIi", "title": "You Only Evaluate Once: a Simple Baseline Algorithm for Offline RL", "track": "main", "status": "Poster", "tldr": "", "abstract": "The goal of offline reinforcement learning (RL) is to find an optimal policy given prerecorded trajectories. Many current approaches customize existing off-policy RL algorithms, especially actor-critic algorithms in which policy evaluation and improvement are iterated. However, the convergence of such approaches is not guaranteed due to the use of complex non-linear function approximation and an intertwined optimization process.\nBy contrast, we propose a simple baseline algorithm for offline RL that only performs the policy evaluation step once so that the algorithm does not require complex stabilization schemes. Since the proposed algorithm is not likely to converge to an optimal policy, it is an appropriate baseline for actor-critic algorithms that ought to be outperformed if there is indeed value in iterative optimization in the offline setting. Surprisingly, we empirically find that the proposed algorithm exhibits competitive and sometimes even state-of-the-art performance in a subset of the D4RL offline RL benchmark. 
This result suggests that future work is needed to fully exploit the potential advantages of iterative optimization in order to justify the reduced stability of such methods.", "keywords": "offline reinforcement learning;conservative policy evaluation", "primary_area": "", "supplementary_material": "/attachment/8847e1cf4ea7487bad0f7ede86671e2a85eea704.zip", "author": "Wonjoon Goo;Scott Niekum", "authorids": "~Wonjoon_Goo1;~Scott_Niekum1", "gender": ";M", "homepage": "http://dev.wonjoon.me;https://people.cs.umass.edu/~sniekum/index.php", "dblp": "185/7860;62/8399", "google_scholar": "AmBlcsMAAAAJ;4wXYfSUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Wonjoon_Goo1;~Scott_Niekum1", "aff": "University of Texas, Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ngoo2021you,\ntitle={You Only Evaluate Once: a Simple Baseline Algorithm for Offline {RL}},\nauthor={Wonjoon Goo and Scott Niekum},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=GIgsuWifgIi}\n}", "github": "", "project": "", "reviewers": "LDRr;q44u;HJhz", "site": "https://openreview.net/forum?id=GIgsuWifgIi", "pdf_size": 0, "rating": "4;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7275300588801483633&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "GhMZNcr54zt", "title": "Learning to Predict Vehicle Trajectories with Model-based Planning", "track": 
"main", "status": "Poster", "tldr": "", "abstract": "Predicting the future trajectories of on-road vehicles is critical for autonomous driving. In this paper, we introduce a novel prediction framework called PRIME, which stands for Prediction with Model-based Planning. Unlike recent prediction works that utilize neural networks to model scene context and produce unconstrained trajectories, PRIME is designed to generate accurate and feasibility-guaranteed future trajectory predictions. PRIME guarantees the trajectory feasibility by exploiting a model-based generator to produce future trajectories under explicit constraints and enables accurate multimodal prediction by utilizing a learning-based evaluator to select future trajectories. We conduct experiments on the large-scale Argoverse Motion Forecasting Benchmark, where PRIME outperforms the state-of-the-art methods in prediction accuracy, feasibility, and robustness under imperfect tracking.", "keywords": "Trajectory Prediction;Autonomous Driving", "primary_area": "", "supplementary_material": "/attachment/d9211df03e9fdef6d63b7e4f785f3d5a6e5c1b3a.zip", "author": "Haoran Song;Di Luan;Wenchao Ding;Michael Y Wang;Qifeng Chen", "authorids": "~Haoran_Song1;~Di_Luan3;~Wenchao_Ding1;~Michael_Y_Wang1;~Qifeng_Chen1", "gender": "M;M;M;M;M", "homepage": "https://haoran-song.github.io;https://wenchaoding.github.io/;http://cqf.io/;;", "dblp": "215/6161;157/4438-1;117/4819;;", "google_scholar": "https://scholar.google.com.hk/citations?user=PNB7U7xPI_4C;https://scholar.google.com.hk/citations?user=44f1ubYAAAAJ;lLMX9hcAAAAJ;;https://scholar.google.com.hk/citations?user=Oo7c22wAAAAJ", "orcid": "0000-0003-3157-0885;0000-0003-4249-526X;;0000-0002-4178-8549;0000-0002-6524-5741", "linkedin": ";;;;", "or_profile": "~Haoran_Song1;~Wenchao_Ding1;~Qifeng_Chen1;~LUAN_DI1;~MICHAEL_YU_WANG1", "aff": "Hong Kong University of Science and Technology;Huawei Technologies Ltd.;Hong Kong University of Science and Technology;Hong Kong University of 
Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;huawei.com;hkust.edu;hkust.edu;ust.hk", "position": "PhD student;Researcher;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nsong2021learning,\ntitle={Learning to Predict Vehicle Trajectories with Model-based Planning},\nauthor={Haoran Song and Di Luan and Wenchao Ding and Michael Y Wang and Qifeng Chen},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=GhMZNcr54zt}\n}", "github": "", "project": "", "reviewers": "PMNK;SJ8P;fWJ2;Q68c", "site": "https://openreview.net/forum?id=GhMZNcr54zt", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 160, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6897041037245345756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com", "aff_unique_abbr": "HKUST;Huawei", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "H1-uwiTbY9z", "title": "Distributional Depth-Based Estimation of Object Articulation Models", "track": "main", "status": "Poster", "tldr": "", "abstract": "We propose a method that efficiently learns distributions over articulation models directly from depth images without the need to know articulation model categories a priori. By contrast, existing methods that learn articulation models from raw observations require objects to be textured, and most only predict point estimates of the model parameters. 
Our core contributions include a novel representation for distributions over rigid body transformations and articulation model parameters based on Screw theory, von Mises-Fisher distributions and Stiefel manifolds. Combining these concepts allows for an efficient, mathematically sound representation that inherently satisfies several constraints that rigid body transformations and articulations must adhere to. In addition, we introduce a novel deep-learning based approach, DUST-net, that efficiently learns such distributions and, hence, performs category-independent articulation model estimation while also providing model uncertainties. We evaluate our approach on two benchmarking datasets and three real-world objects and compare its performance with two current state-of-the-art methods. Our results demonstrate that DUST-net can successfully learn distributions over articulation models and their parameters for novel objects across articulation model categories with better accuracy than state-of-the-art methods.", "keywords": "Articulated Objects;Model Learning;Uncertainty Estimation;Kinematics;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/bf78d3736068f623f205808ab01d8fcf52bb4177.zip", "author": "Ajinkya Jain;Stephen Giguere;Rudolf Lioutikov;Scott Niekum", "authorids": "~Ajinkya_Jain1;~Stephen_Giguere1;~Rudolf_Lioutikov1;~Scott_Niekum1", "gender": "M;M;M;M", "homepage": "https://jainajinkya.github.io;;https://rudolf.intuitive-robots.net;https://people.cs.umass.edu/~sniekum/index.php", "dblp": "166/0292;14/8174;151/9451;62/8399", "google_scholar": "CNUnD7kAAAAJ;;hvjV43MAAAAJ;4wXYfSUAAAAJ", "orcid": ";;;", "linkedin": ";;rudolf-lioutikov-74830730a/;", "or_profile": "~Ajinkya_Jain1;~Stephen_Giguere1;~Rudolf_Lioutikov1;~Scott_Niekum1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD 
student;Postdoc;Assistant Professor of Practice;Associate Professor", "bibtex": "@inproceedings{\njain2021distributional,\ntitle={Distributional Depth-Based Estimation of Object Articulation Models},\nauthor={Ajinkya Jain and Stephen Giguere and Rudolf Lioutikov and Scott Niekum},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=H1-uwiTbY9z}\n}", "github": "", "project": "", "reviewers": "Libh;ahNp;41iG;tV4q", "site": "https://openreview.net/forum?id=H1-uwiTbY9z", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7978508576758244394&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "HTfApPeT4DZ", "title": "Generating Scenarios with Diverse Pedestrian Behaviors for Autonomous Vehicle Testing", "track": "main", "status": "Poster", "tldr": "", "abstract": "There exist several datasets for developing self-driving car methodologies. Manually collected datasets impose inherent limitations on the variability of test cases and it is particularly difficult to acquire challenging scenarios, e.g. ones involving collisions with pedestrians. A way to alleviate this is to consider automatic generation of safety-critical scenarios for autonomous vehicle (AV) testing. Existing approaches for scenario generation use heuristic pedestrian behavior models. 
We instead propose a framework that can use state-of-the-art pedestrian motion models, which is achieved by reformulating the problem as learning where to place pedestrians such that the induced scenarios are collision prone for a given AV. Our pedestrian initial location model can be used in conjunction with any goal driven pedestrian model which makes it possible to challenge an AV with a wide range of pedestrian behaviors -- this ensures that the AV can avoid collisions with any pedestrian it encounters. We show that it is possible to learn a collision seeking scenario generation model when both the pedestrian and AV are collision avoiding. The initial location model is conditioned on scene semantics and occlusions to ensure semantic and visual plausibility, which increases the realism of generated scenarios. Our model can be used to test any AV model given sufficient constraints.", "keywords": "Autonomous Vehicles;AV Testing;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/7cc8a0572d0161883c9c81a46f362cc67a222df9.zip", "author": "Maria Priisalu;Aleksis Pirinen;Ciprian Paduraru;Cristian Sminchisescu", "authorids": "~Maria_Priisalu1;~Aleksis_Pirinen1;~Ciprian_Paduraru1;~Cristian_Sminchisescu1", "gender": "F;M;M;", "homepage": "https://www.lunduniversity.lu.se/lucat/user/84b2627e3f8740fabfda8485c48cd12e;https://aleksispi.github.io/;;http://www.maths.lth.se/sminchisescu/", "dblp": "http://dblp.org/pers/hd/p/Priisalu:Maria;;;96/3826", "google_scholar": "https://scholar.google.se/citations?user=9JrrOMIAAAAJ;paBGTgsAAAAJ;EaAekU4AAAAJ;https://scholar.google.se/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Maria_Priisalu1;~Aleksis_Pirinen1;~Ciprian_Paduraru1;~Cristian_Sminchisescu1", "aff": "Lund University / Lund Institute of Technology;Lund University / Lund Institute of Technology;University Of Bucharest;Lund University", "aff_domain": "lth.se;lth.se;fmi.unibuc.ro;lth.se", "position": "PhD student;PhD 
student;Assistant Professor;Professor", "bibtex": "@inproceedings{\npriisalu2021generating,\ntitle={Generating Scenarios with Diverse Pedestrian Behaviors for Autonomous Vehicle Testing},\nauthor={Maria Priisalu and Aleksis Pirinen and Ciprian Paduraru and Cristian Sminchisescu},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=HTfApPeT4DZ}\n}", "github": "", "project": "", "reviewers": "s5dQ;UAuu;7Zap", "site": "https://openreview.net/forum?id=HTfApPeT4DZ", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 26, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1626951327530457063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Lund University;University of Bucharest", "aff_unique_dep": "Lund Institute of Technology;", "aff_unique_url": "https://www.lunduniversity.lu.se;https://www.unibuc.ro", "aff_unique_abbr": "LU;Unibuc", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lund;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Sweden;Romania" }, { "id": "I6DLxqk9J0A", "title": "Anytime Depth Estimation with Limited Sensing and Computation Capabilities on Mobile Devices", "track": "main", "status": "Poster", "tldr": "", "abstract": "Depth estimation is a safety critical and energy sensitive method for environment sensing. However, in real applications, the depth estimation may be halted at any time, due to the random interruptions or low energy capacity of battery when using powerful sensors like 3D LiDAR. To address this problem, we propose a depth estimation method that is robust to random halts and relies on energy-saving 2D LiDAR and a monocular camera. 
To this end, we formulate the depth estimation as an anytime problem and propose a new metric to evaluate its robustness under random interruptions. Our final model has only 2M parameters with a marginal accuracy loss compared to state-of-the-art baselines. Indeed, our experiments on NYU Depth v2 dataset show that our model is capable of processing 224$\\times$224 resolution images and 2D point clouds with any computation budget larger than 6.37ms (157 FPS) and 0.2J on an NVIDIA Jetson TX2 system. Evaluations on KITTI dataset under supervised and self-supervised training show similar results.", "keywords": "Depth Estimation;Anytime Algorithm;Energy-aware Optimization;Mobile Devices", "primary_area": "", "supplementary_material": "/attachment/c93973dcdcaec10d54dcda4167041f205e339a9c.zip", "author": "Yuedong Yang;Zihui Xue;Radu Marculescu", "authorids": "~Yuedong_Yang2;~Zihui_Xue1;~Radu_Marculescu2", "gender": "M;F;M", "homepage": "https://github.com/AlbertYoung0112;https://zihuixue.github.io;https://radum.ece.utexas.edu/", "dblp": ";256/9549;88/3494", "google_scholar": ";JCV9BQ0AAAAJ;ZCmYP5cAAAAJ", "orcid": ";;0000-0003-1826-7646", "linkedin": ";;", "or_profile": "~Yuedong_Yang2;~Zihui_Xue1;~Radu_Marculescu2", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2021anytime,\ntitle={Anytime Depth Estimation with Limited Sensing and Computation Capabilities on Mobile Devices},\nauthor={Yuedong Yang and Zihui Xue and Radu Marculescu},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=I6DLxqk9J0A}\n}", "github": "", "project": "", "reviewers": "HGbf;rseL;AwPn;2XnV;Y4hj", "site": "https://openreview.net/forum?id=I6DLxqk9J0A", "pdf_size": 0, "rating": "4;6;6;6;10", "confidence": "", "rating_avg": 6.4, "confidence_avg": 0, "replies_avg": 20, 
"authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3177879035170200983&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "IScz42A3iCI", "title": "Equivariant $Q$ Learning in Spatial Action Spaces", "track": "main", "status": "Poster", "tldr": "", "abstract": "Recently, a variety of new equivariant neural network model architectures have been proposed that generalize better over rotational and reflectional symmetries than standard models. These models are relevant to robotics because many robotics problems can be expressed in a rotationally symmetric way. This paper focuses on equivariance over a visual state space and a spatial action space -- the setting where the robot action space includes a subset of $\\rm{SE}(2)$. In this situation, we know a priori that rotations and translations in the state image should result in the same rotations and translations in the spatial action dimensions of the optimal policy. Therefore, we can use equivariant model architectures to make $Q$ learning more sample efficient. This paper identifies when the optimal $Q$ function is equivariant and proposes $Q$ network architectures for this setting. We show experimentally that this approach outperforms standard methods in a set of challenging manipulation problems. 
", "keywords": "Reinforcement Learning;Equivariance;Manipulation", "primary_area": "", "supplementary_material": "/attachment/d7876816041039a7c6e53cc25d1fdc7357b298b4.zip", "author": "Dian Wang;Robin Walters;Xupeng Zhu;Robert Platt", "authorids": "~Dian_Wang1;~Robin_Walters1;~Xupeng_Zhu1;~Robert_Platt1", "gender": "M;M;M;", "homepage": "https://pointw.github.io/;http://www.robinwalters.com;https://zxp-s-works.github.io/;http://www.ccs.neu.edu/home/rplatt/", "dblp": "191/1369-1;258/3416;257/4426;39/5434", "google_scholar": "CckjtfQAAAAJ;fnprJmUAAAAJ;mwxz-8MAAAAJ;Z4Y5S2oAAAAJ", "orcid": ";;;", "linkedin": "dianwang1007;;xupengzhu-skunk;", "or_profile": "~Dian_Wang1;~Robin_Walters1;~Xupeng_Zhu1;~Robert_Platt1", "aff": "Northeastern University;;Northeastern University;Northeastern University", "aff_domain": "northeastern.edu;;northeastern.edu;neu.edu", "position": "PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2021equivariant,\ntitle={Equivariant \\$Q\\$ Learning in Spatial Action Spaces},\nauthor={Dian Wang and Robin Walters and Xupeng Zhu and Robert Platt},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=IScz42A3iCI}\n}", "github": "", "project": "", "reviewers": "BRhk;HHEb;3ab5;GkB1", "site": "https://openreview.net/forum?id=IScz42A3iCI", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13415018997285235019&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "JSC4KMlENqF", 
"title": "Demonstration-Guided Reinforcement Learning with Learned Skills", "track": "main", "status": "Poster", "tldr": "", "abstract": "Demonstration-guided reinforcement learning (RL) is a promising approach for learning complex behaviors by leveraging both reward feedback and a set of target task demonstrations. Prior approaches for demonstration-guided RL treat every new task as an independent learning problem and attempt to follow the provided demonstrations step-by-step, akin to a human trying to imitate a completely unseen behavior by following the demonstrator's exact muscle movements. Naturally, such learning will be slow, but often new behaviors are not completely unseen: they share subtasks with behaviors we have previously learned. In this work, we aim to exploit this shared subtask structure to increase the efficiency of demonstration-guided RL. We first learn a set of reusable skills from large offline datasets of prior experience collected across many tasks. We then propose Skill-based Learning with Demonstrations (SkiLD), an algorithm for demonstration-guided RL that efficiently leverages the provided demonstrations by following the demonstrated skills instead of the primitive actions, resulting in substantial performance improvements over prior demonstration-guided RL approaches. 
We validate the effectiveness of our approach on long-horizon maze navigation and complex robot manipulation tasks.", "keywords": "Reinforcement Learning;Imitation Learning;Skill-Based Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/448bdb70aaac7c4bffb35fa7520f49dfb75cfb13.zip", "author": "Karl Pertsch;Youngwoon Lee;Yue Wu;Joseph J Lim", "authorids": "~Karl_Pertsch1;~Youngwoon_Lee1;~Yue_Wu21;~Joseph_J_Lim1", "gender": ";M;M;M", "homepage": "https://kpertsch.github.io/;https://youngwoon.github.io;;http://people.csail.mit.edu/lim/", "dblp": "211/7137;117/4767;;08/3086", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;CDPa3AgAAAAJ;;jTnQTBoAAAAJ", "orcid": ";0000-0001-9918-1056;;", "linkedin": ";;yue-wu-6b0183180/;", "or_profile": "~Karl_Pertsch1;~Youngwoon_Lee1;~Yue_Wu21;~Joseph_J_Lim1", "aff": "Meta Facebook;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "fb.com;usc.edu;usc.edu;usc.edu", "position": "Research Intern;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\npertsch2021demonstrationguided,\ntitle={Demonstration-Guided Reinforcement Learning with Learned Skills},\nauthor={Karl Pertsch and Youngwoon Lee and Yue Wu and Joseph J Lim},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=JSC4KMlENqF}\n}", "github": "", "project": "", "reviewers": "fuNi;RYNo;5LPL", "site": "https://openreview.net/forum?id=JSC4KMlENqF", "pdf_size": 0, "rating": "4;6;10", "confidence": "", "rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Meta;University of Southern California", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.usc.edu", "aff_unique_abbr": 
"Meta;USC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "JT4orC5QaLQ", "title": "Strength Through Diversity: Robust Behavior Learning via Mixture Policies", "track": "main", "status": "Poster", "tldr": "", "abstract": "Efficiency in robot learning is highly dependent on hyperparameters. Robot morphology and task structure differ widely and finding the optimal setting typically requires sequential or parallel repetition of experiments, strongly increasing the interaction count. We propose a training method that only relies on a single trial by enabling agents to select and combine controller designs conditioned on the task. Our Hyperparameter Mixture Policies (HMPs) feature diverse sub-policies that vary in distribution types and parameterization, reducing the impact of design choices and unlocking synergies between low-level components. We demonstrate strong performance on continuous control tasks, including a simulated ANYmal robot, showing that HMPs yield robust, data-efficient learning.", "keywords": "Learning Control;Hierarchical Optimization;Sample Efficiency", "primary_area": "", "supplementary_material": "/attachment/f912cfc57f2ecbff3aa2fe41dd2c0826e85843b1.zip", "author": "Tim Seyde;Wilko Schwarting;Igor Gilitschenski;Markus Wulfmeier;Daniela Rus", "authorids": "~Tim_Seyde1;~Wilko_Schwarting1;~Igor_Gilitschenski1;~Markus_Wulfmeier1;~Daniela_Rus1", "gender": ";;M;M;F", "homepage": ";;https://www.gilitschenski.org/igor;;https://www.csail.mit.edu/person/daniela-rus", "dblp": "226/6408;191/0268;129/1281;166/1552;r/DanielaRus", "google_scholar": "FJ7ILzkAAAAJ;;Nuw1Y4oAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;igorgilitschenski/;;", "or_profile": "~Tim_Seyde1;~Wilko_Schwarting1;~Igor_Gilitschenski1;~Markus_Wulfmeier1;~Daniela_Rus1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of 
Technology;Google DeepMind;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu;deepmind.com;mit.edu", "position": "Student;;Research Scientist (prev: PostDoc);Research Scientist;Full Professor", "bibtex": "@inproceedings{\nseyde2021strength,\ntitle={Strength Through Diversity: Robust Behavior Learning via Mixture Policies},\nauthor={Tim Seyde and Wilko Schwarting and Igor Gilitschenski and Markus Wulfmeier and Daniela Rus},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=JT4orC5QaLQ}\n}", "github": "", "project": "", "reviewers": "PWdD;ADtN;ghjV;1NVm", "site": "https://openreview.net/forum?id=JT4orC5QaLQ", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11926102124817199074&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://web.mit.edu;https://deepmind.com", "aff_unique_abbr": "MIT;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "JrsfBJtDFdI", "title": "What Matters in Learning from Offline Human Demonstrations for Robot Manipulation", "track": "main", "status": "Oral", "tldr": "", "abstract": "Imitating human demonstrations is a promising approach to endow robots with various manipulation capabilities. While recent advances have been made in imitation learning and batch (offline) reinforcement learning, a lack of open-source human datasets and reproducible learning methods make assessing the state of the field difficult. 
In this paper, we conduct an extensive study of six offline learning algorithms for robot manipulation on five simulated and three real-world multi-stage manipulation tasks of varying complexity, and with datasets of varying quality. Our study analyzes the most critical challenges when learning from offline human data for manipulation. Based on the study, we derive a series of lessons including the sensitivity to different algorithmic design choices, the dependence on the quality of the demonstrations, and the variability based on the stopping criteria due to the different objectives in training and evaluation. We also highlight opportunities for learning from human datasets, such as the ability to learn proficient policies on challenging, multi-stage tasks beyond the scope of current reinforcement learning methods, and the ability to easily scale to natural, real-world manipulation scenarios where only raw sensory signals are available. We have open-sourced our datasets and all algorithm implementations to facilitate future research and fair comparisons in learning from human demonstration data at https://arise-initiative.github.io/robomimic-web/", "keywords": "Imitation Learning;Offline Reinforcement Learning;Robot Manipulation", "primary_area": "", "supplementary_material": "/attachment/430ad1c6a96292c4b32cb3f3b505e6b07e18d9dc.zip", "author": "Ajay Mandlekar;Danfei Xu;Josiah Wong;Soroush Nasiriany;Chen Wang;Rohun Kulkarni;Li Fei-Fei;Silvio Savarese;Yuke Zhu;Roberto Mart\u00edn-Mart\u00edn", "authorids": "~Ajay_Mandlekar1;~Danfei_Xu1;~Josiah_Wong1;~Soroush_Nasiriany1;~Chen_Wang16;rohun@stanford.edu;~Li_Fei-Fei1;~Silvio_Savarese1;~Yuke_Zhu1;~Roberto_Mart\u00edn-Mart\u00edn1", "gender": "M;M;M;;M;;F;M;M;M", "homepage": "https://ai.stanford.edu/~amandlek/;https://cs.stanford.edu/~danfei/;https://www.jdw.ong;http://snasiriany.me/;http://www.chenwangjeremy.net/;;https://profiles.stanford.edu/fei-fei-li;;https://cs.utexas.edu/~yukez/;https://robertomartinmartin.com/", 
"dblp": "https://dblp.uni-trier.de/pers/hd/m/Mandlekar:Ajay;135/8443;178/8895;185/5645;;;79/2528;50/3578;133/1772;153/7670", "google_scholar": "MEz23joAAAAJ;J5D4kcoAAAAJ;Y0a0n5wAAAAJ;bBLqsgkAAAAJ;lStkAzsAAAAJ;;rDfyQnIAAAAJ;ImpbxLsAAAAJ;mWGyYMsAAAAJ;XOJE8OEAAAAJ", "orcid": ";;;;;;;;;0000-0002-9586-2759", "linkedin": ";;josiahw/;;;;fei-fei-li-4541247/;;;", "or_profile": "~Ajay_Mandlekar1;~Danfei_Xu1;~Josiah_Wong1;~Soroush_Nasiriany1;~Chen_Wang16;rohun@stanford.edu;~Li_Fei-Fei1;~Silvio_Savarese1;~Yuke_Zhu1;~Roberto_Mart\u00edn-Mart\u00edn1", "aff": "Stanford University;Stanford University;Stanford University;University of Texas, Austin;Computer Science Department, Stanford University;;Stanford University;Stanford University;Computer Science Department, University of Texas, Austin;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;utexas.edu;cs.stanford.edu;;stanford.edu;stanford.edu;cs.utexas.edu;stanford.edu", "position": "PhD student;PhD student;MS student;PhD student;PhD student;;Full Professor;Associate professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nmandlekar2021what,\ntitle={What Matters in Learning from Offline Human Demonstrations for Robot Manipulation},\nauthor={Ajay Mandlekar and Danfei Xu and Josiah Wong and Soroush Nasiriany and Chen Wang and Rohun Kulkarni and Li Fei-Fei and Silvio Savarese and Yuke Zhu and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=JrsfBJtDFdI}\n}", "github": "", "project": "", "reviewers": "wVDW;YubL;mRsh;XeZV", "site": "https://openreview.net/forum?id=JrsfBJtDFdI", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 10, "corr_rating_confidence": 0, "gs_citation": 523, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3943356860137248514&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, 
"aff_unique_index": "0;0;0;1;0;0;0;1;0", "aff_unique_norm": "Stanford University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.utexas.edu", "aff_unique_abbr": "Stanford;UT Austin", "aff_campus_unique_index": "0;0;0;1;0;0;0;1;0", "aff_campus_unique": "Stanford;Austin", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "JvXqtLtAtMY", "title": "Self-supervised Point Cloud Prediction Using 3D Spatio-temporal Convolutional Networks", "track": "main", "status": "Poster", "tldr": "", "abstract": "Exploiting past 3D LiDAR scans to predict future point clouds is a promising method for autonomous mobile systems to realize foresighted state estimation, collision avoidance, and planning. In this paper, we address the problem of predicting future 3D LiDAR point clouds given a sequence of past LiDAR scans. Estimating the future scene on the sensor level does not require any preceding steps as in localization or tracking systems and can be trained self-supervised. We propose an end-to-end approach that exploits a 2D range image representation of each 3D LiDAR scan and concatenates a sequence of range images to obtain a 3D tensor. Based on such tensors, we develop an encoder-decoder architecture using 3D convolutions to jointly aggregate spatial and temporal information of the scene and to predict the future 3D point clouds. We evaluate our method on multiple datasets and the experimental results suggest that our method outperforms existing point cloud prediction architectures and generalizes well to new, unseen environments without additional fine-tuning. 
Our method operates online and is faster than the common LiDAR frame rate of 10 Hz.", "keywords": "Point Cloud Prediction;3D LiDAR;Temporal Convolutional Networks", "primary_area": "", "supplementary_material": "/attachment/713bd13ec5002bbb670203e7e092910efa3717db.zip", "author": "Benedikt Mersch;Xieyuanli Chen;Jens Behley;Cyrill Stachniss", "authorids": "~Benedikt_Mersch1;~Xieyuanli_Chen1;jens.behley@igg.uni-bonn.de;~Cyrill_Stachniss1", "gender": "M;M;;M", "homepage": "https://www.ipb.uni-bonn.de/people/benedikt-mersch/;https://chen-xieyuanli.github.io/;;https://www.ipb.uni-bonn.de", "dblp": "293/7534;207/2256;;s/CyrillStachniss", "google_scholar": "XwuAB1sAAAAJ;DvrngV4AAAAJ;;https://scholar.google.de/citations?hl=de", "orcid": "0000-0002-6937-2799;0000-0003-0955-6681;;", "linkedin": ";;;", "or_profile": "~Benedikt_Mersch1;~Xieyuanli_Chen1;jens.behley@igg.uni-bonn.de;~Cyrill_Stachniss1", "aff": "University of Bonn;University of Bonn;;University of Bonn", "aff_domain": "uni-bonn.de;uni-bonn.de;;uni-bonn.de", "position": "PhD student;PhD student;;Professor", "bibtex": "@inproceedings{\nmersch2021selfsupervised,\ntitle={Self-supervised Point Cloud Prediction Using 3D Spatio-temporal Convolutional Networks},\nauthor={Benedikt Mersch and Xieyuanli Chen and Jens Behley and Cyrill Stachniss},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=JvXqtLtAtMY}\n}", "github": "", "project": "", "reviewers": "n5qa;2bTD;puBM", "site": "https://openreview.net/forum?id=JvXqtLtAtMY", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13117817438335819357&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Bonn", "aff_unique_dep": "", "aff_unique_url": 
"https://www.uni-bonn.de/", "aff_unique_abbr": "UBonn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "K5-J-Espnaq", "title": "Adversarial Skill Chaining for Long-Horizon Robot Manipulation via Terminal State Regularization", "track": "main", "status": "Poster", "tldr": "", "abstract": "Skill chaining is a promising approach for synthesizing complex behaviors by sequentially combining previously learned skills. Yet, a naive composition of skills fails when a policy encounters a starting state never seen during its training. For successful skill chaining, prior approaches attempt to widen the policy's starting state distribution. However, these approaches require larger state distributions to be covered as more policies are sequenced, and thus are limited to short skill sequences. In this paper, we propose to chain multiple policies without excessively large initial state distributions by regularizing the terminal state distributions in an adversarial learning framework. We evaluate our approach on two complex long-horizon manipulation tasks of furniture assembly. Our results have shown that our method establishes the first model-free reinforcement learning algorithm to solve these tasks; whereas prior skill chaining approaches fail. 
The code and videos are available at https://clvrai.com/skill-chaining\n\n", "keywords": "Long-Horizon Manipulation;Skill Chaining;Reinforcement Learning;Adversarial Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/85f830788aaba10e3efff712843806def00ed3de.zip", "author": "Youngwoon Lee;Joseph J Lim;Anima Anandkumar;Yuke Zhu", "authorids": "~Youngwoon_Lee1;~Joseph_J_Lim1;~Anima_Anandkumar1;~Yuke_Zhu1", "gender": "M;M;M;F", "homepage": "https://youngwoon.github.io;http://people.csail.mit.edu/lim/;https://cs.utexas.edu/~yukez/;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": "117/4767;08/3086;133/1772;", "google_scholar": "CDPa3AgAAAAJ;jTnQTBoAAAAJ;mWGyYMsAAAAJ;bEcLezcAAAAJ", "orcid": "0000-0001-9918-1056;;;", "linkedin": ";;;anima-anandkumar-35171b1/", "or_profile": "~Youngwoon_Lee1;~Joseph_J_Lim1;~Yuke_Zhu1;~anima_anandkumar1", "aff": "University of Southern California;University of Southern California;Computer Science Department, University of Texas, Austin;California Institute of Technology", "aff_domain": "usc.edu;usc.edu;cs.utexas.edu;caltech.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlee2021adversarial,\ntitle={Adversarial Skill Chaining for Long-Horizon Robot Manipulation via Terminal State Regularization},\nauthor={Youngwoon Lee and Joseph J Lim and Anima Anandkumar and Yuke Zhu},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=K5-J-Espnaq}\n}", "github": "", "project": "", "reviewers": "HKpX;bN8e;opMr;8Fh5", "site": "https://openreview.net/forum?id=K5-J-Espnaq", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9265504775850036310&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": 
"0;0;1;2", "aff_unique_norm": "University of Southern California;University of Texas at Austin;California Institute of Technology", "aff_unique_dep": ";Computer Science Department;", "aff_unique_url": "https://www.usc.edu;https://www.utexas.edu;https://www.caltech.edu", "aff_unique_abbr": "USC;UT Austin;Caltech", "aff_campus_unique_index": "0;0;1;2", "aff_campus_unique": "Los Angeles;Austin;Pasadena", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "KKBfrCzCVOn", "title": "ThriftyDAgger: Budget-Aware Novelty and Risk Gating for Interactive Imitation Learning", "track": "main", "status": "Oral", "tldr": "", "abstract": "Effective robot learning often requires online human feedback and interventions that can cost significant human time, giving rise to the central challenge in interactive imitation learning: is it possible to control the timing and length of interventions to both facilitate learning and limit burden on the human supervisor? This paper presents ThriftyDAgger, an algorithm for actively querying a human supervisor given a desired budget of human interventions. ThriftyDAgger uses a learned switching policy to solicit interventions only at states that are sufficiently (1) novel, where the robot policy has no reference behavior to imitate, or (2) risky, where the robot has low confidence in task completion. To detect the latter, we introduce a novel metric for estimating risk under the current robot policy. Experiments in simulation and on a physical cable routing experiment suggest that ThriftyDAgger's intervention criteria balances task performance and supervisor burden more effectively than prior algorithms. ThriftyDAgger can also be applied at execution time, where it achieves a 100% success rate on both the simulation and physical tasks. 
A user study (N=10) in which users control a three-robot fleet while also performing a concentration task suggests that ThriftyDAgger increases human and robot performance by 58% and 80% respectively compared to the next best algorithm while reducing supervisor burden. See https://tinyurl.com/thrifty-dagger for supplementary material.", "keywords": "Imitation Learning;Fleet Learning;Human Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/af7a258dba31a672ae73214872fc21e995861c5b.zip", "author": "Ryan Hoque;Ashwin Balakrishna;Ellen Novoseller;Albert Wilcox;Daniel S. Brown;Ken Goldberg", "authorids": "~Ryan_Hoque1;~Ashwin_Balakrishna1;enovoseller@berkeley.edu;~Albert_Wilcox1;~Daniel_S._Brown1;~Ken_Goldberg1", "gender": "M;M;;M;M;M", "homepage": "https://ryanhoque.github.io;https://abalakrishna123.github.io/;;https://albertwilcox.github.io/;https://www.cs.utah.edu/~dsbrown/;http://goldberg.berkeley.edu/", "dblp": "250/9457;218/5246.html;;;141/7769;g/KennethYGoldberg", "google_scholar": "ywv6tDUAAAAJ;tfN6V84AAAAJ;;bj628LsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ", "orcid": ";;;;;0000-0001-6747-9499", "linkedin": "https://linkedin.com/in/ryanhoque;ashwin-balakrishna-9b71a357/;;albert-wilcox-314898184/;;goldbergken/", "or_profile": "~Ryan_Hoque1;~Ashwin_Balakrishna1;enovoseller@berkeley.edu;~Albert_Wilcox1;~Daniel_S._Brown1;~Ken_Goldberg1", "aff": "University of California, Berkeley;University of California, Berkeley;;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;;Undergrad student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhoque2021thriftydagger,\ntitle={Thrifty{DA}gger: Budget-Aware Novelty and Risk Gating for Interactive Imitation Learning},\nauthor={Ryan Hoque and Ashwin Balakrishna and 
Ellen Novoseller and Albert Wilcox and Daniel S. Brown and Ken Goldberg},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=KKBfrCzCVOn}\n}", "github": "", "project": "", "reviewers": "PRtc;pnHZ;UPFi", "site": "https://openreview.net/forum?id=KKBfrCzCVOn", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2845672318436036770&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "KOq9qDgn-Ta", "title": "Correspondence-Free Point Cloud Registration with SO(3)-Equivariant Implicit Shape Representations", "track": "main", "status": "Poster", "tldr": "", "abstract": "This paper proposes a correspondence-free method for point cloud rotational registration. We learn an embedding for each point cloud in a feature space that preserves the SO(3)-equivariance property, enabled by recent developments in equivariant neural networks. The proposed shape registration method achieves three major advantages through combining equivariant feature learning with implicit shape models. First, the necessity of data association is removed because of the permutation-invariant property in network architectures similar to PointNet. Second, the registration in feature space can be solved in closed-form using Horn's method due to the SO(3)-equivariance property. 
Third, the registration is robust to noise in the point cloud because of the joint training of registration and implicit shape reconstruction. The experimental results show superior performance compared with existing correspondence-free deep registration methods. ", "keywords": "point cloud registration;implicit shape model;equivariant neural network;representation learning", "primary_area": "", "supplementary_material": "/attachment/33a0849913ecc0c0ddbe8a115fc3e035db0d9104.zip", "author": "Minghan Zhu;Maani Ghaffari;Huei Peng", "authorids": "~Minghan_Zhu1;~Maani_Ghaffari1;~Huei_Peng1", "gender": "Not Specified;M;M", "homepage": ";https://curly.engin.umich.edu/;https://huei.engin.umich.edu/", "dblp": "255/5003;;", "google_scholar": "70CbUXwAAAAJ;l2jdSb8AAAAJ;MMgcQiIAAAAJ", "orcid": "0000-0002-0145-7542;0000-0002-4734-4295;", "linkedin": ";maani-ghaffari-19b017203/;", "or_profile": "~Minghan_Zhu1;~Maani_Ghaffari1;~Huei_Peng1", "aff": "University of Michigan;University of Michigan;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2021correspondencefree,\ntitle={Correspondence-Free Point Cloud Registration with {SO}(3)-Equivariant Implicit Shape Representations},\nauthor={Minghan Zhu and Maani Ghaffari and Huei Peng},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=KOq9qDgn-Ta}\n}", "github": "", "project": "", "reviewers": "sqvt;b5A4;nLiN;4hsK;ZnCf", "site": "https://openreview.net/forum?id=KOq9qDgn-Ta", "pdf_size": 0, "rating": "4;4;6;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16760435487639120028&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of 
Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "KnOYrZf17CQ", "title": "Safe Driving via Expert Guided Policy Optimization", "track": "main", "status": "Poster", "tldr": "", "abstract": "When learning common skills like driving, beginners usually have domain experts standing by to ensure the safety of the learning process. We formulate such learning scheme under the Expert-in-the-loop Reinforcement Learning where a guardian is introduced to safeguard the exploration of the learning agent. While allowing the sufficient exploration in the uncertain environment, the guardian intervenes under dangerous situations and demonstrates the correct actions to avoid potential accidents. Thus ERL enables both exploration and expert's partial demonstration as two training sources. Following such a setting, we develop a novel Expert Guided Policy Optimization (EGPO) method which integrates the guardian in the loop of reinforcement learning. The guardian is composed of an expert policy to generate demonstration and a switch function to decide when to intervene. Particularly, a constrained optimization technique is used to tackle the trivial solution that the agent deliberately behaves dangerously to deceive the expert into taking over. Offline RL technique is further used to learn from the partial demonstration generated by the expert. Safe driving experiments show that our method achieves superior training and test-time safety, outperforms baselines with a substantial margin in sample efficiency, and preserves the generalizabiliy to unseen environments in test-time. 
Demo video and source code are available at: https://decisionforce.github.io/EGPO/", "keywords": "Safe Reinforcement Learning;Human-in-the-loop;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/26fa97899546eef1a80afd4de7b51a13ee43b17d.zip", "author": "Zhenghao Peng;Quanyi Li;Chunxiao Liu;Bolei Zhou", "authorids": "~Zhenghao_Peng1;~Quanyi_Li1;~Chunxiao_Liu1;~Bolei_Zhou5", "gender": "M;M;M;M", "homepage": "https://pengzhenghao.github.io;https://quanyili.github.io;;https://boleizhou.github.io/", "dblp": "220/3963;270/7691;70/772;46/8066", "google_scholar": "JZ8ws6IAAAAJ;Ty49X3UAAAAJ;;9D4aG8AAAAAJ", "orcid": ";;;", "linkedin": ";https://www.linkedin.com/mwlite/in/quanyi-li-2b7985183;;", "or_profile": "~Zhenghao_Peng1;~Quanyi_Li1;~Chunxiao_Liu1;~Bolei_Zhou5", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong;;The Chinese University of Hong Kong", "aff_domain": "ie.cuhk.edu;ie.cuhk.edu;;cuhk.edu.hk", "position": "MS student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\npeng2021safe,\ntitle={Safe Driving via Expert Guided Policy Optimization},\nauthor={Zhenghao Peng and Quanyi Li and Chunxiao Liu and Bolei Zhou},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=KnOYrZf17CQ}\n}", "github": "", "project": "", "reviewers": "RGdx;rHZA;hLxx;RZXA;o3VH", "site": "https://openreview.net/forum?id=KnOYrZf17CQ", "pdf_size": 0, "rating": "4;6;6;6;10", "confidence": "", "rating_avg": 6.4, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8006921290115421249&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", 
"aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "L0tXWRrB9yw", "title": "SCAPE: Learning Stiffness Control from Augmented Position Control Experiences", "track": "main", "status": "Poster", "tldr": "", "abstract": "We introduce a sample-efficient method for learning state-dependent stiffness control policies for dexterous manipulation. The ability to control stiffness facilitates safe and reliable manipulation by providing compliance and robustness to uncertainties. Most current reinforcement learning approaches to achieve robotic manipulation have exclusively focused on position control, often due to the difficulty of learning high-dimensional stiffness control policies. This difficulty can be partially mitigated via policy guidance such as imitation learning. However, expert stiffness control demonstrations are often expensive or infeasible to record. Therefore, we present an approach to learn Stiffness Control from Augmented Position control Experiences (SCAPE) that bypasses this difficulty by transforming position control demonstrations into approximate, suboptimal stiffness control demonstrations. Then, the suboptimality of the augmented demonstrations is addressed by using complementary techniques that help the agent safely learn from both the demonstrations and reinforcement learning. 
By using simulation tools and experiments on a robotic testbed, we show that the proposed approach efficiently learns safe manipulation policies and outperforms learned position control policies and several other baseline learning algorithms.", "keywords": "Manipulation;stiffness control;reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/108f76b36e56688aa1885cdbd6979155a6c60e21.zip", "author": "Mincheol Kim;Scott Niekum;Ashish Deshpande", "authorids": "~Mincheol_Kim1;~Scott_Niekum1;~Ashish_Deshpande1", "gender": ";M;M", "homepage": "http://tinyurl.com/mincheolkim;https://people.cs.umass.edu/~sniekum/index.php;https://reneu.robotics.utexas.edu", "dblp": "37/3411;62/8399;", "google_scholar": "fwB_hOcAAAAJ;4wXYfSUAAAAJ;Zks0hIUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mincheol_Kim1;~Scott_Niekum1;~Ashish_Deshpande1", "aff": "University of Texas, Austin;University of Texas, Austin;The University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2021scape,\ntitle={{SCAPE}: Learning Stiffness Control from Augmented Position Control Experiences},\nauthor={Mincheol Kim and Scott Niekum and Ashish Deshpande},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=L0tXWRrB9yw}\n}", "github": "", "project": "", "reviewers": "V3q7;Lq7F;jxpB;JwaB", "site": "https://openreview.net/forum?id=L0tXWRrB9yw", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1903824405716174423&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", 
"aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "LGaHnyg81sQ", "title": "Co-GAIL: Learning Diverse Strategies for Human-Robot Collaboration", "track": "main", "status": "Poster", "tldr": "", "abstract": "We present a method for learning human-robot collaboration policy from human-human collaboration demonstrations. An effective robot assistant must learn to handle diverse human behaviors shown in the demonstrations and be robust when the humans adjust their strategies during online task execution. Our method co-optimizes a human policy and a robot policy in an interactive learning process: the human policy learns to generate diverse and plausible collaborative behaviors from demonstrations while the robot policy learns to assist by estimating the unobserved latent strategy of its human collaborator. Across a 2D strategy game, a human-robot handover task, and a multi-step collaborative manipulation task, our method outperforms the alternatives in both simulated evaluations and when executing the tasks with a real human operator in-the-loop.", "keywords": "Learning for Human-Robot Collaboration;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/ea44de6f2c5940395eb0a59471d837c4236ceb63.zip", "author": "Chen Wang;Claudia P\u00e9rez-D'Arpino;Danfei Xu;Li Fei-Fei;Karen Liu;Silvio Savarese", "authorids": "~Chen_Wang16;~Claudia_P\u00e9rez-D'Arpino2;~Danfei_Xu1;~Li_Fei-Fei1;~Karen_Liu1;~Silvio_Savarese1", "gender": "M;F;M;F;;M", "homepage": "http://www.chenwangjeremy.net/;https://ai.stanford.edu/~cdarpino/;https://cs.stanford.edu/~danfei/;https://profiles.stanford.edu/fei-fei-li;https://cs.stanford.edu/~karenliu;", "dblp": ";69/3922;135/8443;79/2528;;50/3578", "google_scholar": "lStkAzsAAAAJ;q0kUpsoAAAAJ;J5D4kcoAAAAJ;rDfyQnIAAAAJ;i28fU0MAAAAJ;ImpbxLsAAAAJ", "orcid": ";;;;0000-0001-5926-0905;", "linkedin": 
";;;fei-fei-li-4541247/;;", "or_profile": "~Chen_Wang16;~Claudia_P\u00e9rez-D'Arpino2;~Danfei_Xu1;~Li_Fei-Fei1;~Karen_Liu1;~Silvio_Savarese1", "aff": "Computer Science Department, Stanford University;Stanford University;Stanford University;Stanford University;;Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;;stanford.edu", "position": "PhD student;Postdoc;PhD student;Full Professor;;Associate professor", "bibtex": "@inproceedings{\nwang2021cogail,\ntitle={Co-{GAIL}: Learning Diverse Strategies for Human-Robot Collaboration},\nauthor={Chen Wang and Claudia P{\\'e}rez-D'Arpino and Danfei Xu and Li Fei-Fei and Karen Liu and Silvio Savarese},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=LGaHnyg81sQ}\n}", "github": "", "project": "", "reviewers": "wpop;RH4p;4Wjg;qTpv", "site": "https://openreview.net/forum?id=LGaHnyg81sQ", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10660211554250792709&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "MWtinPDqfZg", "title": "Learning Feasibility to Imitate Demonstrators with Different Dynamics", "track": "main", "status": "Poster", "tldr": "", "abstract": "The goal of learning from demonstrations is to learn a policy for an agent (imitator) by mimicking the behavior in the demonstrations. 
\nPrior works on learning from demonstrations assume that the demonstrations are collected by a demonstrator that has the same dynamics as the imitator. However, in many real-world applications, this assumption is limiting --- to improve the problem of lack of data in robotics, we would like to be able to leverage demonstrations collected from agents with different dynamics. This can be challenging as the demonstrations might not even be feasible for the imitator. Our insight is that we can learn a feasibility metric that captures the likelihood of a demonstration being feasible by the imitator. We develop a feasibility MDP (f-MDP) and derive the feasibility score by learning an optimal policy in the f-MDP. Our proposed feasibility measure encourages the imitator to learn from more informative demonstrations, and disregard the far from feasible demonstrations. Our experiments on four simulated environments and on a real robot show that the policy learned with our approach achieves a higher expected return than prior works. 
We show the videos of the real robot arm experiments on our website.", "keywords": "Imitation Learning;Learning from Agents with Different Dynamics", "primary_area": "", "supplementary_material": "/attachment/38301a2f789dd236df1a2d68b2ddf6f63e4c3e13.zip", "author": "Zhangjie Cao;Yilun Hao;Mengxi Li;Dorsa Sadigh", "authorids": "~Zhangjie_Cao1;~Yilun_Hao1;~Mengxi_Li1;~Dorsa_Sadigh1", "gender": "M;;F;F", "homepage": "https://caozhangjie.github.io/;https://yih301.github.io;https://ai.stanford.edu/~mengxili/;https://dorsa.fyi/", "dblp": "https://dblp.org/pers/hd/c/Cao:Zhangjie;285/4024;209/9814;117/3174", "google_scholar": "pA-TqMEAAAAJ;RjQF17YAAAAJ;https://scholar.google.com/citations?hl=en;ZaJEZpYAAAAJ", "orcid": ";;;", "linkedin": ";yilun-hao-86554a178/;;", "or_profile": "~Zhangjie_Cao1;~Yilun_Hao1;~Mengxi_Li1;~Dorsa_Sadigh1", "aff": "Stanford University;University of California, San Diego;Stanford University;Stanford University", "aff_domain": "stanford.edu;ucsd.edu;stanford.edu;stanford.edu", "position": "PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncao2021learning,\ntitle={Learning Feasibility to Imitate Demonstrators with Different Dynamics},\nauthor={Zhangjie Cao and Yilun Hao and Mengxi Li and Dorsa Sadigh},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=MWtinPDqfZg}\n}", "github": "", "project": "", "reviewers": "Srqj;fG77;GBdL", "site": "https://openreview.net/forum?id=MWtinPDqfZg", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2631929311939447010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Stanford University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.stanford.edu;https://www.ucsd.edu", "aff_unique_abbr": "Stanford;UCSD", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Stanford;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "NDYbXf-DvwZ", "title": "Visual-Locomotion: Learning to Walk on Complex Terrains with Vision", "track": "main", "status": "Poster", "tldr": "", "abstract": "Vision is one of the most important perception modalities for legged robots to safely and efficiently navigate uneven terrains, such as stairs and stepping stones. However, training robots to effectively understand high-dimensional visual input for locomotion is a challenging problem. In this work, we propose a framework to train a vision-based locomotion controller which enables a quadrupedal robot to traverse uneven environments. The key idea is to introduce a hierarchical structure with a high-level vision policy and a low-level motion controller. The high-level vision policy takes as inputs the perceived vision signals as well as robot states and outputs the desired footholds and base movement of the robot. These are then realized by the low level motion controller composed of a position controller for swing legs and a MPC-based torque controller for stance legs. We train the vision policy using Deep Reinforcement Learning and demonstrate our approach on a variety of uneven environments such as randomly placed stepping stones, quincuncial piles, stairs, and moving platforms. 
We also validate our method on a real robot to walk over a series of gaps and climbing up a platform.", "keywords": "Legged Robot;Reinforcement Learning;Visual Locomotion", "primary_area": "", "supplementary_material": "/attachment/f01ae2d3d807c2b2ef7a3b9bf1780963b4870785.zip", "author": "Wenhao Yu;Deepali Jain;Alejandro Escontrela;Atil Iscen;Peng Xu;Erwin Coumans;Sehoon Ha;Jie Tan;Tingnan Zhang", "authorids": "~Wenhao_Yu1;~Deepali_Jain1;~Alejandro_Escontrela1;~Atil_Iscen1;pengxu@google.com;~Erwin_Coumans1;~Sehoon_Ha2;~Jie_Tan1;~Tingnan_Zhang1", "gender": "M;F;M;;;M;M;M;M", "homepage": "https://wenhaoyu.weebly.com/;;https://www.escontrela.me;;;;https://www.cc.gatech.edu/~sha9;http://www.jie-tan.net;", "dblp": ";84/8010;;10/2422;;;33/10491;81/7419;https://dblp.uni-trier.de/pers/hd/z/Zhang:Tingnan", "google_scholar": "1bF2s2kAAAAJ;;53OxjmYAAAAJ;https://scholar.google.com/citations?hl=en;;-aapzdEAAAAJ;Q6F3O0sAAAAJ;neGbgzYAAAAJ;RM2vMNcAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;alejandro-escontrela/;;;;;jie-tan/;", "or_profile": "~Wenhao_Yu1;~Deepali_Jain1;~Alejandro_Escontrela1;~Atil_Iscen1;pengxu@google.com;~Erwin_Coumans1;~Sehoon_Ha2;~Jie_Tan1;~Tingnan_Zhang1", "aff": "Google;Google;University of California, Berkeley;Google;;Google;Georgia Institute of Technology;Google;Google", "aff_domain": "google.com;google.com;berkeley.edu;google.com;;google.com;gatech.edu;google.com;google.com", "position": "Software Engineer;Researcher;PhD student;Researcher;;Researcher;Assistant Professor;Research Scientist;Software Engineer", "bibtex": "@inproceedings{\nyu2021visuallocomotion,\ntitle={Visual-Locomotion: Learning to Walk on Complex Terrains with Vision},\nauthor={Wenhao Yu and Deepali Jain and Alejandro Escontrela and Atil Iscen and Peng Xu and Erwin Coumans and Sehoon Ha and Jie Tan and Tingnan Zhang},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=NDYbXf-DvwZ}\n}", "github": "", "project": "", "reviewers": 
"tqbD;MZ8r;jPiH;2GnU", "site": "https://openreview.net/forum?id=NDYbXf-DvwZ", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10137508947002631733&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0;2;0;0", "aff_unique_norm": "Google;University of California, Berkeley;Georgia Institute of Technology", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu;https://www.gatech.edu", "aff_unique_abbr": "Google;UC Berkeley;Georgia Tech", "aff_campus_unique_index": "0;0;1;0;0;0;0", "aff_campus_unique": "Mountain View;Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "NZnz3cExrDW", "title": "Distilling Motion Planner Augmented Policies into Visual Control Policies for Robot Manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Learning complex manipulation tasks in realistic, obstructed environments is a challenging problem due to hard exploration in the presence of obstacles and high-dimensional visual observations. Prior work tackles the exploration problem by integrating motion planning and reinforcement learning. However, the motion planner augmented policy requires access to state information, which is often not available in the real-world settings. To this end, we propose to distill a state-based motion planner augmented policy to a visual control policy via (1) visual behavioral cloning to remove the motion planner dependency along with its jittery motion, and (2) vision-based reinforcement learning with the guidance of the smoothed trajectories from the behavioral cloning agent. 
We evaluate our method on three manipulation tasks in obstructed environments and compare it against various reinforcement learning and imitation learning baselines. The results demonstrate that our framework is highly sample-efficient and outperforms the state-of-the-art algorithms. Moreover, coupled with domain randomization, our policy is capable of zero-shot transfer to unseen environment settings with distractors.", "keywords": "Reinforcement Learning;Motion Planning;Robot Manipulation", "primary_area": "", "supplementary_material": "/attachment/8c071a00d8e58dc43445ec076534f65fa315ffc4.zip", "author": "I-Chun Arthur Liu;Shagun Uppal;Gaurav S. Sukhatme;Joseph J Lim;Peter Englert;Youngwoon Lee", "authorids": "~I-Chun_Arthur_Liu1;~Shagun_Uppal1;~Gaurav_S._Sukhatme1;~Joseph_J_Lim1;~Peter_Englert1;~Youngwoon_Lee1", "gender": "M;F;M;M;;M", "homepage": "http://arthurliu.com/;;http://www-robotics.usc.edu/~gaurav/;http://people.csail.mit.edu/lim/;http://www.peter-englert.net;https://youngwoon.github.io", "dblp": ";;s/GauravSSukhatme;08/3086;132/9113;117/4767", "google_scholar": "ToWC_fgAAAAJ;cjo5X3QAAAAJ;https://scholar.google.com.tw/citations?user=lRUi-A8AAAAJ;jTnQTBoAAAAJ;;CDPa3AgAAAAJ", "orcid": "0000-0001-7144-634X;;0000-0003-2408-474X;;;0000-0001-9918-1056", "linkedin": "i-chun-arthur-liu/;shagunuppal/;gaurav-sukhatme-9b6420b/;;;", "or_profile": "~I-Chun_Arthur_Liu1;~Shagun_Uppal1;~Gaurav_S._Sukhatme1;~Joseph_J_Lim1;~Peter_Englert1;~Youngwoon_Lee1", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu;usc.edu;usc.edu", "position": "MS student;Researcher;Full Professor;Assistant Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nliu2021distilling,\ntitle={Distilling Motion Planner Augmented Policies into Visual Control Policies for Robot 
Manipulation},\nauthor={I-Chun Arthur Liu and Shagun Uppal and Gaurav S. Sukhatme and Joseph J Lim and Peter Englert and Youngwoon Lee},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=NZnz3cExrDW}\n}", "github": "", "project": "", "reviewers": "acdh;M4xK;rTHv", "site": "https://openreview.net/forum?id=NZnz3cExrDW", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9249631968406833105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "NeGDZeyjcKa", "title": "A Persistent Spatial Semantic Representation for High-level Natural Language Instruction Execution", "track": "main", "status": "Poster", "tldr": "", "abstract": "Natural language provides an accessible and expressive interface to specify long-term tasks for robotic agents. However, non-experts are likely to specify such tasks with high-level instructions, which abstract over specific robot actions through several layers of abstraction. We propose that key to bridging this gap between language and robot actions over long execution horizons are persistent representations. We propose a persistent spatial semantic representation method, and show how it enables building an agent that performs hierarchical reasoning to effectively execute long-term tasks. We evaluate our approach on the ALFRED benchmark and achieve state-of-the-art results, despite completely avoiding the commonly used step-by-step instructions. 
https://hlsm-alfred.github.io/", "keywords": "vision and language;spatial representations;semantic mapping;representation learning;instruction following", "primary_area": "", "supplementary_material": "/attachment/dbc861464d1dcba8a6888edb3be4d6d5f21fea2e.zip", "author": "Valts Blukis;Chris Paxton;Dieter Fox;Animesh Garg;Yoav Artzi", "authorids": "~Valts_Blukis1;~Chris_Paxton1;~Dieter_Fox1;~Animesh_Garg1;~Yoav_Artzi1", "gender": "M;M;M;M;", "homepage": ";https://cpaxton.github.io/;https://homes.cs.washington.edu/~fox/;http://animesh.garg.tech;", "dblp": "210/9692;;f/DieterFox;123/5728;", "google_scholar": "i9-GzNYAAAAJ;I1mOQpAAAAAJ;DqXsbPAAAAAJ;zp8V7ZMAAAAJ;", "orcid": ";;;0000-0003-0482-4296;", "linkedin": "valtsblukis/;;;animeshgarg/;", "or_profile": "~Valts_Blukis1;~Chris_Paxton1;~Dieter_Fox1;~Animesh_Garg1;~Yoav_Artzi1", "aff": "Cornell University;NVIDIA;Department of Computer Science;University of Toronto;", "aff_domain": "cornell.edu;nvidia.com;cs.washington.edu;toronto.edu;", "position": "PhD student;Researcher;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nblukis2021a,\ntitle={A Persistent Spatial Semantic Representation for High-level Natural Language Instruction Execution},\nauthor={Valts Blukis and Chris Paxton and Dieter Fox and Animesh Garg and Yoav Artzi},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=NeGDZeyjcKa}\n}", "github": "", "project": "", "reviewers": "Xmxc;s6Fc;xkZt;h53W", "site": "https://openreview.net/forum?id=NeGDZeyjcKa", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8956253219655337806&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Cornell University;NVIDIA;Unknown Institution;University of Toronto", 
"aff_unique_dep": ";NVIDIA Corporation;Department of Computer Science;", "aff_unique_url": "https://www.cornell.edu;https://www.nvidia.com;;https://www.utoronto.ca", "aff_unique_abbr": "Cornell;NVIDIA;;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2", "aff_country_unique": "United States;;Canada" }, { "id": "OQMXb0xiCrt", "title": "SeqMatchNet: Contrastive Learning with Sequence Matching for Place Recognition & Relocalization", "track": "main", "status": "Oral", "tldr": "", "abstract": "Visual Place Recognition (VPR) for mobile robot global relocalization is a well-studied problem, where contrastive learning based representation training methods have led to state-of-the-art performance. However, these methods are mainly designed for single image based VPR, where sequential information, which is ubiquitous in robotics, is only used as a post-processing step for filtering single image match scores, but is never used to guide the representation learning process itself. In this work, for the first time, we bridge the gap between single image representation learning and sequence matching through \"SeqMatchNet\" which transforms the single image descriptors such that they become more responsive to the sequence matching metric. We propose a novel triplet loss formulation where the distance metric is based on \"sequence matching\", that is, the aggregation of temporal order-based Euclidean distances computed using single images. We use the same metric for mining negatives online during the training which helps the optimization process by selecting appropriate positives and harder negatives. To overcome the computational overhead of sequence matching for negative mining, we propose a 2D convolution based formulation of sequence matching for efficiently aggregating distances within a distance matrix computed using single images. 
We show that our proposed method achieves consistent gains in performance as demonstrated on four benchmark datasets. Source code available at https://github.com/oravus/SeqMatchNet.", "keywords": "Relocalization;Localization;Visual Place Recognition;Sequence Matching;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/fa009790c6321747afc7de0579faecc24840024a.zip", "author": "Sourav Garg;Madhu Vankadari;Michael Milford", "authorids": "~Sourav_Garg1;~Madhu_Vankadari1;~Michael_Milford1", "gender": "M;M;M", "homepage": "https://oravus.github.io/;https://madhubabuv.github.io;https://staff.qut.edu.au/staff/michael.milford", "dblp": "142/0073;205/3815;01/4027", "google_scholar": "oVS3HHIAAAAJ;St1130EAAAAJ;TDSmCKgAAAAJ", "orcid": "0000-0001-6068-3307;;0000-0002-5162-1793", "linkedin": "gargsourav/;;michaeljmilford/", "or_profile": "~Sourav_Garg1;~Madhu_Vankadari1;~Michael_Milford1", "aff": "Queensland University of Technology;Department of Computer Science, University of Oxford;Queensland University of Technology", "aff_domain": "qut.edu.au;cs.ox.ac.uk;qut.edu.au", "position": "Postdoc;PhD student;Director (Acting)", "bibtex": "@inproceedings{\ngarg2021seqmatchnet,\ntitle={SeqMatchNet: Contrastive Learning with Sequence Matching for Place Recognition \\& Relocalization},\nauthor={Sourav Garg and Madhu Vankadari and Michael Milford},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=OQMXb0xiCrt}\n}", "github": "", "project": "", "reviewers": "CkaF;Kfyz;A1mn", "site": "https://openreview.net/forum?id=OQMXb0xiCrt", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4767333330693892796&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": 
"Queensland University of Technology;University of Oxford", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.qut.edu.au;https://www.ox.ac.uk", "aff_unique_abbr": "QUT;Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;United Kingdom" }, { "id": "PDy45cdhiZ_", "title": "Stochastic Policy Optimization with Heuristic Information for Robot Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Stochastic policy-based deep reinforcement learning (RL) approaches have remarkably succeeded to deal with continuous control tasks. However, applying these methods to manipulation tasks remains a challenge since actuators of a robot manipulator require high dimensional continuous action spaces. In this paper, we propose exploration-bounded exploration actor-critic (EBE-AC), a novel deep RL approach to combine stochastic policy optimization with interpretable human knowledge. The human knowledge is defined as heuristic information based on both physical relationships between a robot and objects and binary signals of whether the robot has achieved certain states. The proposed approach, EBE-AC, combines an off-policy actor-critic algorithm with an entropy maximization based on the heuristic information. On a robotic manipulation task, we demonstrate that EBE-AC outperforms prior state-of-the-art off-policy actor-critic deep RL algorithms in terms of sample efficiency. 
In addition, we found that EBE-AC can be easily combined with latent information, where EBE-AC with latent information further improved sample efficiency and robustness.", "keywords": "Robot manipulation;Reinforcement learning;Stochastic policy", "primary_area": "", "supplementary_material": "/attachment/4ce0c3a32027ee833d24f7c9b047a96aa727e8b1.zip", "author": "SEONGHYUN KIM;Ingook Jang;Samyeul Noh;Hyunseok Kim", "authorids": "~SEONGHYUN_KIM1;~Ingook_Jang1;~Samyeul_Noh1;~Hyunseok_Kim1", "gender": "M;M;;M", "homepage": ";;https://www.samyeulnoh.com;", "dblp": ";70/7891;;", "google_scholar": "https://scholar.google.co.kr/citations?user=eMe8mZ8AAAAJ;https://scholar.google.co.kr/citations?hl=ko;7J5CITkAAAAJ;woSgLL8AAAAJ", "orcid": ";;;0000-0001-8302-5438", "linkedin": ";;;", "or_profile": "~SEONGHYUN_KIM1;~Ingook_Jang1;~Samyeul_Noh1;~Hyunseok_Kim1", "aff": "Electronics and Telecommunications Research Institute;Electronics and Telecommunications Research Institute;Electronics and Telecommunications Research Institute (ETRI);Electronics and Telecommunications Research Institute", "aff_domain": "etri.re.kr;etri.re.kr;etri.re.kr;etri.re.kr", "position": "Researcher;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nkim2021stochastic,\ntitle={Stochastic Policy Optimization with Heuristic Information for Robot Learning},\nauthor={SEONGHYUN KIM and Ingook Jang and Samyeul Noh and Hyunseok Kim},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=PDy45cdhiZ_}\n}", "github": "", "project": "", "reviewers": "UVE3;p2oA;48sx;oVi6", "site": "https://openreview.net/forum?id=PDy45cdhiZ_", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 0, "gs_cited_by_link": 
"https://scholar.google.com/scholar?q=related:eIBf7EF4wxwJ:scholar.google.com/&scioq=Stochastic+Policy+Optimization+with+Heuristic+Information+for+Robot+Learning&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Electronics and Telecommunications Research Institute", "aff_unique_dep": "", "aff_unique_url": "http://www.etri.re.kr", "aff_unique_abbr": "ETRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "PfC1Jr6gvuP", "title": "Minimizing Energy Consumption Leads to the Emergence of Gaits in Legged Robots", "track": "main", "status": "Poster", "tldr": "", "abstract": "Legged locomotion is commonly studied and expressed as a discrete set of gait patterns, like walk, trot, gallop, which are usually treated as given and pre-programmed in legged robots for efficient locomotion at different speeds. However, fixing a set of pre-programmed gaits limits the generality of locomotion. Recent animal motor studies show that these conventional gaits are only prevalent in ideal flat terrain conditions while real-world locomotion is unstructured and more like bouts of intermittent steps. What principles could lead to both structured and unstructured patterns across mammals and how to synthesize them in robots? In this work, we take an analysis-by-synthesis approach and learn to move by minimizing mechanical energy. We demonstrate that learning to minimize energy consumption plays a key role in the emergence of natural locomotion gaits at different speeds in real quadruped robots. The emergent gaits are structured in ideal terrains and look similar to that of horses and sheep. The same approach leads to unstructured gaits in rough terrains which is consistent with the findings in animal motor control. We validate our hypothesis in both simulation and real hardware across natural terrains. 
Videos at https://energy-locomotion.github.io", "keywords": "Locomotion;Gaits;Biomechanics;Energetics;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/a64af0261543cdbbb7227d10f51605d6bd87a065.zip", "author": "Zipeng Fu;Ashish Kumar;Jitendra Malik;Deepak Pathak", "authorids": "~Zipeng_Fu1;~Ashish_Kumar1;~Jitendra_Malik2;~Deepak_Pathak1", "gender": "M;M;M;M", "homepage": "https://zipengfu.github.io;https://ashish-kmr.github.io/;https://people.eecs.berkeley.edu/~malik/;https://www.cs.cmu.edu/~dpathak/", "dblp": "245/1504;34/5378;58/2944;155/9860", "google_scholar": "wMcPTbEAAAAJ;Oj-2ZNEAAAAJ;oY9R5YQAAAAJ;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ", "orcid": ";;0000-0003-3695-1580;", "linkedin": "zipengfu;;;pathak22/", "or_profile": "~Zipeng_Fu1;~Ashish_Kumar1;~Jitendra_Malik2;~Deepak_Pathak1", "aff": "Carnegie Mellon University;University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University", "aff_domain": "cmu.edu;berkeley.edu;berkeley.edu;cmu.edu", "position": "MS student;Graduate Student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nfu2021minimizing,\ntitle={Minimizing Energy Consumption Leads to the Emergence of Gaits in Legged Robots},\nauthor={Zipeng Fu and Ashish Kumar and Jitendra Malik and Deepak Pathak},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=PfC1Jr6gvuP}\n}", "github": "", "project": "", "reviewers": "CQpm;hrLb;gEhV;h727", "site": "https://openreview.net/forum?id=PfC1Jr6gvuP", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9989711798764689458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;University of California, 
Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu", "aff_unique_abbr": "CMU;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Pp0Co2vU28N", "title": "IMU-Assisted Learning of Single-View Rolling Shutter Correction", "track": "main", "status": "Poster", "tldr": "", "abstract": "Rolling shutter distortion is highly undesirable for photography and computer vision algorithms (e.g., visual SLAM) because pixels can be potentially captured at different times and poses. In this paper, we propose a deep neural network to predict depth and row-wise pose from a single image for rolling shutter correction. Our contribution in this work is to incorporate inertial measurement unit (IMU) data into the pose refinement process, which, compared to the state-of-the-art, greatly enhances the pose prediction. The improved accuracy and robustness make it possible for numerous vision algorithms to use imagery captured by rolling shutter cameras and produce highly accurate results. We also extend a dataset to have real rolling shutter images, IMU data, depth maps, camera poses, and corresponding global shutter images for rolling shutter correction training. We demonstrate the efficacy of the proposed method by evaluating the performance of Direct Sparse Odometry (DSO) algorithm on rolling shutter imagery corrected using the proposed approach. 
Results show marked improvements of the DSO algorithm over using uncorrected imagery, validating the proposed approach.", "keywords": "Rolling Shutter Correction;IMU;Learning", "primary_area": "", "supplementary_material": "/attachment/020c40c9ae502b85cceeb8e2c0554a79af67a877.zip", "author": "Jiawei Mo;Md Jahidul Islam;Junaed Sattar", "authorids": "~Jiawei_Mo1;~Md_Jahidul_Islam1;~Junaed_Sattar2", "gender": "Not Specified;M;M", "homepage": "https://jiawei-mo.github.io/;https://jahid.ece.ufl.edu/;https://junaedsattar.cs.umn.edu", "dblp": ";;", "google_scholar": ";XuEzu5cAAAAJ;cgaU4UkAAAAJ", "orcid": ";;", "linkedin": "jiawei-mo/;;", "or_profile": "~Jiawei_Mo1;~Md_Jahidul_Islam1;~Junaed_Sattar2", "aff": "University of Minnesota, Twin Cities;University of Minnesota, Minneapolis;University of Minnesota, Minneapolis", "aff_domain": "umn.edu;umn.edu;umn.edu", "position": "PhD Student ;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmo2021imuassisted,\ntitle={{IMU}-Assisted Learning of Single-View Rolling Shutter Correction},\nauthor={Jiawei Mo and Md Jahidul Islam and Junaed Sattar},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=Pp0Co2vU28N}\n}", "github": "", "project": "", "reviewers": "DMMe;kkCY;BmJn", "site": "https://openreview.net/forum?id=Pp0Co2vU28N", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5211711891375004687&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Twin Cities;Minneapolis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { 
"id": "Pxs5XwId51n", "title": "\"Good Robot! Now Watch This!\": Repurposing Reinforcement Learning for Task-to-Task Transfer", "track": "main", "status": "Poster", "tldr": "", "abstract": "Modern Reinforcement Learning (RL) algorithms are not sample efficient to train on multi-step tasks in complex domains, impeding their wider deployment in the real world. We address this problem by leveraging the insight that RL models trained to complete one set of tasks can be repurposed to complete related tasks when given just a handful of demonstrations. Based upon this insight, we propose See-SPOT-Run (SSR), a new computational approach to robot learning that enables a robot to complete a variety of real robot tasks in novel problem domains without task-specific training. SSR uses pretrained RL models to create vectors that represent model, task, and action relevance in demonstration and test scenes. SSR then compares these vectors via our Cycle Consistency Distance (CCD) metric to determine the next action to take. SSR completes 58% more task steps and 20% more trials than a baseline few-shot learning method that requires task-specific training. SSR also achieves a four order of magnitude improvement in compute efficiency and a 20% to three order of magnitude improvement in sample efficiency compared to the baseline and to training RL models from scratch. To our knowledge, we are the first to address multi-step tasks from demonstration on a real robot without task-specific training, where both the visual input and action space output are high dimensional. Code will be made available.", "keywords": "Deep Learning in Grasping and Manipulation;Computer Vision for Robotic Applications;Imitation Learning;Reinforcement Learning;Learning from Demonstration", "primary_area": "", "supplementary_material": "/attachment/7c5c3e319cffed3d19d070097ace706cd3ae03a0.zip", "author": "Andrew Hundt;Aditya Murali;Priyanka Hubli;Ran Liu;Nakul Gopalan;Matthew Gombolay;Gregory D. 
Hager", "authorids": "~Andrew_Hundt1;~Aditya_Murali1;~Priyanka_Hubli1;~Ran_Liu3;~Nakul_Gopalan1;~Matthew_Gombolay1;~Gregory_D._Hager1", "gender": "M;M;F;M;;M;M", "homepage": "https://ahundt.github.io/;http://www.github.com/adit98;;https://liuran.net;http://nakulgopalan.github.io/;https://core-robotics.gatech.edu/;http://www.cs.jhu.edu/~hager/", "dblp": "190/7614;;;;135/8173;144/1022;12/5814", "google_scholar": "N0JJHwkAAAAJ;yN5fTGEAAAAJ;;bshgBtkAAAAJ;dPsQR14AAAAJ;Ihyz20wAAAAJ;https://scholar.google.com.tw/citations?user=ivApfKcAAAAJ", "orcid": "0000-0003-2023-1810;;;0000-0002-0866-9281;;;", "linkedin": ";;priyanka-hubli/;rliu14/;;;gregory-hager-11a1056/", "or_profile": "~Andrew_Hundt1;~Aditya_Murali1;~Priyanka_Hubli1;~Ran_Liu3;~Nakul_Gopalan1;~Matthew_Gombolay1;~Gregory_D._Hager1", "aff": "Johns Hopkins University;Johns Hopkins University;;Johns Hopkins University;Georgia Institute of Technology;Georgia Institute of Technology;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu;;jhu.edu;gatech.edu;cc.gatech.edu;jhu.edu", "position": "PhD student;MS student;;PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhundt2021good,\ntitle={''Good Robot! Now Watch This!'': Repurposing Reinforcement Learning for Task-to-Task Transfer},\nauthor={Andrew Hundt and Aditya Murali and Priyanka Hubli and Ran Liu and Nakul Gopalan and Matthew Gombolay and Gregory D. 
Hager},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=Pxs5XwId51n}\n}", "github": "", "project": "", "reviewers": "YJsz;tTpU;Hszk;FEvJ", "site": "https://openreview.net/forum?id=Pxs5XwId51n", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 6, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9106788718126517751&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Johns Hopkins University;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.gatech.edu", "aff_unique_abbr": "JHU;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Q9VHQw594zx", "title": "Learning A Risk-Aware Trajectory Planner From Demonstrations Using Logic Monitor", "track": "main", "status": "Poster", "tldr": "", "abstract": "Risk awareness is an important factor to consider when deploying policies on robots in the real-world. Defining the right set of risk metrics can be difficult. In this work, we use a differentiable logic monitor that keeps track of the environmental agents' behaviors and provides a risk metric that the controlled agent can incorporate during planning. We introduce LogicRiskNet, a learning structure that can be constructed from temporal logic formulas describing rules governing a safe agent's behaviors. The network's parameters can be learned from demonstration data. By using temporal logic, the network provides an interpretable architecture that can explain what risk metrics are important to the human. 
We integrate LogicRiskNet in an inverse optimal control (IOC) framework and show that we can learn to generate trajectory plans that accurately mimic the expert's risk handling behaviors solely from demonstration data. We evaluate our method on a real-world driving dataset. ", "keywords": "Learning from demonstrations;temporal logic;trajectory planning;autonomous driving;real-time verification", "primary_area": "", "supplementary_material": "/attachment/3b0856d385afa3c17bd209bf2e1dce7c111e37d0.zip", "author": "Xiao Li;Jonathan DeCastro;Cristian Ioan Vasile;Sertac Karaman;Daniela Rus", "authorids": "~Xiao_Li1;~Jonathan_DeCastro1;~Cristian_Ioan_Vasile1;~Sertac_Karaman1;~Daniela_Rus1", "gender": ";M;M;M;F", "homepage": "https://xli4217.github.io/;http://jadecastro.github.io/;https://cristianvasile.com;https://karaman.mit.edu;https://www.csail.mit.edu/person/daniela-rus", "dblp": ";139/3569;116/4751;45/1718;r/DanielaRus", "google_scholar": ";Pnbjx1AAAAAJ;lS3XNekAAAAJ;Vu-Zb7EAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-0933-9671;0000-0002-1132-1462;;", "linkedin": ";jonathan-decastro-b8748715/;;;", "or_profile": "~Xiao_Li1;~Jonathan_DeCastro1;~Cristian_Ioan_Vasile1;~Sertac_Karaman1;~Daniela_Rus1", "aff": "Massachusetts Institute of Technology;Toyota Research Institute;Lehigh University;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;tri.global;lehigh.edu;mit.edu;mit.edu", "position": "Postdoc;Researcher;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2021learning,\ntitle={Learning A Risk-Aware Trajectory Planner From Demonstrations Using Logic Monitor},\nauthor={Xiao Li and Jonathan DeCastro and Cristian Ioan Vasile and Sertac Karaman and Daniela Rus},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=Q9VHQw594zx}\n}", "github": "", "project": "", "reviewers": "3SPR;gdH3;Qizq;c3zR", "site": 
"https://openreview.net/forum?id=Q9VHQw594zx", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Toyota Research Institute;Lehigh University", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.tri.global;https://www.lehigh.edu", "aff_unique_abbr": "MIT;TRI;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Qdb1ODTQTnL", "title": "Learning to Regrasp by Learning to Place", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this paper, we explore whether a robot can learn to regrasp a diverse set of objects to achieve various desired grasp poses. Regrasping is needed whenever a robot's current grasp pose fails to perform desired manipulation tasks. Endowing robots with such an ability has applications in many domains such as manufacturing or domestic services. Yet, it is a challenging task due to the large diversity of geometry in everyday objects and the high dimensionality of the state and action space. In this paper, we propose a system for robots to take partial point clouds of an object and the supporting environment as inputs and output a sequence of pick-and-place operations to transform an initial object grasp pose to the desired object grasp poses. The key technique includes a neural stable placement predictor and a regrasp graph based solution through leveraging and changing the surrounding environment. We introduce a new and challenging synthetic dataset for learning and evaluating the proposed approach. We demonstrate the effectiveness of our proposed system with both simulator and real-world experiments. 
More videos and visualization examples are available on our project https://sites.google.com/view/regrasp.", "keywords": "Regrasping;Deep Learning;Robotic Manipulation", "primary_area": "", "supplementary_material": "/attachment/a7561d5d08b9619506ad7468703eda4e7b3c8f53.zip", "author": "Shuo Cheng;Kaichun Mo;Lin Shao", "authorids": "~Shuo_Cheng1;~Kaichun_Mo1;~Lin_Shao2", "gender": "M;M;M", "homepage": "https://sites.google.com/view/shuocheng/home;https://cs.stanford.edu/~kaichun/;https://linsats.github.io/", "dblp": "179/0863;172/1283;26/8546-2", "google_scholar": "5CL_0qMAAAAJ;pL7JsOsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shuo_Cheng1;~Kaichun_Mo1;~Lin_Shao2", "aff": ";Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu", "position": ";PhD student;PhD student", "bibtex": "@inproceedings{\ncheng2021learning,\ntitle={Learning to Regrasp by Learning to Place},\nauthor={Shuo Cheng and Kaichun Mo and Lin Shao},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=Qdb1ODTQTnL}\n}", "github": "", "project": "", "reviewers": "FiyG;bwfS;nUvc", "site": "https://openreview.net/forum?id=Qdb1ODTQTnL", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8153287896172317735&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "R4E8wTUtxdl", "title": "Learning to Jump from Pixels", "track": "main", "status": "Poster", 
"tldr": "", "abstract": "Today's robotic quadruped systems can robustly walk over a diverse range of rough but continuous terrains, where the terrain elevation varies gradually. Locomotion on discontinuous terrains, such as those with gaps or obstacles, presents a complementary set of challenges. In discontinuous settings, it becomes necessary to plan ahead using visual inputs and to execute agile behaviors beyond robust walking, such as jumps. Such dynamic motion results in significant motion of onboard sensors, which introduces a new set of challenges for real-time visual processing. The requirements of agility and terrain awareness in this setting reinforce the need for robust control. We present Depth-based Impulse Control (DIC), a method for synthesizing highly agile visually-guided locomotion behaviors. DIC affords the flexibility of model-free learning but regularizes behavior through explicit model-based optimization of ground reaction forces. We evaluate performance both in simulation and in the real world.", "keywords": "Locomotion;Vision;Hierarchical Control", "primary_area": "", "supplementary_material": "/attachment/a67afee5e70a9774397b79d05ec6a6fdbcfbff0c.zip", "author": "Gabriel B Margolis;Tao Chen;Kartik Paigwar;Xiang Fu;Donghyun Kim;Sang bae Kim;Pulkit Agrawal", "authorids": "~Gabriel_B_Margolis1;~Tao_Chen1;~Kartik_Paigwar1;~Xiang_Fu4;donghyunkim@cs.umass.edu;~Sang_bae_Kim1;~Pulkit_Agrawal1", "gender": "M;M;M;M;;M;M", "homepage": "https://gmargo11.github.io/;https://taochenshh.github.io;https://kartikpaigwar.github.io;https://xiangfu.co/;;https://biomimetics.mit.edu/;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": "305/0205;;238/2303;97/374-5.html;;;149/2672", "google_scholar": "Jzt5uNAAAAAJ;gdUv1PIAAAAJ;11M8bvcAAAAJ;https://scholar.google.com/citations?view_op=list_works;;;UpZmJI0AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": 
"~Gabriel_B_Margolis1;~Tao_Chen1;~Kartik_Paigwar1;~Xiang_Fu4;donghyunkim@cs.umass.edu;~Sang_bae_Kim1;~Pulkit_Agrawal1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Arizona State University;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;asu.edu;mit.edu;;mit.edu;mit.edu", "position": "MS student;PhD student;MS student;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmargolis2021learning,\ntitle={Learning to Jump from Pixels},\nauthor={Gabriel B Margolis and Tao Chen and Kartik Paigwar and Xiang Fu and Donghyun Kim and Sang bae Kim and Pulkit Agrawal},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=R4E8wTUtxdl}\n}", "github": "", "project": "", "reviewers": "NUCw;dwZr;8yhZ", "site": "https://openreview.net/forum?id=R4E8wTUtxdl", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16572711378403862236&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Arizona State University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.asu.edu", "aff_unique_abbr": "MIT;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "RO4DM85Z4P7", "title": "XIRL: Cross-embodiment Inverse Reinforcement Learning", "track": "main", "status": "Oral", "tldr": "", "abstract": "We investigate the visual cross-embodiment imitation setting, in which agents learn policies from videos of other agents (such as humans) demonstrating the same task, but with stark 
differences in their embodiments -- shape, actions, end-effector dynamics, etc. In this work, we demonstrate that it is possible to automatically discover and learn vision-based reward functions from cross-embodiment demonstration videos that are robust to these differences. Specifically, we present a self-supervised method for Cross-embodiment Inverse Reinforcement Learning (XIRL) that leverages temporal cycle-consistency constraints to learn deep visual embeddings that capture task progression from offline videos of demonstrations across multiple expert agents, each performing the same task differently due to embodiment differences. Prior to our work, producing rewards from self-supervised embeddings typically required alignment with a reference trajectory, which may be difficult to acquire under stark embodiment differences. We show empirically that if the embeddings are aware of task-progress, simply taking the negative distance between the current state and goal state in the learned embedding space is useful as a reward for training policies with reinforcement learning. We find our learned reward function not only works for embodiments seen during training, but also generalizes to entirely new embodiments. 
Additionally, when transferring real-world human demonstrations to a simulated robot, we find that XIRL is more sample efficient than current best methods.", "keywords": "inverse reinforcement learning;imitation learning;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/01662cbb6c6bb2ae26a2e94614ba3bb614c583f5.zip", "author": "Kevin Zakka;Andy Zeng;Pete Florence;Jonathan Tompson;Jeannette Bohg;Debidatta Dwibedi", "authorids": "~Kevin_Zakka1;~Andy_Zeng1;~Pete_Florence1;~Jonathan_Tompson1;~Jeannette_Bohg1;~Debidatta_Dwibedi1", "gender": "M;M;;M;;M", "homepage": "https://kzakka.com/;http://andyzeng.github.io/;http://www.peteflorence.com/;http://jonathantompson.com;https://web.stanford.edu/~bohg/;https://debidatta.github.io/", "dblp": ";http://dblp.uni-trier.de/pers/hd/z/Zeng:Andy;;139/0769;52/7377;160/3739", "google_scholar": "8qHnRnsAAAAJ;q7nFtUcAAAAJ;;U_Jw8DUAAAAJ;rjnJnEkAAAAJ;EPfOJwQAAAAJ", "orcid": ";;;;0000-0002-4921-7193;", "linkedin": ";;;;;", "or_profile": "~Kevin_Zakka1;~Andy_Zeng1;~Pete_Florence1;~Jonathan_Tompson1;~Jeannette_Bohg1;~Debidatta_Dwibedi1", "aff": "Stanford University;Google;Google;Google DeepMind;Stanford University;Google", "aff_domain": "stanford.edu;google.com;google.com;google.com;stanford.edu;google.com", "position": "MS student;Research Scientist;Research Scientist;Researcher;Assistant Professor;Google", "bibtex": "@inproceedings{\nzakka2021xirl,\ntitle={{XIRL}: Cross-embodiment Inverse Reinforcement Learning},\nauthor={Kevin Zakka and Andy Zeng and Pete Florence and Jonathan Tompson and Jeannette Bohg and Debidatta Dwibedi},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=RO4DM85Z4P7}\n}", "github": "", "project": "", "reviewers": "tUkm;dDLT;Fbyh", "site": "https://openreview.net/forum?id=RO4DM85Z4P7", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 
6, "corr_rating_confidence": 0, "gs_citation": 134, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=908167754345612275&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Stanford;Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "TEQWRlncJVm", "title": "Self-supervised Reinforcement Learning with Independently Controllable Subgoals", "track": "main", "status": "Poster", "tldr": "", "abstract": "To successfully tackle challenging manipulation tasks, autonomous agents must learn a diverse set of skills and how to combine them.\nRecently, self-supervised agents that set their own abstract goals by exploiting the discovered structure in the environment were shown to perform well on many different tasks.\nIn particular, some of them were applied to learn basic manipulation skills in compositional multi-object environments. \nHowever, these methods learn skills without taking the dependencies between objects into account. Thus, the learned skills are difficult to combine in realistic environments.\nWe propose a novel self-supervised agent that estimates relations between environment components and uses them to independently control different parts of the environment state. In addition, the estimated relations between objects can be used to decompose a complex goal into a compatible sequence of subgoals.\nWe show that, by using this framework, an agent can efficiently and automatically learn manipulation tasks in multi-object environments with different relations between objects. 
", "keywords": "object-centric representations;relations;self-supervised reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/4caf064e41096b3bdfae47cb6ae727dffe86766e.zip", "author": "Andrii Zadaianchuk;Georg Martius;Fanny Yang", "authorids": "~Andrii_Zadaianchuk1;~Georg_Martius1;~Fanny_Yang1", "gender": "M;M;", "homepage": "https://zadaianchuk.github.io/;https://uni-tuebingen.de/de/264672;http://www.fanny-yang.de", "dblp": "274/9441;47/2706;126/4852", "google_scholar": ";https://scholar.google.de/citations?user=b-JF-UIAAAAJ;BfDKicQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Andrii_Zadaianchuk1;~Georg_Martius1;~Fanny_Yang1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems;Swiss Federal Institute of Technology", "aff_domain": "is.mpg.de;tuebingen.mpg.de;ethz.ch", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nzadaianchuk2021selfsupervised,\ntitle={Self-supervised Reinforcement Learning with Independently Controllable Subgoals},\nauthor={Andrii Zadaianchuk and Georg Martius and Fanny Yang},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=TEQWRlncJVm}\n}", "github": "", "project": "", "reviewers": "h8jP;uiwp;BPut", "site": "https://openreview.net/forum?id=TEQWRlncJVm", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11718197532466881087&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems;Swiss Federal Institute of Technology", "aff_unique_dep": "Intelligent Systems;Intelligent Systems;", "aff_unique_url": 
"https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de;https://www.ethz.ch", "aff_unique_abbr": "MPI-IS;MPI-IS;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;Switzerland" }, { "id": "TSuSGVkjuXd", "title": "Smooth Exploration for Robotic Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Reinforcement learning (RL) enables robots to learn skills from interactions with the real world.\nIn practice, the unstructured step-based exploration used in Deep RL -- often very successful in simulation -- leads to jerky motion patterns on real robots.\nConsequences of the resulting shaky behavior are poor exploration, or even damage to the robot.\nWe address these issues by adapting state-dependent exploration (SDE) to current Deep RL algorithms.\nTo enable this adaptation, we propose two extensions to the original SDE, using more general features and re-sampling the noise periodically, which leads to a new exploration method generalized state-dependent exploration (gSDE).\nWe evaluate gSDE both in simulation, on PyBullet continuous control tasks, and directly on three different real robots: a tendon-driven elastic robot, a quadruped and an RC car.\nThe noise sampling interval of gSDE enables a compromise between performance and smoothness, which allows training directly on the real robots without loss of performance.", "keywords": "Robotics;Reinforcement Learning;Exploration;Real World", "primary_area": "", "supplementary_material": "/attachment/52cfe076fec5d735f10affdf411ef186bb1642b6.zip", "author": "Antonin Raffin;Jens Kober;Freek Stulp", "authorids": "~Antonin_Raffin1;~Jens_Kober1;~Freek_Stulp1", "gender": "M;M;M", "homepage": "https://araffin.github.io/;http://www.jenskober.de/;", "dblp": "225/7772;69/6636;73/478", "google_scholar": "kik4AwIAAAAJ;XOWZzUcAAAAJ;https://scholar.google.de/citations?user=aHPX6PsAAAAJ", "orcid": 
"0000-0001-6036-6950;0000-0001-7257-5434;", "linkedin": ";jens-kober/;", "or_profile": "~Antonin_Raffin1;~Jens_Kober1;~Freek_Stulp1", "aff": "DLR;Delft University of Technology;German Aerospace Center (DLR)", "aff_domain": "dlr.de;tudelft.nl;dlr.de", "position": "PhD student;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nraffin2021smooth,\ntitle={Smooth Exploration for Robotic Reinforcement Learning},\nauthor={Antonin Raffin and Jens Kober and Freek Stulp},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=TSuSGVkjuXd}\n}", "github": "", "project": "", "reviewers": "bar8;uHC8;8waC", "site": "https://openreview.net/forum?id=TSuSGVkjuXd", "pdf_size": 0, "rating": "4;6;10", "confidence": "", "rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 93, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3718142825784379427&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;2", "aff_unique_norm": "Deutsches Zentrum f\u00fcr Luft- und Raumfahrt;Delft University of Technology;German Aerospace Center", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dlr.de;https://www.tudelft.nl;https://www.dlr.de", "aff_unique_abbr": "DLR;TU Delft;DLR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "TavPBk4Zs9m", "title": "BEHAVIOR: Benchmark for Everyday Household Activities in Virtual, Interactive, and Ecological Environments", "track": "main", "status": "Poster", "tldr": "", "abstract": "We introduce BEHAVIOR, a benchmark for embodied AI with 100 activities in simulation, spanning a range of everyday household chores such as cleaning, maintenance, and food preparation. 
These activities are designed to be realistic, diverse and complex, aiming to reproduce the challenges that agents must face in the real world. Building such a benchmark poses three fundamental difficulties for each activity: definition (it can differ by time, place, or person), instantiation in a simulator, and evaluation. BEHAVIOR addresses these with three innovations. First, we propose a predicate logic-based description language for expressing an activity\u2019s initial and goal conditions, enabling generation of diverse instances for any activity. Second, we identify the simulator-agnostic features required by an underlying environment to support BEHAVIOR, and demonstrate in one such simulator. Third, we introduce a set of metrics to measure task progress and efficiency, absolute and relative to human demonstrators. We include 500 human demonstrations in virtual reality (VR) to serve as the human ground truth. Our experiments demonstrate that even state-of-the-art embodied AI solutions struggle with the level of realism, diversity, and complexity imposed by the activities in our benchmark. 
We make BEHAVIOR publicly available at behavior.stanford.edu to facilitate and calibrate the development of new embodied AI solutions.", "keywords": "Embodied AI;Benchmarking;Household activities", "primary_area": "", "supplementary_material": "/attachment/259a42bbbd729242f282f7a81418d2f2e87ee2d9.zip", "author": "Sanjana Srivastava;Chengshu Li;Michael Lingelbach;Roberto Mart\u00edn-Mart\u00edn;Fei Xia;Kent Elliott Vainio;Zheng Lian;Cem Gokmen;Shyamal Buch;Karen Liu;Silvio Savarese;Hyowon Gweon;Jiajun Wu;Li Fei-Fei", "authorids": "~Sanjana_Srivastava2;~Chengshu_Li1;~Michael_Lingelbach1;~Roberto_Mart\u00edn-Mart\u00edn1;~Fei_Xia1;~Kent_Elliott_Vainio1;~Zheng_Lian2;~Cem_Gokmen1;~Shyamal_Buch1;~Karen_Liu1;~Silvio_Savarese1;~Hyowon_Gweon1;~Jiajun_Wu1;~Li_Fei-Fei1", "gender": ";M;M;M;M;;;M;Unspecified;;M;;M;F", "homepage": ";https://www.chengshuli.me/;;https://robertomartinmartin.com/;;;;https://www.cemgokmen.com;https://cs.stanford.edu/~shyamal;https://cs.stanford.edu/~karenliu;;http://sll.stanford.edu;https://jiajunwu.com;https://profiles.stanford.edu/fei-fei-li", "dblp": ";63/6091-2;;153/7670;;;;220/3187;207/8458;;50/3578;;117/4768;79/2528", "google_scholar": "sqTh_dwAAAAJ;yay_v9EAAAAJ;d4xUjL8AAAAJ;XOJE8OEAAAAJ;pqP5_PgAAAAJ;;;wCiI8oUAAAAJ;https://scholar.google.com/citations?hl=en;i28fU0MAAAAJ;ImpbxLsAAAAJ;;2efgcS0AAAAJ;rDfyQnIAAAAJ", "orcid": ";0000-0002-9027-8617;;0000-0002-9586-2759;0000-0003-4343-1444;;;0000-0001-9446-6052;;0000-0001-5926-0905;;;0000-0002-4176-343X;", "linkedin": "sanjana-srivastava5/;chengshu/;;;;kent-vainio-4749b0145/;https://linkedin.com/in/zheng-l-205124133;cgokmen/;;;;;jiajunwu/;fei-fei-li-4541247/", "or_profile": "~Sanjana_Srivastava2;~Chengshu_Li1;~Michael_Lingelbach1;~Roberto_Mart\u00edn-Mart\u00edn1;~Fei_Xia1;~Kent_Elliott_Vainio1;~Zheng_Lian2;~Cem_Gokmen1;~Shyamal_Buch1;~Karen_Liu1;~Silvio_Savarese1;~Hyowon_Gweon1;~Jiajun_Wu1;~Li_Fei-Fei1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford 
University;Stanford University;Stanford University;Stanford University;Stanford University;;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;Postdoc;PhD student;MS student;Undergrad student;MS student;PhD student;;Associate professor;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsrivastava2021behavior,\ntitle={{BEHAVIOR}: Benchmark for Everyday Household Activities in Virtual, Interactive, and Ecological Environments},\nauthor={Sanjana Srivastava and Chengshu Li and Michael Lingelbach and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n and Fei Xia and Kent Elliott Vainio and Zheng Lian and Cem Gokmen and Shyamal Buch and Karen Liu and Silvio Savarese and Hyowon Gweon and Jiajun Wu and Li Fei-Fei},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=TavPBk4Zs9m}\n}", "github": "", "project": "", "reviewers": "GSeR;Ljtb;NSMD;BvZu", "site": "https://openreview.net/forum?id=TavPBk4Zs9m", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 29, "authors#_avg": 14, "corr_rating_confidence": 0, "gs_citation": 176, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14938702296449997955&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "TsqkJJMgHkk", "title": "FabricFlowNet: 
Bimanual Cloth Manipulation with a Flow-based Policy", "track": "main", "status": "Poster", "tldr": "", "abstract": "We address the problem of goal-directed cloth manipulation, a challenging task due to the deformability of cloth. Our insight is that optical flow, a technique normally used for motion estimation in video, can also provide an effective representation for corresponding cloth poses across observation and goal images. We introduce FabricFlowNet (FFN), a cloth manipulation policy that leverages flow as both an input and as an action representation to improve performance. FabricFlowNet also elegantly switches between bimanual and single-arm actions based on the desired goal. We show that FabricFlowNet significantly outperforms state-of-the-art model-free and model-based cloth manipulation policies that take image input. We also present real-world experiments on a bimanual system, demonstrating effective sim-to-real transfer. Finally, we show that our method generalizes when trained on a single square cloth to other cloth shapes, such as T-shirts and rectangular cloths. 
Video and other supplementary materials are available at: https://sites.google.com/view/fabricflownet.", "keywords": "deformable object manipulation;optical flow;bimanual manipulation;cloth manipulation", "primary_area": "", "supplementary_material": "/attachment/b201c60abaaa26262843923a025d14663b03ed49.zip", "author": "Thomas Weng;Sujay Man Bajracharya;Yufei Wang;Khush Agrawal;David Held", "authorids": "~Thomas_Weng1;~Sujay_Man_Bajracharya1;~Yufei_Wang4;~Khush_Agrawal1;~David_Held1", "gender": "M;;;M;M", "homepage": ";https://www.sujaybajracharya.me;https://yufeiwang63.github.io/;https://khush3.github.io/;http://davheld.github.io/", "dblp": ";;;;22/11147", "google_scholar": ";;HQl9718AAAAJ;46LyBvoAAAAJ;0QtU-NsAAAAJ", "orcid": "0000-0002-0874-6507;;;;", "linkedin": ";;;khush-agrawal/;", "or_profile": "~Thomas_Weng1;~Sujay_Man_Bajracharya1;~Yufei_Wang4;~Khush_Agrawal1;~David_Held1", "aff": "Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Visvesvaraya National Institute Of Technology;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cs.cmu.edu;vnit.ac.in;cmu.edu", "position": "PhD student;MS student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nweng2021fabricflownet,\ntitle={FabricFlowNet: Bimanual Cloth Manipulation with a Flow-based Policy},\nauthor={Thomas Weng and Sujay Man Bajracharya and Yufei Wang and Khush Agrawal and David Held},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=TsqkJJMgHkk}\n}", "github": "", "project": "", "reviewers": "sL8a;RKFg;a9Nv", "site": "https://openreview.net/forum?id=TsqkJJMgHkk", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8873019281781665223&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Visvesvaraya National Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://vnit.ac.in", "aff_unique_abbr": "CMU;VNIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;India" }, { "id": "U0Q8CrtBJxJ", "title": "Beyond Pick-and-Place: Tackling Robotic Stacking of Diverse Shapes", "track": "main", "status": "Poster", "tldr": "", "abstract": "We study the problem of robotic stacking with objects of complex geometry. We propose a challenging and diverse set of such objects that was carefully designed to require strategies beyond a simple \u201cpick-and-place\u201d solution. Our method is a reinforcement learning (RL) approach combined with vision-based interactive policy distillation and simulation-to-reality transfer. Our learned policies can efficiently handle multiple object combinations in the real world and exhibit a large variety of stacking skills. In a large experimental study, we investigate what choices matter for learning such general vision-based agents in simulation, and what affects optimal transfer to the real robot. We then leverage data collected by such policies and improve upon them with offline RL. A video and a blog post of our work are provided as supplementary material.", "keywords": "sim-to-real;offline RL;manipulation;stacking;robot learning", "primary_area": "", "supplementary_material": "/attachment/f286aa78f4ce9ae939cc0f6d01c85b0c0756e10e.zip", "author": "Alex X. 
Lee;Coline Manon Devin;Yuxiang Zhou;Thomas Lampe;Konstantinos Bousmalis;Jost Tobias Springenberg;Arunkumar Byravan;Abbas Abdolmaleki;Nimrod Gileadi;David Khosid;Claudio Fantacci;Jose Enrique Chen;Akhil Raju;Rae Jeong;Michael Neunert;Antoine Laurens;Stefano Saliceti;Federico Casarini;Martin Riedmiller;raia hadsell;Francesco Nori", "authorids": "~Alex_X._Lee1;~Coline_Manon_Devin1;~Yuxiang_Zhou2;~Thomas_Lampe1;~Konstantinos_Bousmalis1;~Jost_Tobias_Springenberg1;~Arunkumar_Byravan1;~Abbas_Abdolmaleki3;~Nimrod_Gileadi1;~David_Khosid1;cfantacci@google.com;josechenf@google.com;akhilraju@google.com;raejeong@google.com;~Michael_Neunert1;alaurens@google.com;~Stefano_Saliceti1;fcasarini@google.com;~Martin_Riedmiller1;~raia_hadsell1;~Francesco_Nori2", "gender": "M;;M;;M;M;M;;M;M;;;;;M;;M;;M;F;M", "homepage": "http://people.eecs.berkeley.edu/~alexlee_gk/;;https://yuxiang-zhou.github.io/;;;http://www.springenberg-tobias.de;https://homes.cs.washington.edu/~barun/;;;;;;;;;;;;https://www.riedmiller.me/;http://www.raiahadsell.com;", "dblp": "153/7674;;27/10149;139/5934;http://dblp.org/pers/hd/b/Bousmalis:Konstantinos;;151/9400;;;;;;;;153/7715;;;;;http://dblp.uni-trier.de/pers/hd/h/Hadsell:Raia;21/3290", "google_scholar": "8-p9CLsAAAAJ;;https://scholar.google.co.uk/citations?user=3dYhzNQAAAAJ;;wtRVnsYAAAAJ;;obYwWiMAAAAJ;;snHVatUAAAAJ;WjWBsBUAAAAJ;;;;;;;UKrS1_IAAAAJ;;1gVfqpcAAAAJ;EWQnacoAAAAJ;AqlbAj8AAAAJ", "orcid": ";;;;;;;;;;;;;;;;;;;;0000-0003-3763-6873", "linkedin": ";;;;;;;;nimrod-gileadi-6669b422;davidkhosid;;;;;;;stefanosaliceti/;;;;", "or_profile": "~Alex_X._Lee1;~Coline_Manon_Devin1;~Yuxiang_Zhou2;~Thomas_Lampe1;~Konstantinos_Bousmalis1;~Jost_Tobias_Springenberg1;~Arunkumar_Byravan1;~Abbas_Abdolmaleki3;~Nimrod_Gileadi1;~David_Khosid1;cfantacci@google.com;josechenf@google.com;akhilraju@google.com;raejeong@google.com;~Michael_Neunert1;alaurens@google.com;~Stefano_Saliceti1;fcasarini@google.com;~Martin_Riedmiller1;~raia_hadsell1;~Francesco_Nori2", "aff": "Google DeepMind;;Google 
DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google;Google;Google DeepMind;Google DeepMind;;;;;;;Google DeepMind;;;Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;;deepmind.com;deepmind.com;google.com;google.com;google.com;google.com;deepmind.com;deepmind.com;;;;;;;deepmind.com;;;deepmind.com;deepmind.com", "position": "Research Scientist;;Research Engineer;Researcher;Research Scientist;Researcher;Research Scientist;research scientist;Software Engineer;Research Engineer;;;;;;;Mechanical Research Engineer;;;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlee2021beyond,\ntitle={Beyond Pick-and-Place: Tackling Robotic Stacking of Diverse Shapes},\nauthor={Alex X. Lee and Coline Manon Devin and Yuxiang Zhou and Thomas Lampe and Konstantinos Bousmalis and Jost Tobias Springenberg and Arunkumar Byravan and Abbas Abdolmaleki and Nimrod Gileadi and David Khosid and Claudio Fantacci and Jose Enrique Chen and Akhil Raju and Rae Jeong and Michael Neunert and Antoine Laurens and Stefano Saliceti and Federico Casarini and Martin Riedmiller and raia hadsell and Francesco Nori},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=U0Q8CrtBJxJ}\n}", "github": "", "project": "", "reviewers": "gHPD;SRSs;6zgt;JZ71", "site": "https://openreview.net/forum?id=U0Q8CrtBJxJ", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 21, "corr_rating_confidence": 0, "gs_citation": 118, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7693247391201444786&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": 
"0;0;0;0;0;1;1;0;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "U1GhcnR4jNI", "title": "Language Grounding with 3D Objects", "track": "main", "status": "Poster", "tldr": "", "abstract": "Seemingly simple natural language requests to a robot are generally underspecified, for example \"Can you bring me the wireless mouse?\" Flat images of candidate mice may not provide the discriminative information needed for \"wireless.\" The world, and objects in it, are not flat images but complex 3D shapes. If a human requests an object based on any of its basic properties, such as color, shape, or texture, robots should perform the necessary exploration to accomplish the task. In particular, while substantial effort and progress has been made on understanding explicitly visual attributes like color and category, comparatively little progress has been made on understanding language about shapes and contours. In this work, we introduce a novel reasoning task that targets both visual and non-visual language about 3D objects. Our new benchmark ShapeNet Annotated with Referring Expressions (SNARE) requires a model to choose which of two objects is being referenced by a natural language description. We introduce several CLIP-based models for distinguishing objects and demonstrate that while recent advances in jointly modeling vision and language are useful for robotic language understanding, it is still the case that these image-based models are weaker at understanding the 3D nature of objects -- properties which play a key role in manipulation. 
We find that adding view estimation to language grounding models improves accuracy on both SNARE and when identifying objects referred to in language on a robot platform, but note that a large gap remains between these models and human performance.", "keywords": "Benchmark;Language Grounding;Vision;3D", "primary_area": "", "supplementary_material": "/attachment/6010193ddf9f0fcdc47e69b838f7893a0fde7628.zip", "author": "Jesse Thomason;Mohit Shridhar;Yonatan Bisk;Chris Paxton;Luke Zettlemoyer", "authorids": "~Jesse_Thomason1;~Mohit_Shridhar1;~Yonatan_Bisk1;~Chris_Paxton1;~Luke_Zettlemoyer1", "gender": "M;M;M;M;M", "homepage": "https://jessethomason.com/;http://mohitshridhar.com/;http://www.YonatanBisk.com;https://cpaxton.github.io/;https://www.cs.washington.edu/people/faculty/lsz/", "dblp": "130/2863;203/8577.html;38/9282;;21/6793", "google_scholar": "8BeTDr0AAAAJ;CrfsfFSiS0kC;bWoGh8UAAAAJ;I1mOQpAAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ", "orcid": "0000-0001-9199-0633;0000-0001-7382-763X;0000-0002-2111-9081;;", "linkedin": "jesse-thomason-034746171/;;yonatanbisk/;;luke-zettlemoyer-a0109b226/", "or_profile": "~Jesse_Thomason1;~Mohit_Shridhar1;~Yonatan_Bisk1;~Chris_Paxton1;~Luke_Zettlemoyer1", "aff": "Amazon;NVIDIA;Carnegie Mellon University;NVIDIA;Meta", "aff_domain": "amazon.com;nvidia.com;cmu.edu;nvidia.com;meta.com", "position": "Visiting Academic;NVIDIA;Assistant Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nthomason2021language,\ntitle={Language Grounding with 3D Objects},\nauthor={Jesse Thomason and Mohit Shridhar and Yonatan Bisk and Chris Paxton and Luke Zettlemoyer},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=U1GhcnR4jNI}\n}", "github": "", "project": "", "reviewers": "XGP9;ch3k;9Pxm;GQzH", "site": "https://openreview.net/forum?id=U1GhcnR4jNI", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 16, 
"authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18236926030183777917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Amazon;NVIDIA;Carnegie Mellon University;Meta", "aff_unique_dep": "Amazon.com, Inc.;NVIDIA Corporation;;Meta Platforms, Inc.", "aff_unique_url": "https://www.amazon.com;https://www.nvidia.com;https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "Amazon;NVIDIA;CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "UD4TAsdE-JM", "title": "LEO: Learning Energy-based Models in Factor Graph Optimization", "track": "main", "status": "Poster", "tldr": "", "abstract": "We address the problem of learning observation models end-to-end for estimation. Robots operating in partially observable environments must infer latent states from multiple sensory inputs using observation models that capture the joint distribution between latent states and observations. This inference problem can be formulated as an objective over a graph that optimizes for the most likely sequence of states using all previous measurements. Prior work uses observation models that are either known a-priori or trained on surrogate losses independent of the graph optimizer. In this paper, we propose a method to directly optimize end-to-end tracking performance by learning observation models with the graph optimizer in the loop. This direct approach may appear, however, to require the inference algorithm to be fully differentiable, which many state-of-the-art graph optimizers are not. Our key insight is to instead formulate the problem as that of energy-based learning. We propose a novel approach, LEO, for learning observation models end-to-end with graph optimizers that may be non-differentiable. 
LEO alternates between sampling trajectories from the graph posterior and updating the model to match these samples to ground truth trajectories. We propose a way to generate such samples efficiently using incremental Gauss-Newton solvers. We compare LEO against baselines on datasets drawn from two distinct tasks: navigation and real-world planar pushing. We show that LEO is able to learn complex observation models with lower errors and fewer samples.", "keywords": "factor graphs;energy-based learning;observation models", "primary_area": "", "supplementary_material": "/attachment/c38c5d38d43cd907ff5a92041ac8c130ad3911f2.zip", "author": "Paloma Sodhi;Eric Dexheimer;Mustafa Mukadam;Stuart Anderson;Michael Kaess", "authorids": "~Paloma_Sodhi1;~Eric_Dexheimer1;~Mustafa_Mukadam1;~Stuart_Anderson1;~Michael_Kaess1", "gender": ";M;M;M;M", "homepage": ";https://edexheim.github.io/;http://www.mustafamukadam.com;;https://www.cs.cmu.edu/~kaess/", "dblp": ";285/2954;;;26/6036", "google_scholar": ";sMsaK0gAAAAJ;yYpm9LoAAAAJ;8orqBsYAAAAJ;27eupmsAAAAJ", "orcid": ";;;;0000-0002-7590-3357", "linkedin": ";;mhmukadam/;stuartoanderson/;michaelkaess/", "or_profile": "~Paloma_Sodhi1;~Eric_Dexheimer1;~Mustafa_Mukadam1;~Stuart_Anderson1;~Michael_Kaess1", "aff": ";Imperial College London, Imperial College London;Meta AI;Meta;Carnegie Mellon University", "aff_domain": ";imperial.ac.uk;meta.com;meta.com;cmu.edu", "position": ";PhD student;Researcher;Researcher;Associate Research Professor", "bibtex": "@inproceedings{\nsodhi2021leo,\ntitle={{LEO}: Learning Energy-based Models in Factor Graph Optimization},\nauthor={Paloma Sodhi and Eric Dexheimer and Mustafa Mukadam and Stuart Anderson and Michael Kaess},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=UD4TAsdE-JM}\n}", "github": "", "project": "", "reviewers": "uKGu;bQ4N;krZP", "site": "https://openreview.net/forum?id=UD4TAsdE-JM", "pdf_size": 0, "rating": "4;6;10", "confidence": "", 
"rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10381838946107667916&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Imperial College London;Meta;Carnegie Mellon University", "aff_unique_dep": ";Meta AI;", "aff_unique_url": "https://www.imperial.ac.uk;https://meta.com;https://www.cmu.edu", "aff_unique_abbr": "ICL;Meta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "UGp6FDaxB0f", "title": "Model-free Safe Control for Zero-Violation Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "While deep reinforcement learning (DRL) has impressive performance in a variety of continuous control tasks, one critical hurdle that limits the application of DRL to physical world is the lack of safety guarantees. It is challenging for DRL agents to persistently satisfy a hard state constraint (known as the safety specification) during training. On the other hand, safe control methods with safety guarantees have been extensively studied. However, to synthesize safe control, these methods require explicit analytical models of the dynamic system; but these models are usually not available in DRL. This paper presents a model-free safe control strategy to synthesize safeguards for DRL agents, which will ensure zero safety violation during training. In particular, we present an implicit safe set algorithm, which synthesizes the safety index (also called the barrier certificate) and the subsequent safe control law only by querying a black-box dynamic function (e.g., a digital twin simulator). The theoretical results indicate the implicit safe set algorithm guarantees forward invariance and finite-time convergence to the safe set. 
We validate the proposed method on the state-of-the-art safety benchmark Safety Gym. Results show that the proposed method achieves zero safety violation and gains $ 95\\% \\pm 9\\%$ cumulative reward compared to state-of-the-art safe DRL methods. Moreover, it can easily scale to high-dimensional systems.", "keywords": "Robots;Safe Reinforcement Learning;Safe Control", "primary_area": "", "supplementary_material": "/attachment/49e76590b6b5ba72d74df1189ba7261732e4c274.zip", "author": "Weiye Zhao;Tairan He;Changliu Liu", "authorids": "~Weiye_Zhao1;~Tairan_He1;~Changliu_Liu1", "gender": "M;M;F", "homepage": "https://github.com/CaesarAndylaw;https://tairanhe.com;http://www.cs.cmu.edu/~cliu6/index.html", "dblp": "228/6863;263/2891.html;166/3563", "google_scholar": "P-79KOcAAAAJ;TVWH2U8AAAAJ;", "orcid": "0000-0002-8426-5238;;", "linkedin": ";tairan-he-41a904294/;", "or_profile": "~Weiye_Zhao1;~Tairan_He1;~Changliu_Liu1", "aff": "Carnegie Mellon University;Microsoft;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;microsoft.com;cmu.edu", "position": "PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nzhao2021modelfree,\ntitle={Model-free Safe Control for Zero-Violation Reinforcement Learning},\nauthor={Weiye Zhao and Tairan He and Changliu Liu},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=UGp6FDaxB0f}\n}", "github": "", "project": "", "reviewers": "nDua;2HkF;yCZE;VywJ", "site": "https://openreview.net/forum?id=UGp6FDaxB0f", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6629981983652812490&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": 
"https://www.cmu.edu;https://www.microsoft.com", "aff_unique_abbr": "CMU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "UIaodSPHNFN", "title": "Influencing Behavioral Attributions to Robot Motion During Task Execution", "track": "main", "status": "Poster", "tldr": "", "abstract": "While prior work has shown how to autonomously generate motion that communicates task-related attributes, like intent or capability, we know less about how to automatically generate motion that communicates higher-level behavioral attributes such as curiosity or competence. We propose a framework that addresses the challenges of modeling human attributions to robot motion, generating trajectories that elicit attributions, and selecting trajectories that balance attribution and task completion. The insight underpinning our approach is that attributions can be ascribed to features of the motion that don't severely impact task performance, and that these features form a convenient basis both for predicting and generating communicative motion. We illustrate the framework in a coverage task resembling household vacuum cleaning. Through a virtual interface, we collect a dataset of human attributions to robot trajectories during task execution and learn a probabilistic model that maps trajectories to attributions. We then incorporate this model into a trajectory generation mechanism that balances between task completion and communication of a desired behavioral attribute. 
Through an online user study on a different household layout, we find that our prediction model accurately captures human attribution for coverage tasks.", "keywords": "human-robot interaction;behavioral attribution;robot motion;human impressions", "primary_area": "", "supplementary_material": "/attachment/60699b90b0d827bdff5be638d5b931b61ea135e0.zip", "author": "Nick Walker;Christoforos Mavrogiannis;Siddhartha Srinivasa;Maya Cakmak", "authorids": "~Nick_Walker1;~Christoforos_Mavrogiannis1;~Siddhartha_Srinivasa1;~Maya_Cakmak1", "gender": "M;;M;F", "homepage": "https://nickwalker.us;https://www.chrismavrogiannis.com;https://goodrobot.ai;https://homes.cs.washington.edu/~mcakmak/", "dblp": "14/1613-1;135/8549;;65/6092", "google_scholar": "JYaJjE8AAAAJ;dTV6Zj4AAAAJ;https://scholar.google.com.tw/citations?user=RCi98EAAAAAJ;https://scholar.google.com.tw/citations?user=sPlonWcAAAAJ", "orcid": "0000-0001-7711-0003;;;", "linkedin": ";;;", "or_profile": "~Nick_Walker1;~Christoforos_Mavrogiannis1;~Siddhartha_Srinivasa1;~Maya_\u00c7akmak1", "aff": "University of Washington;University of Washington;University of Washington;University of Washington, Seattle", "aff_domain": "washington.edu;cs.washington.edu;washington.edu;uw.edu", "position": "PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwalker2021influencing,\ntitle={Influencing Behavioral Attributions to Robot Motion During Task Execution},\nauthor={Nick Walker and Christoforos Mavrogiannis and Siddhartha Srinivasa and Maya Cakmak},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=UIaodSPHNFN}\n}", "github": "", "project": "", "reviewers": "zJU5;ufvu;AY1D", "site": "https://openreview.net/forum?id=UIaodSPHNFN", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 13, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=9265596983457923625&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "VfGk0ELQ4LC", "title": "Haptics-based Curiosity for Sparse-reward Tasks", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robots in many real-world settings have access to force/torque sensors in their gripper and tactile sensing is often necessary for tasks that involve contact-rich motion. In this work, we leverage surprise from mismatches in haptics feedback to guide exploration in hard sparse-reward reinforcement learning tasks. Our approach, Haptics-based Curiosity (\\method{}), learns what visible objects interactions are supposed to ``feel\" like. We encourage exploration by rewarding interactions where the expectation and the experience do not match. We test our approach on a range of haptics-intensive robot arm tasks (e.g. pushing objects, opening doors), which we also release as part of this work. Across multiple experiments in a simulated setting, we demonstrate that our method is able to learn these difficult tasks through sparse reward and curiosity alone. We compare our cross-modal approach to single-modality (haptics- or vision-only) approaches as well as other curiosity-based methods and find that our method performs better and is more sample-efficient.", "keywords": "Intrinsic Motivation;Touch;Curiosity;Manipulation", "primary_area": "", "supplementary_material": "/attachment/b6b2757894a63bd778aa2d3327e244f6dbca2d1f.zip", "author": "Sai Rajeswar;Cyril Ibrahim;Nitin Surya;Florian Golemo;David Vazquez;Aaron Courville;Pedro O. 
Pinheiro", "authorids": "~Sai_Rajeswar2;~Cyril_Ibrahim1;~Nitin_Surya1;~Florian_Golemo1;~David_Vazquez1;~Aaron_Courville3;~Pedro_O._Pinheiro1", "gender": "M;M;M;M;;M;M", "homepage": ";;https://fgolemo.github.io/;http://www.david-vazquez.com;;;https://sairajeswar.com/", "dblp": ";;08/8643;94/8653;56/1688;223/9937;159/2116", "google_scholar": ";;https://scholar.google.de/citations?user=qvRf9xsAAAAJ;1jHvtfsAAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;https://scholar.google.ca/citations?user=BU6f7L4AAAAJ;https://scholar.google.ca/citations?user=h-sqIigAAAAJ", "orcid": ";;0000-0001-9238-7764;0000-0002-2845-8158;;;", "linkedin": ";nitinsurya/;;https://www.linkedin.com/company/david-vazquez/;;;sairajeswar/", "or_profile": "~Cyril_Ibrahim1;~Nitin_Surya1;~Florian_Golemo1;~David_Vazquez1;~Aaron_Courville3;~Pedro_O._Pinheiro1;~sai_rajeswar_mudumba1", "aff": ";;Mila;ServiceNow research;Universit\u00e9 de Montr\u00e9al;Deep Genomics;University of Montreal", "aff_domain": ";;mila.quebec;servicenow.com; ;deepgenomics.com;umontreal.ca", "position": ";;Postdoc;Researcher;Assistant Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nrajeswar2021hapticsbased,\ntitle={Haptics-based Curiosity for Sparse-reward Tasks},\nauthor={Sai Rajeswar and Cyril Ibrahim and Nitin Surya and Florian Golemo and David Vazquez and Aaron Courville and Pedro O. 
Pinheiro},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=VfGk0ELQ4LC}\n}", "github": "", "project": "", "reviewers": "vFAA;6jo7;2Equ;5q2G", "site": "https://openreview.net/forum?id=VfGk0ELQ4LC", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15071785076907463471&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Mila;ServiceNow;Universit\u00e9 de Montr\u00e9al;Deep Genomics;University of Montreal", "aff_unique_dep": "Quebec Artificial Intelligence Institute;research;;;", "aff_unique_url": "https://mila.quebec;https://www.servicenow.com;https://www.umontreal.ca;https://www.deepgenomics.com;https://wwwumontreal.ca", "aff_unique_abbr": "Mila;ServiceNow;UdeM;Deep Genomics;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Canada;United States" }, { "id": "WFqkGvBy78", "title": "Parallelised Diffeomorphic Sampling-based Motion Planning", "track": "main", "status": "Poster", "tldr": "", "abstract": "We propose Parallelised Diffeomorphic Sampling-based Motion Planning (PDMP). PDMP is a novel parallelised framework that uses bijective and differentiable mappings, or diffeomorphisms, to transform sampling distributions of sampling-based motion planners, in a manner akin to normalising flows. Unlike normalising flow models which use invertible neural network structures to represent these diffeomorphisms, we develop them from gradient information of desired costs, and encode desirable behaviour, such as obstacle avoidance. These transformed sampling distributions can then be used for sampling-based motion planning. 
A particular example is when we wish to imbue the sampling distribution with knowledge of the environment geometry, such that drawn samples are less prone to be in collision. To this end, we propose to learn a continuous occupancy representation from environment occupancy data, such that gradients of the representation defines a valid diffeomorphism and is amenable to fast parallelise evaluation. We use this to ``morph'' the sampling distribution to draw far less collision-prone samples. PDMP is able to leverage gradient information of costs, to inject specifications, in a manner similar to optimisation-based motion planning methods, but relies on drawing from a sampling distribution, retaining the tendency to find more global solutions, thereby bridging the gap between trajectory optimisation and sampling-based planning methods.", "keywords": "Sampling-based motion planning;diffeomorphism;Normalising flows;Sampling distribution;RRT;PRM", "primary_area": "", "supplementary_material": "/attachment/945ebcd1aadca6187d0020691b0757aa793a8f50.zip", "author": "Tin Lai;Weiming Zhi;Tucker Hermans;Fabio Ramos", "authorids": "~Tin_Lai2;~Weiming_Zhi1;~Tucker_Hermans2;~Fabio_Ramos1", "gender": ";M;M;", "homepage": "https://cs.tinyiu.com;https://robot-learning.cs.utah.edu;https://fabioramos.github.io/;", "dblp": ";https://dblp.uni-trier.de/pid/67/4241;22/2488;208/4705", "google_scholar": ";G5_VFfkAAAAJ;https://scholar.google.com.au/citations?user=T_mJiHoAAAAJ;", "orcid": ";0000-0003-2496-2768;;", "linkedin": ";;fabio-ramos-3256b421/;", "or_profile": "~Tin_Lai2;~Tucker_Hermans2;~Fabio_Ramos1;~William_Zhi1", "aff": "University of Sydney;University of Utah;NVIDIA;University of Sydney", "aff_domain": "sydney.edu.au;utah.edu;nvidia.com;sydney.edu.au", "position": "PhD student;Associate Professor;Principal Research Scientist;PhD student", "bibtex": "@inproceedings{\nlai2021parallelised,\ntitle={Parallelised Diffeomorphic Sampling-based Motion Planning},\nauthor={Tin Lai and Weiming 
Zhi and Tucker Hermans and Fabio Ramos},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=WFqkGvBy78}\n}", "github": "", "project": "", "reviewers": "QAiv;mUoR;Dafq;9VQ2", "site": "https://openreview.net/forum?id=WFqkGvBy78", "pdf_size": 0, "rating": "4;4;6;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11531875196682141399&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Sydney;University of Utah;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.sydney.edu.au;https://www.utah.edu;https://www.nvidia.com", "aff_unique_abbr": "USYD;Utah;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Australia;United States" }, { "id": "WIE9t_UwOpM", "title": "Enhancing Consistent Ground Maneuverability by Robot Adaptation to Complex Off-Road Terrains", "track": "main", "status": "Oral", "tldr": "", "abstract": "Terrain adaptation is a critical ability for a ground robot to effectively traverse unstructured off-road terrain in real-world field environments such as forests. However, the expected or planned maneuvering behaviors cannot always be accurately executed due to setbacks such as reduced tire pressure. This inconsistency negatively affects the robot's ground maneuverability and can cause slower traversal time or errors in localization. To address this shortcoming, we propose a novel method for consistent behavior generation that enables a ground robot's actual behaviors to more accurately match expected behaviors while adapting to a variety of complex off-road terrains. 
Our method learns offset behaviors in a self-supervised fashion to compensate for the inconsistency between the actual and expected behaviors without requiring the explicit modeling of various setbacks. To evaluate the method, we perform extensive experiments using a physical ground robot over diverse complex off-road terrain in real-world field environments. Experimental results show that our method enables a robot to improve its ground maneuverability on complex unstructured off-road terrain with more navigational behavior consistency, and outperforms previous and baseline methods, particularly so on challenging terrain such as that which is seen in forests.", "keywords": "Robot Learning;Off-road Navigation;Terrain Adaptation", "primary_area": "", "supplementary_material": "/attachment/7eec2c01a1f4745340c79d332dcf26c53d19732e.zip", "author": "Sriram Siva;Maggie Wigness;John Rogers;Hao Zhang", "authorids": "~Sriram_Siva1;~Maggie_Wigness2;john.g.rogers59.civ@mail.mil;~Hao_Zhang34", "gender": ";;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sriram_Siva1;~Maggie_Wigness2;john.g.rogers59.civ@mail.mil;~Hao_Zhang34", "aff": ";;;", "aff_domain": ";;;", "position": ";;;", "bibtex": "@inproceedings{\nsiva2021enhancing,\ntitle={Enhancing Consistent Ground Maneuverability by Robot Adaptation to Complex Off-Road Terrains},\nauthor={Sriram Siva and Maggie Wigness and John Rogers and Hao Zhang},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=WIE9t_UwOpM}\n}", "github": "", "project": "", "reviewers": "7mFQ;1Tow;M3C8;cgkr", "site": "https://openreview.net/forum?id=WIE9t_UwOpM", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 15, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=6465836172356402224&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5 }, { "id": "WqUl7sNkDre", "title": "Risk-Averse Zero-Order Trajectory Optimization", "track": "main", "status": "Poster", "tldr": "", "abstract": "We introduce a simple but effective method for managing risk in zero-order trajectory optimization that involves probabilistic safety constraints and balancing of optimism in the face of epistemic uncertainty and pessimism in the face of aleatoric uncertainty of an ensemble of stochastic neural networks. Various experiments indicate that the separation of uncertainties is essential to performing well with data-driven MPC approaches in uncertain and safety-critical control environments.", "keywords": "CEM;data-driven MPC;uncertainty;model-based RL", "primary_area": "", "supplementary_material": "/attachment/b9d966ab34184144e97dff64db6693d615f65c72.zip", "author": "Marin Vlastelica;Sebastian Blaes;Cristina Pinneri;Georg Martius", "authorids": "~Marin_Vlastelica1;~Sebastian_Blaes1;~Cristina_Pinneri1;~Georg_Martius1", "gender": "M;F;M;M", "homepage": "https://sblaes.com;https://www.is.mpg.de/person/cpinneri;https://uni-tuebingen.de/de/264672;https://jimimvp.github.io/", "dblp": "163/8117;;47/2706;226/9727", "google_scholar": "https://scholar.google.de/citations?user=ftV9OHMAAAAJ;;https://scholar.google.de/citations?user=b-JF-UIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-2959-4119", "linkedin": "sebastian-blaes/;;;mvlastelica/", "or_profile": "~Sebastian_Blaes1;~Cristina_Pinneri1;~Georg_Martius1;~Marin_Vlastelica_Pogan\u010di\u01071", "aff": "Max Planck Institute for Intelligent Systems, Max Planck Institute for Intelligent Systems;Swiss Federal Institute of Technology;Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "is.tue.mpg.de;ethz.ch;tuebingen.mpg.de;tuebingen.mpg.de", "position": "PhD student;PhD 
student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nvlastelica2021riskaverse,\ntitle={Risk-Averse Zero-Order Trajectory Optimization},\nauthor={Marin Vlastelica and Sebastian Blaes and Cristina Pinneri and Georg Martius},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=WqUl7sNkDre}\n}", "github": "", "project": "", "reviewers": "Rk2o;pSNt;RZzg", "site": "https://openreview.net/forum?id=WqUl7sNkDre", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8675043166054722768&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.ethz.ch", "aff_unique_abbr": "MPI-IS;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;Switzerland" }, { "id": "Wt3GLZYFvEQ", "title": "Rough Terrain Navigation Using Divergence Constrained Model-Based Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Autonomous navigation of wheeled robots in rough terrain environments has been a long standing challenge. In these environments, predicting the robot's trajectory can be challenging due to the complexity of terrain interactions, as well as the divergent dynamics that cause model uncertainty to compound and propagate poorly. This inhibits the robot's long horizon decision making capabilities and often lead to shortsighted navigation strategies. 
We propose a model-based reinforcement learning algorithm for rough terrain traversal that trains a probabilistic dynamics model to consider the propagating effects of uncertainty. During trajectory predictions, a trajectory tracking controller is considered to predict closed-loop trajectories. Our method further increases prediction accuracy and precision by using constrained optimization to find trajectories with low divergence. Using this method, wheeled robots can find non-myopic control strategies to reach destinations with higher probability of success. We show results on simulated and real world robots navigating through rough terrain environments.", "keywords": "Rough Terrain Navigation;Model-Based Reinforcement Learning;Model Uncertainty", "primary_area": "", "supplementary_material": "/attachment/6024c7d24551dfceac9ab0143ab1b52d7f284ba4.zip", "author": "Sean J Wang;Samuel Triest;Wenshan Wang;Sebastian Scherer;Aaron Johnson", "authorids": "~Sean_J_Wang1;striest@andrew.cmu.edu;~Wenshan_Wang2;~Sebastian_Scherer1;~Aaron_Johnson1", "gender": ";;F;M;M", "homepage": ";;http://www.wangwenshan.com;https://theairlab.org;https://www.andrew.cmu.edu/user/amj1/", "dblp": ";;;253/5743;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;gxoPfIYAAAAJ;GN5Mc3UAAAAJ", "orcid": ";;;0000-0002-8373-4688;", "linkedin": ";;;sebastian-scherer-a026961a/;", "or_profile": "~Sean_J_Wang1;striest@andrew.cmu.edu;~Wenshan_Wang2;~Sebastian_Scherer1;~Aaron_Johnson1", "aff": ";;School of Computer Science, Carnegie Mellon University;Near Earth Autonomy Inc.;Carnegie Mellon University", "aff_domain": ";;cs.cmu.edu;nearearth.aero;cmu.edu", "position": ";;Researcher;Senior Scientist;Assistant Professor", "bibtex": "@inproceedings{\nwang2021rough,\ntitle={Rough Terrain Navigation Using Divergence Constrained Model-Based Reinforcement Learning},\nauthor={Sean J Wang and Samuel Triest and Wenshan Wang and Sebastian Scherer and Aaron Johnson},\nbooktitle={5th Annual Conference on 
Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=Wt3GLZYFvEQ}\n}", "github": "", "project": "", "reviewers": "stKS;8wj7;79eD;HWwA", "site": "https://openreview.net/forum?id=Wt3GLZYFvEQ", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16795179468355746910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Near Earth Autonomy", "aff_unique_dep": "School of Computer Science;", "aff_unique_url": "https://www.cmu.edu;https://www.nearearthautonomy.com", "aff_unique_abbr": "CMU;NEA", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "X2KJq-S11BC", "title": "Self-Improving Semantic Perception for Indoor Localisation", "track": "main", "status": "Poster", "tldr": "", "abstract": "We propose a novel robotic system that can improve its perception during deployment. Contrary to the established approach of learning semantics from large datasets and deploying fixed models, we propose a framework in which semantic models are continuously updated on the robot to adapt to the deployment environments. By combining continual learning with self-supervision, our robotic system learns online during deployment without external supervision. We conduct real-world experiments with robots localising in 3D floorplans. Our experiments show how the robot's semantic perception improves during deployment and how this translates into improved localisation, even across drastically different environments. We further study the risk of catastrophic forgetting that such a continuous learning setting poses. 
We find memory replay an effective measure to reduce forgetting and show how the robotic system can improve even when switching between different environments. On average, our system improves by 60% in segmentation and 10% in localisation accuracy compared to deployment of a fixed model, and it maintains this improvement while adapting to further environments.", "keywords": "continual learning;self-supervised learning;online learning", "primary_area": "", "supplementary_material": "/attachment/5e31b2cd7b099de4c25004fd531f9e8bf30e52e9.zip", "author": "Hermann Blum;Francesco Milano;Ren\u00e9 Zurbr\u00fcgg;Roland Siegwart;Cesar Cadena;Abel Gawel", "authorids": "~Hermann_Blum1;~Francesco_Milano1;zrene@student.ethz.ch;~Roland_Siegwart1;~Cesar_Cadena1;gawela@ethz.ch", "gender": ";M;;M;;", "homepage": "https://hermannblum.net;;;https://asl.ethz.ch/;https://n.ethz.ch/~cesarc/;", "dblp": "204/8759;198/7396-1;;55/4063;43/7748;", "google_scholar": "2Pxx8QIAAAAJ;qwSANZoAAAAJ;;MDIyLnwAAAAJ;aOns5HQAAAAJ;", "orcid": "0000-0002-1713-7877;;;0000-0002-2760-7983;0000-0002-2972-6011;", "linkedin": ";;;roland-siegwart-85466912/;;", "or_profile": "~Hermann_Blum1;~Francesco_Milano1;zrene@student.ethz.ch;~Roland_Siegwart1;~Cesar_Cadena1;gawela@ethz.ch", "aff": "Autonomous Systems Lab, ETH Z\u00fcrich;ETH Zurich;;ETH Zurich;ETH Zurich;", "aff_domain": "ethz.ch;ethz.ch;;ethz.ch;ethz.ch;", "position": "PhD student;PhD student;;Full Professor;Senior Scientist;", "bibtex": "@inproceedings{\nblum2021selfimproving,\ntitle={Self-Improving Semantic Perception for Indoor Localisation},\nauthor={Hermann Blum and Francesco Milano and Ren{\\'e} Zurbr{\\\"u}gg and Roland Siegwart and Cesar Cadena and Abel Gawel},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=X2KJq-S11BC}\n}", "github": "", "project": "", "reviewers": "jkw9;XC6X;bspZ;Ncrf", "site": "https://openreview.net/forum?id=X2KJq-S11BC", "pdf_size": 0, "rating": "4;6;6;10", 
"confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3397668837418133396&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Autonomous Systems Lab", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "XEY7ZeEZEjI", "title": "Error-Aware Imitation Learning from Teleoperation Data for Mobile Manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "In mobile manipulation (MM), robots can both navigate within and interact with their environment and are thus able to complete many more tasks than robots only capable of navigation or manipulation. In this work, we explore how to apply imitation learning (IL) to learn continuous visuo-motor policies for MM tasks. Much prior work has shown that IL can train visuo-motor policies for either manipulation or navigation domains, but few works have applied IL to the MM domain. Doing this is challenging for two reasons: on the data side, current interfaces make collecting high-quality human demonstrations difficult, and on the learning side, policies trained on limited data can suffer from covariate shift when deployed. To address these problems, we first propose Mobile Manipulation RoboTurk (MoMaRT), a novel teleoperation framework allowing simultaneous navigation and manipulation of mobile manipulators, and collect a first-of-its-kind large scale dataset in a realistic simulated kitchen setting. We then propose a learned error detection system to address the covariate shift by detecting when an agent is in a potential failure state. 
We train performant IL policies and error detectors from this data, and achieve over 45% task success rate and 85% error detection success rate across multiple multi-stage tasks when trained on expert data. Additional results and video at https://sites.google.com/view/il-for-mm/home.", "keywords": "Mobile Manipulation;Imitation Learning;Error Detection", "primary_area": "", "supplementary_material": "/attachment/bc1a11e21998fe6e5e15ea83afe3a02d4d692958.zip", "author": "Josiah Wong;Albert Tung;Andrey Kurenkov;Ajay Mandlekar;Li Fei-Fei;Silvio Savarese;Roberto Mart\u00edn-Mart\u00edn", "authorids": "~Josiah_Wong1;atung3@stanford.edu;~Andrey_Kurenkov1;~Ajay_Mandlekar1;~Li_Fei-Fei1;~Silvio_Savarese1;~Roberto_Mart\u00edn-Mart\u00edn1", "gender": "M;;M;M;F;M;M", "homepage": "https://www.jdw.ong;;https://www.andreykurenkov.com;https://ai.stanford.edu/~amandlek/;https://profiles.stanford.edu/fei-fei-li;;https://robertomartinmartin.com/", "dblp": "178/8895;;;https://dblp.uni-trier.de/pers/hd/m/Mandlekar:Ajay;79/2528;50/3578;153/7670", "google_scholar": "Y0a0n5wAAAAJ;;mmiHOS4AAAAJ;MEz23joAAAAJ;rDfyQnIAAAAJ;ImpbxLsAAAAJ;XOJE8OEAAAAJ", "orcid": ";;;;;;0000-0002-9586-2759", "linkedin": "josiahw/;;;;fei-fei-li-4541247/;;", "or_profile": "~Josiah_Wong1;atung3@stanford.edu;~Andrey_Kurenkov1;~Ajay_Mandlekar1;~Li_Fei-Fei1;~Silvio_Savarese1;~Roberto_Mart\u00edn-Mart\u00edn1", "aff": "Stanford University;;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "MS student;;PhD student;PhD student;Full Professor;Associate professor;Postdoc", "bibtex": "@inproceedings{\nwong2021erroraware,\ntitle={Error-Aware Imitation Learning from Teleoperation Data for Mobile Manipulation},\nauthor={Josiah Wong and Albert Tung and Andrey Kurenkov and Ajay Mandlekar and Li Fei-Fei and Silvio Savarese and Roberto 
Mart{\\'\\i}n-Mart{\\'\\i}n},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=XEY7ZeEZEjI}\n}", "github": "", "project": "", "reviewers": "KoyB;dB9j;3mTs;56kE", "site": "https://openreview.net/forum?id=XEY7ZeEZEjI", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15594802381946211047&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "YFF73MRGUIH", "title": "LENS: Localization enhanced by NeRF synthesis", "track": "main", "status": "Poster", "tldr": "", "abstract": "Neural Radiance Fields (NeRF) have recently demonstrated photorealistic results for the task of novel view synthesis. In this paper, we propose to apply novel view synthesis to the robot relocalization problem: we demonstrate improvement of camera pose regression thanks to an additional synthetic dataset rendered by the NeRF class of algorithm. To avoid spawning novel views in irrelevant places we selected virtual camera locations from NeRF internal representation of the 3D geometry of the scene. We further improved localization accuracy of pose regressors using synthesized realistic and geometry consistent images as data augmentation during training. At the time of publication, our approach improved state of the art with a 60% lower error on Cambridge Landmarks and 7-scenes datasets. 
Hence, the resulting accuracy becomes comparable to structure-based methods, without any architecture modification or domain adaptation constraints. Since our method allows almost infinite generation of training data, we investigated limitations of camera pose regression depending on size and distribution of data used for training on public benchmarks. We concluded that pose regression accuracy is mostly bounded by relatively small and biased datasets rather than capacity of the pose regression model to solve the localization task.", "keywords": "visual localization;camera pose regression;novel view synthesis", "primary_area": "", "supplementary_material": "/attachment/77d268179b779c18e4ba4f0e0f28e505fef1bf94.zip", "author": "Arthur Moreau;Nathan Piasco;Dzmitry Tsishkou;Bogdan Stanciulescu;Arnaud de La Fortelle", "authorids": "~Arthur_Moreau1;nathan.piasco@huawei.com;~Dzmitry_Tsishkou1;~Bogdan_Stanciulescu1;~Arnaud_de_La_Fortelle1", "gender": "M;;;M;M", "homepage": ";;;;", "dblp": "288/1718;;;;", "google_scholar": "fMmoHX0AAAAJ;;;https://scholar.google.fr/citations?user=42aGR78AAAAJ;https://scholar.google.fr/citations?hl=en", "orcid": ";;;;", "linkedin": "moreauarthur/;;;bogdan-stanciulescu-5239012/;arnaud-de-la-fortelle-353ba15/", "or_profile": "~Arthur_Moreau1;nathan.piasco@huawei.com;~Dzmitry_Tsishkou1;~Bogdan_Stanciulescu1;~Arnaud_de_La_Fortelle1", "aff": "Mines ParisTech;;;Mines ParisTech;Mines ParisTech", "aff_domain": "mines-paristech.fr;;;mines-paristech.fr;mines-paristech.fr", "position": "PhD student;;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nmoreau2021lens,\ntitle={{LENS}: Localization enhanced by Ne{RF} synthesis},\nauthor={Arthur Moreau and Nathan Piasco and Dzmitry Tsishkou and Bogdan Stanciulescu and Arnaud de La Fortelle},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=YFF73MRGUIH}\n}", "github": "", "project": "", "reviewers": "HedN;CXrf;BBFW", "site": 
"https://openreview.net/forum?id=YFF73MRGUIH", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 8, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 153, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17593701423350880109&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "MINES ParisTech", "aff_unique_dep": "", "aff_unique_url": "https://www.mines-paristech.fr", "aff_unique_abbr": "Mines ParisTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "YeJaZBXlhPX", "title": "A Constrained Multi-Objective Reinforcement Learning Framework", "track": "main", "status": "Poster", "tldr": "", "abstract": "Many real-world problems, especially in robotics, require that reinforcement learning (RL) agents learn policies that not only maximize an environment reward, but also satisfy constraints. We propose a high-level framework for solving such problems, that treats the environment reward and costs as separate objectives, and learns what preference over objectives the policy should optimize for in order to meet the constraints. We call this Learning Preferences and Policies in Parallel (LP3). By making different choices for how to learn the preference and how to optimize for the policy given the preference, we can obtain existing approaches (e.g., Lagrangian relaxation) and derive novel approaches that lead to better performance. 
One of these is an algorithm that learns a set of constraint-satisfying policies, useful for when we do not know the exact constraint a priori.", "keywords": "constrained RL;multi-objective RL;deep RL", "primary_area": "", "supplementary_material": "/attachment/148b40249793f11ba9776e58e7f4435de734f8e7.zip", "author": "Sandy Huang;Abbas Abdolmaleki;Giulia Vezzani;Philemon Brakel;Daniel J Mankowitz;Michael Neunert;Steven Bohez;Yuval Tassa;Nicolas Heess;Martin Riedmiller;raia hadsell", "authorids": "~Sandy_Huang1;~Abbas_Abdolmaleki3;giuliavezzani@google.com;~Philemon_Brakel1;~Daniel_J_Mankowitz2;~Michael_Neunert1;~Steven_Bohez1;~Yuval_Tassa2;~Nicolas_Heess1;~Martin_Riedmiller1;~raia_hadsell1", "gender": "F;;;M;;M;M;;;M;F", "homepage": "https://shhuang.github.io/;;;;;;;;;https://www.riedmiller.me/;http://www.raiahadsell.com", "dblp": "153/7841;;;82/10570;;153/7715;143/2302;;76/9181;;http://dblp.uni-trier.de/pers/hd/h/Hadsell:Raia", "google_scholar": "eurA6WgAAAAJ;;;https://scholar.google.ca/citations?user=Q6UMpRYAAAAJ;;;J7p1Fx4AAAAJ;;79k7bGEAAAAJ;1gVfqpcAAAAJ;EWQnacoAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Sandy_Huang1;~Abbas_Abdolmaleki3;giuliavezzani@google.com;~Philemon_Brakel1;~Daniel_J_Mankowitz2;~Michael_Neunert1;~Steven_Bohez1;~Yuval_Tassa2;~Nicolas_Heess1;~Martin_Riedmiller1;~raia_hadsell1", "aff": "Google DeepMind;Google;;Google/DeepMind;;;Ghent University - imec;;Google DeepMind;;Google DeepMind", "aff_domain": "deepmind.com;google.com;;google.com;;;ugent.be;;google.com;;deepmind.com", "position": "Research Scientist;research scientist;;Research Scientist;;;PhD student;;Research Scientist;;Research Scientist", "bibtex": "@inproceedings{\nhuang2021a,\ntitle={A Constrained Multi-Objective Reinforcement Learning Framework},\nauthor={Sandy Huang and Abbas Abdolmaleki and Giulia Vezzani and Philemon Brakel and Daniel J Mankowitz and Michael Neunert and Steven Bohez and Yuval Tassa and Nicolas Heess and Martin Riedmiller and raia 
hadsell},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=YeJaZBXlhPX}\n}", "github": "", "project": "", "reviewers": "rQa3;Z5YJ;uAjH;YksG", "site": "https://openreview.net/forum?id=YeJaZBXlhPX", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 11, "corr_rating_confidence": 0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15736643996357258905&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Google;Ghent University", "aff_unique_dep": "Google DeepMind;imec", "aff_unique_url": "https://deepmind.com;https://www.ugent.be/en", "aff_unique_abbr": "DeepMind;UGent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;2;0;0", "aff_country_unique": "United Kingdom;United States;Belgium" }, { "id": "YrRoft_OeKp", "title": "Evaluations of the Gap between Supervised and Reinforcement Lifelong Learning on Robotic Manipulation Tasks", "track": "main", "status": "Poster", "tldr": "", "abstract": "Overcoming catastrophic forgetting is of great importance for deep learning and robotics. Recent lifelong learning research has great advances in supervised learning. However, little work focuses on reinforcement learning(RL). We focus on evaluating the performances of state-of-the-art lifelong learning algorithms on robotic reinforcement learning tasks. We mainly focus on the properties of overcoming catastrophic forgetting for these algorithms. We summarize the pros and cons for each category of lifelong learning algorithms when applied in RL scenarios. We propose a framework to modify supervised lifelong learning algorithms to be compatible with RL. 
We also develop a manipulation benchmark task set for our evaluations.", "keywords": "Lifelong Learning;Reinforcement Learning;Manipulation", "primary_area": "", "supplementary_material": "/attachment/cb2bfa13a4ade23201c5bacd9fc58f18c55258c1.zip", "author": "Fan Yang;Chao Yang;Huaping Liu;Fuchun Sun", "authorids": "~Fan_Yang13;~Chao_Yang3;~Huaping_Liu3;~Fuchun_Sun2", "gender": "M;M;M;M", "homepage": "https://fanyangr.github.io;https://sites.google.com/site/thuliuhuaping/;;https://www.cs.tsinghua.edu.cn/info/1121/3555.htm", "dblp": ";69/1097-1;00/5867-26;", "google_scholar": "qWoep9AAAAAJ;https://scholar.google.com.hk/citations?user=HXnkIkwAAAAJ;5KRbHPMAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Fan_Yang13;~Huaping_Liu3;~Yang_Chao1;~Fuchun_Sun1", "aff": "Carnegie Mellon University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "andrew.cmu.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn;cs.tsinghua.edu.cn", "position": "MS student;Full Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2021evaluations,\ntitle={Evaluations of the Gap between Supervised and Reinforcement Lifelong Learning on Robotic Manipulation Tasks},\nauthor={Fan Yang and Chao Yang and Huaping Liu and Fuchun Sun},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=YrRoft_OeKp}\n}", "github": "", "project": "", "reviewers": "x2T2;FpJK;R49v", "site": "https://openreview.net/forum?id=YrRoft_OeKp", "pdf_size": 0, "rating": "4;4;10", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 8, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5695539884804998845&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.cmu.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CMU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "YwDvofEWlEx", "title": "Learning Behaviors through Physics-driven Latent Imagination", "track": "main", "status": "Oral", "tldr": "", "abstract": "Model-based reinforcement learning (MBRL) consists in learning a so-called world model, a representation of the environment through interactions with it, then use it to train an agent. This approach is particularly interesting in the con-text of field robotics, as it alleviates the need to train online, and reduces the risks inherent to directly training agents on real robots. Generally, in such approaches, the world encompasses both the part related to the robot itself and the rest of the environment. We argue that decoupling the environment representation (for example, images or laser scans) from the dynamics of the physical system (that is, the robot and its physical state) can increase the flexibility of world models and open doors to greater robustness. In this paper, we apply this concept to a strong latent-agent, Dreamer. We then showcase the increased flexibility by transferring the environment part of the world model from one robot (a boat) to another (a rover), simply by adapting the physical model in the imagination. 
We additionally demonstrate the robustness of our method through real-world experiments on a boat.", "keywords": "Model-Based Reinforcement Learning;Field Robotics;Latent Models", "primary_area": "", "supplementary_material": "/attachment/f68a76c229792b97d2bd3854c9a3cc900cc01dbf.zip", "author": "Antoine Richard;Stephanie ARAVECCHIA;Matthieu Geist;C\u00e9dric Pradalier", "authorids": "~Antoine_Richard1;~Stephanie_ARAVECCHIA1;~Matthieu_Geist1;~C\u00e9dric_Pradalier1", "gender": "M;F;M;M", "homepage": ";;;http://dream.georgiatech-metz.fr", "dblp": ";;38/6508;04/6378", "google_scholar": "jqmaocQAAAAJ;tzDLDw8AAAAJ;ectPLEUAAAAJ;4_1DZoYAAAAJ", "orcid": "0000-0003-4018-699X;;;0000-0002-1746-2733", "linkedin": "antoine-richard-32429a122/;;;cedric-pradalier-1975512", "or_profile": "~Antoine_Richard1;~Stephanie_ARAVECCHIA1;~Matthieu_Geist1;~C\u00e9dric_Pradalier1", "aff": "CNRS;CNRS;Google;GeorgiaTech Lorraine", "aff_domain": "cnrs.fr;cnrs.fr;google.com;georgiatech-metz.fr", "position": "PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nrichard2021learning,\ntitle={Learning Behaviors through Physics-driven Latent Imagination},\nauthor={Antoine Richard and Stephanie ARAVECCHIA and Matthieu Geist and C{\\'e}dric Pradalier},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=YwDvofEWlEx}\n}", "github": "", "project": "", "reviewers": "WcEr;TKXJ;RFKR;Q3uB", "site": "https://openreview.net/forum?id=YwDvofEWlEx", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17847275696032081126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Centre National de la Recherche Scientifique;Google;Georgia Institute of Technology", "aff_unique_dep": ";Google;", 
"aff_unique_url": "https://www.cnrs.fr;https://www.google.com;https://www.gatech.edu", "aff_unique_abbr": "CNRS;Google;GeorgiaTech", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Lorraine", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "France;United States" }, { "id": "_daq0uh6yXr", "title": "Example-Driven Model-Based Reinforcement Learning for Solving Long-Horizon Visuomotor Tasks", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this paper, we study the problem of learning a repertoire of low-level skills from raw images that can be sequenced to complete long-horizon visuomotor tasks. Reinforcement learning (RL) is a promising approach for acquiring short-horizon skills autonomously. However, the focus of RL algorithms has largely been on the success of those individual skills, more so than learning and grounding a large repertoire of skills that can be sequenced to complete extended multi-stage tasks. The latter demands robustness and persistence, as errors in skills can compound over time, and may require the robot to have a number of primitive skills in its repertoire, rather than just one. To this end, we introduce EMBR, a model-based RL method for learning primitive skills that are suitable for completing long-horizon visuomotor tasks. EMBR learns and plans using a learned model, critic, and success classifier, where the success classifier serves both as a reward function for RL and as a grounding mechanism to continuously detect if the robot should retry a skill when unsuccessful or under perturbations. Further, the learned model is task-agnostic and trained using data from all skills, enabling the robot to efficiently learn a number of distinct primitives. These visuomotor primitive skills and their associated pre- and post-conditions can then be directly combined with off-the-shelf symbolic planners to complete long-horizon tasks. 
On a Franka Emika robot arm, we find that EMBR enables the robot to complete three long-horizon visuomotor tasks at 85% success rate, such as organizing an office desk, a file cabinet, and drawers, which require sequencing up to 12 skills, involve 14 unique learned primitives, and demand generalization to novel objects.", "keywords": "model-based reinforcement learning;long-horizon planning", "primary_area": "", "supplementary_material": "/attachment/efae4d163168f74f070c92eafe6da05e9a6c861a.zip", "author": "Bohan Wu;Suraj Nair;Li Fei-Fei;Chelsea Finn", "authorids": "~Bohan_Wu1;~Suraj_Nair1;~Li_Fei-Fei1;~Chelsea_Finn1", "gender": "M;M;F;F", "homepage": "https://profiles.stanford.edu/bohan-wu;https://suraj-nair-1.github.io/;https://profiles.stanford.edu/fei-fei-li;https://ai.stanford.edu/~cbfinn/", "dblp": ";;79/2528;131/1783", "google_scholar": ";EHSuFcwAAAAJ;rDfyQnIAAAAJ;vfPE6hgAAAAJ", "orcid": ";;;", "linkedin": ";;fei-fei-li-4541247/;", "or_profile": "~Bohan_Wu1;~Suraj_Nair1;~Li_Fei-Fei1;~Chelsea_Finn1", "aff": "Stanford University;Meta Facebook;Stanford University;Google", "aff_domain": "stanford.edu;facebook.com;stanford.edu;google.com", "position": "PhD student;Student Researcher;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nwu2021exampledriven,\ntitle={Example-Driven Model-Based Reinforcement Learning for Solving Long-Horizon Visuomotor Tasks},\nauthor={Bohan Wu and Suraj Nair and Li Fei-Fei and Chelsea Finn},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=_daq0uh6yXr}\n}", "github": "", "project": "", "reviewers": "vnfa;REyR;Au3T", "site": "https://openreview.net/forum?id=_daq0uh6yXr", "pdf_size": 0, "rating": "4;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9757383657667857190&as_sdt=5,33&sciodt=0,33&hl=en", 
"gs_version_total": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Stanford University;Meta;Google", "aff_unique_dep": ";Meta Platforms, Inc.;Google", "aff_unique_url": "https://www.stanford.edu;https://meta.com;https://www.google.com", "aff_unique_abbr": "Stanford;Meta;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Stanford;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "_lkBGOctkip", "title": "LILA: Language-Informed Latent Actions", "track": "main", "status": "Poster", "tldr": "", "abstract": "We introduce Language-Informed Latent Actions (LILA), a framework for learning natural language interfaces in the context of human-robot collaboration. LILA falls under the shared autonomy paradigm: in addition to providing discrete language inputs, humans are given a low-dimensional controller \u2013 e.g., a 2 degree-of-freedom (DoF) joystick that can move left/right and up/down \u2013 for operating the robot. LILA learns to use language to modulate this controller, providing users with a language-informed control space: given an instruction like \"place the cereal bowl on the tray,\" LILA may learn a 2-DoF space where one dimension controls the distance from the robot's end-effector to the bowl, and the other dimension controls the robot's end-effector pose relative to the grasp point on the bowl. We evaluate LILA with real-world user studies, where users can provide a language instruction while operating a 7-DoF Franka Emika Panda Arm to complete a series of complex manipulation tasks. 
We show that LILA models are not only more sample efficient and performant than imitation learning and end-effector control baselines, but that they are also qualitatively preferred by users.", "keywords": "Language for Shared Autonomy;Language & Robotics;Learned Latent Actions;Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/f0cd70b38495c35e24d5fb1e2ff6f98f3e9c85d8.zip", "author": "Siddharth Karamcheti;Megha Srivastava;Percy Liang;Dorsa Sadigh", "authorids": "~Siddharth_Karamcheti1;~Megha_Srivastava1;~Percy_Liang1;~Dorsa_Sadigh1", "gender": "M;F;;F", "homepage": "http://siddkaramcheti.com/;https://web.stanford.edu/~meghas/;https://cs.stanford.edu/~pliang/;https://dorsa.fyi/", "dblp": "199/1922;222/3241;04/1701;117/3174", "google_scholar": "L5v2PHAAAAAJ;mt4ZDTIAAAAJ;pouyVyUAAAAJ;ZaJEZpYAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Siddharth_Karamcheti1;~Megha_Srivastava1;~Percy_Liang1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkaramcheti2021lila,\ntitle={{LILA}: Language-Informed Latent Actions},\nauthor={Siddharth Karamcheti and Megha Srivastava and Percy Liang and Dorsa Sadigh},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=_lkBGOctkip}\n}", "github": "", "project": "", "reviewers": "dhc1;o584;TkuX;Eg7K", "site": "https://openreview.net/forum?id=_lkBGOctkip", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 27, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3609831780237397440&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": 
"Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "a5ZiDzL0enJ", "title": "Learning Inertial Odometry for Dynamic Legged Robot State Estimation", "track": "main", "status": "Poster", "tldr": "", "abstract": "This paper introduces a novel proprioceptive state estimator for legged robots based on a learned displacement measurement from IMU data. Recent research in pedestrian tracking has shown that motion can be inferred from inertial data using convolutional neural networks. A learned inertial displacement measurement can improve state estimation in challenging scenarios where leg odometry is unreliable, such as slipping and compressible terrains. Our work learns to estimate a displacement measurement from IMU data which is then fused with traditional leg odometry. Our approach greatly reduces the drift of proprioceptive state estimation, which is critical for legged robots deployed in vision and lidar denied environments such as foggy sewers or dusty mines. We compared results from an EKF and an incremental fixed-lag factor graph estimator using data from several real robot experiments crossing challenging terrains. Our results show a reduction of relative pose error by 37% in challenging scenarios when compared to a traditional kinematic-inertial estimator without learned measurement. 
We also demonstrate a 22% reduction in error when used with vision systems in visually degraded environments such as an underground mine.", "keywords": "Legged Robots;Inertial Navigation;Deep Neural Networks", "primary_area": "", "supplementary_material": "/attachment/5eaa10ec013b8db5960c887498011294cb661295.zip", "author": "Russell Buchanan;Marco Camurri;Frank Dellaert;Maurice Fallon", "authorids": "~Russell_Buchanan1;~Marco_Camurri1;~Frank_Dellaert1;~Maurice_Fallon1", "gender": "M;;M;M", "homepage": "https://www.ripl-lab.com/;;https://dellaert.github.io;https://ori.ox.ac.uk/ori-people/maurice-fallon/", "dblp": "234/8999;;d/FrankDellaert.html;68/7394.html", "google_scholar": "jPoID5gAAAAJ;_yTpZ7QAAAAJ;ZxXBaswAAAAJ;https://scholar.google.co.uk/citations?user=BqV8LaoAAAAJ", "orcid": "0000-0001-9172-5856;0000-0003-2675-9421;;0000-0003-2940-0879", "linkedin": "raabuchanan/;;frankdellaert/;", "or_profile": "~Russell_Buchanan1;~Marco_Camurri1;~Frank_Dellaert1;~Maurice_Fallon1", "aff": "University of Oxford;University of Oxford;Google;University of Oxford", "aff_domain": "ox.ac.uk;robots.ox.ac.uk;google.com;ox.ac.uk", "position": "PhD student;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nbuchanan2021learning,\ntitle={Learning Inertial Odometry for Dynamic Legged Robot State Estimation},\nauthor={Russell Buchanan and Marco Camurri and Frank Dellaert and Maurice Fallon},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=a5ZiDzL0enJ}\n}", "github": "", "project": "", "reviewers": "twnh;VSob;Kasg;je1b", "site": "https://openreview.net/forum?id=a5ZiDzL0enJ", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15199772318359013920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, 
"aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Oxford;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ox.ac.uk;https://www.google.com", "aff_unique_abbr": "Oxford;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "a7DcA81EV4t", "title": "GRAC: Self-Guided and Self-Regularized Actor-Critic", "track": "main", "status": "Poster", "tldr": "", "abstract": " Deep reinforcement learning (DRL) algorithms have successfully been demonstrated on a range of challenging decision making and control tasks. One dominant component of recent deep reinforcement learning algorithms is the target network which mitigates the divergence when learning the Q function. However, target networks can slow down the learning process due to delayed function updates. Our main contribution in this work is a self-regularized TD-learning method to address divergence without requiring a target network. Additionally, we propose a self-guided policy improvement method by combining policy-gradient with zero-order optimization to search for actions associated with higher Q-values in a broad neighborhood. This makes learning more robust to local noise in the Q function approximation and guides the updates of our actor network. Taken together, these components define GRAC, a novel self-guided and self-regularized actor-critic algorithm. We evaluate GRAC on the OpenAI gym tasks, outperforming state of the art on four tasks and achieving competitive results on two environments. 
We also apply GRAC to enable a non-anthropomorphic robotic hand to successfully accomplish an in-hand manipulation task in the real world.", "keywords": "Deep Reinforcement Learning;Q-learning", "primary_area": "", "supplementary_material": "/attachment/973ab7bcfa3af30de5dc45f7978c70c9f1d0bd55.zip", "author": "Lin Shao;Yifan You;Mengyuan Yan;Shenli Yuan;Qingyun Sun;Jeannette Bohg", "authorids": "~Lin_Shao2;~Yifan_You1;~Mengyuan_Yan1;~Shenli_Yuan1;~Qingyun_Sun1;~Jeannette_Bohg1", "gender": "M;M;F;;M;", "homepage": "https://linsats.github.io/;https://www.linkedin.com/in/yifan-you;;;http://stanford.edu/~qysun/;https://web.stanford.edu/~bohg/", "dblp": "26/8546-2;;164/5672;;182/2041;52/7377", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;UxUdJpcAAAAJ;POXzrBYAAAAJ;rjnJnEkAAAAJ", "orcid": ";;;;;0000-0002-4921-7193", "linkedin": ";;;;;", "or_profile": "~Lin_Shao2;~Yifan_You1;~Mengyuan_Yan1;~Shenli_Yuan1;~Qingyun_Sun1;~Jeannette_Bohg1", "aff": "Stanford University;University of California, Los Angeles;Google;Stanford University;;Stanford University", "aff_domain": "stanford.edu;ucla.edu;google.com;stanford.edu;;stanford.edu", "position": "PhD student;Undergrad student;Researcher;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nshao2021grac,\ntitle={{GRAC}: Self-Guided and Self-Regularized Actor-Critic},\nauthor={Lin Shao and Yifan You and Mengyuan Yan and Shenli Yuan and Qingyun Sun and Jeannette Bohg},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=a7DcA81EV4t}\n}", "github": "", "project": "", "reviewers": "8B1m;wYEU;4Buh", "site": "https://openreview.net/forum?id=a7DcA81EV4t", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 30, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10679732138958982053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Stanford University;University of California, Los Angeles;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.stanford.edu;https://www.ucla.edu;https://www.google.com", "aff_unique_abbr": "Stanford;UCLA;Google", "aff_campus_unique_index": "0;1;2;0;0", "aff_campus_unique": "Stanford;Los Angeles;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ar8FMzaZbcq", "title": "Fully Self-Supervised Class Awareness in Dense Object Descriptors", "track": "main", "status": "Poster", "tldr": "", "abstract": "We address the problem of inferring self-supervised dense semantic correspondences between objects\nin multi-object scenes. The method introduces learning of class-aware dense object descriptors by providing either unsupervised discrete labels or confidence in object similarities. We quantitatively and qualitatively show that the introduced method outperforms previous techniques with more robust pixel-to-pixel matches. An example robotic application is also shown~- grasping of objects in clutter based on corresponding points. 
", "keywords": "Self-Supervision;Descriptor Learning;Object Correspondence", "primary_area": "", "supplementary_material": "/attachment/da97c90736f4cdfef28cd194604c65506f7a1850.zip", "author": "Denis Hadjivelichkov;Dimitrios Kanoulas", "authorids": "~Denis_Hadjivelichkov1;~Dimitrios_Kanoulas1", "gender": "M;M", "homepage": ";https://dkanou.github.io", "dblp": "303/4527;20/4287.html", "google_scholar": ";cE8_5EsAAAAJ", "orcid": ";0000-0002-3684-1472", "linkedin": "denishadjivelichkov/;", "or_profile": "~Denis_Hadjivelichkov1;~Dimitrios_Kanoulas1", "aff": "University College London;University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhadjivelichkov2021fully,\ntitle={Fully Self-Supervised Class Awareness in Dense Object Descriptors},\nauthor={Denis Hadjivelichkov and Dimitrios Kanoulas},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=ar8FMzaZbcq}\n}", "github": "", "project": "", "reviewers": "ZCDs;2SMq;RTPV", "site": "https://openreview.net/forum?id=ar8FMzaZbcq", "pdf_size": 0, "rating": "4;6;10", "confidence": "", "rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12224250600967534258&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "av44VzmMKwW", "title": "Learning Model Preconditions for Planning with Multiple Models", "track": "main", "status": "Poster", "tldr": "", "abstract": "Different models can provide differing levels of fidelity when a robot is planning. 
Analytical models are often fast to evaluate but only work in limited ranges of conditions. Meanwhile, physics simulators are effective at modeling complex interactions between objects but are typically more computationally expensive. Learning when to switch between the various models can greatly improve the speed of planning and task success reliability. In this work, we learn model deviation estimators (MDEs) to predict the error between real-world states and the states outputted by transition models. MDEs can be used to define a model precondition that describes which transitions are accurately modeled. We then propose a planner that uses the learned model preconditions to switch between various models in order to use models in conditions where they are accurate, prioritizing faster models when possible. We evaluate our method on two real-world tasks: placing a rod into a box and placing a rod into a closed drawer.", "keywords": "manipulation;planning", "primary_area": "", "supplementary_material": "/attachment/f8d9180ddbf7284035f485218a9ff1525e303953.zip", "author": "Alex Licari LaGrassa;Oliver Kroemer", "authorids": "~Alex_Licari_LaGrassa1;~Oliver_Kroemer1", "gender": "Non-Binary;M", "homepage": "https://cs.cmu.edu/~alagrass;https://www.ri.cmu.edu/ri-faculty/oliver-kroemer/", "dblp": "275/3372;04/7743", "google_scholar": "BeNOXOYAAAAJ;_tbXjP4AAAAJ", "orcid": ";", "linkedin": "alex-lagrassa-47a30299/;", "or_profile": "~Alex_Licari_LaGrassa1;~Oliver_Kroemer1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlagrassa2021learning,\ntitle={Learning Model Preconditions for Planning with Multiple Models},\nauthor={Alex Licari LaGrassa and Oliver Kroemer},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=av44VzmMKwW}\n}", "github": "", "project": "", 
"reviewers": "MmFq;iJpM;SzMb;wM8w", "site": "https://openreview.net/forum?id=av44VzmMKwW", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4569829619137622235&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "bEito8UUUmf", "title": "Probabilistic and Geometric Depth: Detecting Objects in Perspective", "track": "main", "status": "Poster", "tldr": "", "abstract": "3D object detection is an important capability needed in various practical applications such as driver assistance systems. Monocular 3D detection, a representative general setting among image-based approaches, provides a more economical solution than conventional settings relying on LiDARs but still yields unsatisfactory results. This paper first presents a systematic study on this problem. We observe that the current monocular 3D detection can be simplified as an instance depth estimation problem: The inaccurate instance depth blocks all the other 3D attribute predictions from improving the overall detection performance. Moreover, recent methods directly estimate the depth based on isolated instances or pixels while ignoring the geometric relations across different objects. To this end, we construct geometric relation graphs across predicted objects and use the graph to facilitate depth estimation. As the preliminary depth estimation of each instance is usually inaccurate in this ill-posed setting, we incorporate a probabilistic representation to capture the uncertainty. 
It provides an important indicator to identify confident predictions and further guide the depth propagation. Despite the simplicity of the basic idea, our method, PGD, obtains significant improvements on KITTI and nuScenes benchmarks, achieving 1st place out of all monocular vision-only methods while still maintaining real-time efficiency. Code and models will be released at https://github.com/open-mmlab/mmdetection3d.", "keywords": "Probabilistic and Geometric Depth;Monocular 3D Detection", "primary_area": "", "supplementary_material": "/attachment/aaa4e30f3a6df8dd33ab6d02670f4846ec342532.zip", "author": "Tai Wang;Xinge ZHU;Jiangmiao Pang;Dahua Lin", "authorids": "~Tai_Wang2;~Xinge_ZHU2;~Jiangmiao_Pang1;~Dahua_Lin1", "gender": "M;M;M;M", "homepage": "https://oceanpang.github.io/;http://dahua.site;https://tai-wang.github.io/;https://xingezhu.me/aboutme.html", "dblp": "231/7630;53/6088;;204/3002", "google_scholar": "https://scholar.google.com/citations?authuser=0;GMzzRRUAAAAJ;JmbbZWIAAAAJ;https://scholar.google.com.hk/citations?user=yHAcRooAAAAJ", "orcid": "0000-0002-6711-9319;;;", "linkedin": ";;%E6%B3%B0-%E7%8E%8B-2b2738147/;", "or_profile": "~Jiangmiao_Pang1;~Dahua_Lin1;~Tai_WANG1;~Xinge_Zhu3", "aff": ";The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": ";cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk", "position": ";Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nwang2021probabilistic,\ntitle={Probabilistic and Geometric Depth: Detecting Objects in Perspective},\nauthor={Tai Wang and Xinge ZHU and Jiangmiao Pang and Dahua Lin},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=bEito8UUUmf}\n}", "github": "", "project": "", "reviewers": "2epL;Ngti;HxyZ", "site": "https://openreview.net/forum?id=bEito8UUUmf", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 9, 
"authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 325, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1321793621715878501&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ceOmpjMhlyS", "title": "STORM: An Integrated Framework for Fast Joint-Space Model-Predictive Control for Reactive Manipulation", "track": "main", "status": "Oral", "tldr": "", "abstract": "Sampling-based model-predictive control (MPC) is a promising tool for feedback control of robots with complex, non-smooth dynamics, and cost functions. However, the computationally demanding nature of sampling-based MPC algorithms has been a key bottleneck in their application to high-dimensional robotic manipulation problems in the real world. Previous methods have addressed this issue by running MPC in the task space while relying on a low-level operational space controller for joint control. However, by not using the joint space of the robot in the MPC formulation, existing methods cannot directly account for non-task space related constraints such as avoiding joint limits, singular configurations, and link collisions. In this paper, we develop a system for fast, joint space sampling-based MPC for manipulators that is efficiently parallelized using GPUs. Our approach can handle task and joint space constraints while taking less than 8ms~(125Hz) to compute the next control command. Further, our method can tightly integrate perception into the control problem by utilizing learned cost functions from raw sensor data. We validate our approach by deploying it on a Franka Panda robot for a variety of dynamic manipulation tasks. 
We study the effect of different cost formulations and MPC parameters on the synthesized behavior and provide key insights that pave the way for the application of sampling-based MPC for manipulators in a principled manner. We also provide highly optimized, open-source code to be used by the wider robot learning and control community. Videos of experiments can be found at: https://sites.google.com/view/manipulation-mpc", "keywords": "model-predictive control;manipulation", "primary_area": "", "supplementary_material": "/attachment/a6ef679a28bbac95e1a8cec48b9e2fd11be0b174.zip", "author": "Mohak Bhardwaj;Balakumar Sundaralingam;Arsalan Mousavian;Nathan D. Ratliff;Dieter Fox;Fabio Ramos;Byron Boots", "authorids": "~Mohak_Bhardwaj1;~Balakumar_Sundaralingam1;~Arsalan_Mousavian1;~Nathan_D._Ratliff1;~Dieter_Fox1;~Fabio_Ramos1;~Byron_Boots1", "gender": ";M;M;;M;M;", "homepage": ";https://balakumar-s.github.io/;https://cs.gmu.edu/~amousavi/;;https://homes.cs.washington.edu/~fox/;https://fabioramos.github.io/;", "dblp": ";;164/8572;43/2704;f/DieterFox;22/2488;", "google_scholar": ";https://scholar.google.com/citations?hl=en;fcA9m88AAAAJ;https://scholar.google.com/citations?hl=en;DqXsbPAAAAAJ;https://scholar.google.com.au/citations?user=T_mJiHoAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;nathan-ratliff-b347018b/;;fabio-ramos-3256b421/;", "or_profile": "~Mohak_Bhardwaj1;~Balakumar_Sundaralingam1;~Arsalan_Mousavian1;~Nathan_D._Ratliff1;~Dieter_Fox1;~Fabio_Ramos1;~Byron_Boots1", "aff": ";NVIDIA;NVIDIA;NVIDIA;Department of Computer Science;NVIDIA;", "aff_domain": ";nvidia.com;nvidia.com;nvidia.com;cs.washington.edu;nvidia.com;", "position": ";Research Scientist;Research Scientist;Researcher;Full Professor;Principal Research Scientist;", "bibtex": "@inproceedings{\nbhardwaj2021storm,\ntitle={{STORM}: An Integrated Framework for Fast Joint-Space Model-Predictive Control for Reactive Manipulation},\nauthor={Mohak Bhardwaj and Balakumar Sundaralingam and Arsalan Mousavian and Nathan 
D. Ratliff and Dieter Fox and Fabio Ramos and Byron Boots},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=ceOmpjMhlyS}\n}", "github": "", "project": "", "reviewers": "U2Zx;sfFw;poXv;xm2M", "site": "https://openreview.net/forum?id=ceOmpjMhlyS", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 24, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 152, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17390312984137650798&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "NVIDIA;Unknown Institution", "aff_unique_dep": "NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;", "aff_unique_abbr": "NVIDIA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "csMg2h_LR37", "title": "Orientation Probabilistic Movement Primitives on Riemannian Manifolds", "track": "main", "status": "Poster", "tldr": "", "abstract": "Learning complex robot motions necessarily demands to have models that are able to encode and retrieve full-pose trajectories when tasks are defined in operational spaces. Probabilistic movement primitives (ProMPs) stand out as a principled approach that models trajectory distributions learned from demonstrations. ProMPs allow for trajectory modulation and blending to achieve better generalization to novel situations. However, when ProMPs are employed in operational space, their original formulation does not directly apply to full-pose movements including rotational trajectories described by quaternions. This paper proposes a Riemannian formulation of ProMPs that enables encoding and retrieving of quaternion trajectories. 
Our method builds on Riemannian manifold theory, and exploits multilinear geodesic regression for estimating the ProMPs parameters. This novel approach makes ProMPs a suitable model for learning complex full-pose robot motion patterns. Riemannian ProMPs are tested on toy examples to illustrate their workflow, and on real learning-from-demonstration experiments. ", "keywords": "learning from demonstration;movement primitives;riemannian manifolds", "primary_area": "", "supplementary_material": "/attachment/e4f1148ed26a8658df9f8c649fdbcefb4a383650.zip", "author": "Leonel Rozo;Vedant Dave", "authorids": "~Leonel_Rozo1;~Vedant_Dave1", "gender": "M;M", "homepage": "https://vedantdave97.github.io/;https://leonelrozo.weebly.com/", "dblp": ";10/9515", "google_scholar": "8Gi6AaEAAAAJ;https://scholar.google.it/citations?user=vLWgi-YAAAAJ", "orcid": ";0000-0001-5970-9135", "linkedin": "vedant-dave-095629178/;leonelrozo/", "or_profile": "~Vedant_Dave1;~Leonel_Dario_Rozo1", "aff": "Montanuniversit\u00e4t Leoben;Robert Bosch GmbH, Bosch", "aff_domain": "cps.unileoben.ac.at;de.bosch.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nrozo2021orientation,\ntitle={Orientation Probabilistic Movement Primitives on Riemannian Manifolds},\nauthor={Leonel Rozo and Vedant Dave},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=csMg2h_LR37}\n}", "github": "", "project": "", "reviewers": "G9zn;MyGv;RYej;qZSt", "site": "https://openreview.net/forum?id=csMg2h_LR37", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7774780388445236168&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1", "aff_unique_norm": "Montanuniversit\u00e4t Leoben;Robert Bosch GmbH", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.montanuni-leoben.at;https://www.bosch.com", "aff_unique_abbr": "MUL;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Austria;Germany" }, { "id": "d_SWJhyKfVw", "title": "Rapid Exploration for Open-World Navigation with Latent Goal Models", "track": "main", "status": "Oral", "tldr": "", "abstract": "We describe a robotic learning system for autonomous exploration and navigation in diverse, open-world environments. At the core of our method is a learned latent variable model of distances and actions, along with a non-parametric topological memory of images. We use an information bottleneck to regularize the learned policy, giving us (i) a compact visual representation of goals, (ii) improved generalization capabilities, and (iii) a mechanism for sampling feasible goals for exploration. Trained on a large offline dataset of prior experience, the model acquires a representation of visual goals that is robust to task-irrelevant distractors. We demonstrate our method on a mobile ground robot in open-world exploration scenarios. Given an image of a goal that is up to 80 meters away, our method leverages its representation to explore and discover the goal in under 20 minutes, even amidst previously-unseen obstacles and weather conditions. 
Please check out the project website for videos of our experiments and information about the real-world dataset used at https://sites.google.com/view/recon-robot.", "keywords": "robot learning;navigation;exploration", "primary_area": "", "supplementary_material": "/attachment/ce8e9dc3998338963b2a12a06e160156e507f45e.zip", "author": "Dhruv Shah;Benjamin Eysenbach;Nicholas Rhinehart;Sergey Levine", "authorids": "~Dhruv_Shah1;~Benjamin_Eysenbach1;~Nicholas_Rhinehart1;~Sergey_Levine1", "gender": "M;M;M;M", "homepage": "http://cs.berkeley.edu/~shah;https://ben-eysenbach.github.io/;https://leaf.utias.utoronto.ca/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": ";192/1863;153/2193;80/7594", "google_scholar": ";DRnOvU8AAAAJ;xUGZX_MAAAAJ;8R35rCwAAAAJ", "orcid": ";0009-0000-7136-6307;;", "linkedin": ";benjamin-eysenbach-a7235775/;;", "or_profile": "~Dhruv_Shah1;~Benjamin_Eysenbach1;~Nicholas_Rhinehart1;~Sergey_Levine1", "aff": "UC Berkeley;Carnegie Mellon University;University of California, Berkeley;Google", "aff_domain": "berkeley.edu;cmu.edu;berkeley.edu;google.com", "position": "PhD student;PhD student;Postdoc;Research Scientist", "bibtex": "@inproceedings{\nshah2021rapid,\ntitle={Rapid Exploration for Open-World Navigation with Latent Goal Models},\nauthor={Dhruv Shah and Benjamin Eysenbach and Nicholas Rhinehart and Sergey Levine},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=d_SWJhyKfVw}\n}", "github": "", "project": "", "reviewers": "nTAe;gi9D;7dLW;fB76", "site": "https://openreview.net/forum?id=d_SWJhyKfVw", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 23, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11053520026261962121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of California, 
Berkeley;Carnegie Mellon University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;CMU;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "dgQdvPZnH-t", "title": "LanguageRefer: Spatial-Language Model for 3D Visual Grounding", "track": "main", "status": "Poster", "tldr": "", "abstract": "For robots to understand human instructions and perform meaningful tasks in the near future, it is important to develop learned models that comprehend referential language to identify common objects in real-world 3D scenes. In this paper, we introduce a spatial-language model for a 3D visual grounding problem. Specifically, given a reconstructed 3D scene in the form of point clouds with 3D bounding boxes of potential object candidates, and a language utterance referring to a target object in the scene, our model successfully identifies the target object from a set of potential candidates. Specifically, LanguageRefer uses a transformer-based architecture that combines spatial embedding from bounding boxes with fine-tuned language embeddings from DistilBert to predict the target object. We show that it performs competitively on visio-linguistic datasets proposed by ReferIt3D. 
Further, we analyze its spatial reasoning task performance decoupled from perception noise, the accuracy of view-dependent utterances, and viewpoint annotations for potential robotics applications.", "keywords": "Referring task;Language model;3D visual grounding;3D Navigation", "primary_area": "", "supplementary_material": "/attachment/18fce03d2b0ebc0fe51a6dd9592069f967a9e839.zip", "author": "Junha Roh;Karthik Desingh;Ali Farhadi;Dieter Fox", "authorids": "~Junha_Roh1;~Karthik_Desingh1;~Ali_Farhadi3;~Dieter_Fox1", "gender": ";M;M;M", "homepage": "https://rohjunha.github.io;;https://homes.cs.washington.edu/~ali/;https://homes.cs.washington.edu/~fox/", "dblp": "150/3983;124/2740;37/5826;f/DieterFox", "google_scholar": ";zgezSpQAAAAJ;jeOFRDsAAAAJ;DqXsbPAAAAAJ", "orcid": ";;;", "linkedin": "junha-roh;;;", "or_profile": "~Junha_Roh1;~Karthik_Desingh1;~Ali_Farhadi3;~Dieter_Fox1", "aff": "University of Washington;University of Washington;University of Washington;Department of Computer Science", "aff_domain": "washington.edu;washington.edu;cs.uw.edu;cs.washington.edu", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nroh2021languagerefer,\ntitle={LanguageRefer: Spatial-Language Model for 3D Visual Grounding},\nauthor={Junha Roh and Karthik Desingh and Ali Farhadi and Dieter Fox},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=dgQdvPZnH-t}\n}", "github": "", "project": "", "reviewers": "tKLL;rNyP;3bBQ;PcsE", "site": "https://openreview.net/forum?id=dgQdvPZnH-t", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10821904584586426777&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Washington;Unknown Institution", 
"aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.washington.edu;", "aff_unique_abbr": "UW;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "eIk6eBz3Wlt", "title": "Trust Your Robots! Predictive Uncertainty Estimation of Neural Networks with Sparse Gaussian Processes", "track": "main", "status": "Poster", "tldr": "", "abstract": "This paper presents a probabilistic framework to obtain both reliable and fast uncertainty estimates for predictions with Deep Neural Networks (DNNs). Our main contribution is a practical and principled combination of DNNs with sparse Gaussian Processes (GPs). We prove theoretically that DNNs can be seen as a special case of sparse GPs, namely mixtures of GP experts (MoE-GP), and we devise a learning algorithm that brings the derived theory into practice. In experiments from two different robotic tasks -- inverse dynamics of a manipulator and object detection on a micro-aerial vehicle (MAV) -- we show the effectiveness of our approach in terms of predictive uncertainty, improved scalability, and run-time efficiency on a Jetson TX2. 
We thus argue that our approach can pave the way towards reliable and fast robot learning systems with uncertainty awareness.", "keywords": "Robotic Introspection;Bayesian Deep Learning;Gaussian Processes", "primary_area": "", "supplementary_material": "/attachment/b90e5913716181d71dd742e3f7c6e94bfb2353a6.zip", "author": "Jongseok Lee;Jianxiang Feng;Matthias Humt;Marcus Gerhard M\u00fcller;Rudolph Triebel", "authorids": "~Jongseok_Lee1;~Jianxiang_Feng1;~Matthias_Humt1;marcus.mueller@dlr.de;~Rudolph_Triebel1", "gender": "M;M;M;;", "homepage": "https://rmc.dlr.de/rm/en/staff/jongseok.lee/;;https://hummat.com;;", "dblp": "58/4966;267/9411;267/9630;;", "google_scholar": "3GaQJP8AAAAJ;b-5CscIAAAAJ;https://scholar.google.de/citations?user=MOJSVsUAAAAJ;;", "orcid": ";;0000-0002-1523-9335;;", "linkedin": "jongseok-lee-b75362118;;matthiashumt;;", "or_profile": "~Jongseok_Lee1;~Jianxiang_Feng1;~Matthias_Humt1;marcus.mueller@dlr.de;~Rudolph_Triebel1", "aff": "German Aerospace Center (DLR);RMC, German Aerospace Center (DLR);Deutsches Zentrum f\u00fcr Luft- und Raumfahrt;;", "aff_domain": "dlr.de;dlr.de;dlr.de;;", "position": "Researcher;Researcher;Researcher;;", "bibtex": "@inproceedings{\nlee2021trust,\ntitle={Trust Your Robots! 
Predictive Uncertainty Estimation of Neural Networks with Sparse Gaussian Processes},\nauthor={Jongseok Lee and Jianxiang Feng and Matthias Humt and Marcus Gerhard M{\\\"u}ller and Rudolph Triebel},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=eIk6eBz3Wlt}\n}", "github": "", "project": "", "reviewers": "Q7n6;hvPu;mLeS", "site": "https://openreview.net/forum?id=eIk6eBz3Wlt", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 27, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16202404081443132410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "German Aerospace Center;Deutsches Zentrum f\u00fcr Luft- und Raumfahrt", "aff_unique_dep": ";", "aff_unique_url": "https://www.dlr.de;https://www.dlr.de", "aff_unique_abbr": "DLR;DLR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "f7KaqYLO3iE", "title": "A Differentiable Recipe for Learning Visual Non-Prehensile Planar Manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "Specifying tasks with videos is a powerful technique towards acquiring novel and general robot skills. However, reasoning over mechanics and dexterous interactions can make it challenging to scale visual learning for contact-rich manipulation. In this work, we focus on the problem of visual dexterous planar manipulation: given a video of an object in planar motion, find contact-aware robot actions that reproduce the same object motion. We propose a novel learning architecture that combines video decoding neural models with priors from contact mechanics by leveraging differentiable optimization and differentiable simulation. 
Through extensive simulated experiments, we investigate the interplay between traditional model-based techniques and modern deep learning approaches. We find that our modular and fully differentiable architecture outperforms learning-only methods on unseen objects and motions.", "keywords": "Manipulation;Visual learning;Differentiable optimization", "primary_area": "", "supplementary_material": "/attachment/2930890f6989dd539e028c4b5df1ef071138c39d.zip", "author": "Bernardo Aceituno;Alberto Rodriguez;Shubham Tulsiani;Abhinav Gupta;Mustafa Mukadam", "authorids": "~Bernardo_Aceituno1;albertor@mit.du;~Shubham_Tulsiani1;~Abhinav_Gupta1;~Mustafa_Mukadam1", "gender": "M;;M;M;M", "homepage": "http://aceituno.mit.edu;;https://shubhtuls.github.io/;http://www.cs.cmu.edu/~abhinavg;http://www.mustafamukadam.com", "dblp": "192/1234;;135/6623;36/7024-1;", "google_scholar": "https://scholar.google.co.ve/citations?user=9gYLFpgAAAAJ;;06rffEkAAAAJ;https://scholar.google.com.tw/citations?user=bqL73OkAAAAJ;yYpm9LoAAAAJ", "orcid": ";;;;", "linkedin": ";;;;mhmukadam/", "or_profile": "~Bernardo_Aceituno1;albertor@mit.du;~Shubham_Tulsiani1;~Abhinav_Gupta1;~Mustafa_Mukadam1", "aff": "Massachusetts Institute of Technology;;Meta Facebook;Meta Facebook;Meta AI", "aff_domain": "mit.edu;;fb.com;fb.com;meta.com", "position": "Research Assistant;;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\naceituno2021a,\ntitle={A Differentiable Recipe for Learning Visual Non-Prehensile Planar Manipulation},\nauthor={Bernardo Aceituno and Alberto Rodriguez and Shubham Tulsiani and Abhinav Gupta and Mustafa Mukadam},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=f7KaqYLO3iE}\n}", "github": "", "project": "", "reviewers": "teuP;XGL2;5sxA", "site": "https://openreview.net/forum?id=f7KaqYLO3iE", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 5, 
"corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13246656340232829100&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://web.mit.edu;https://meta.com", "aff_unique_abbr": "MIT;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ftOqDUeLPn3", "title": "Dealing with the Unknown: Pessimistic Offline Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Reinforcement Learning (RL) has been shown effective in domains where the agent can learn policies by actively interacting with its operating environment. However, if we change the RL scheme to offline setting where the agent can only update its policy via static datasets, one of the major issues in offline reinforcement learning emerges, i.e. distributional shift. We propose a Pessimistic Offline Reinforcement Learning (PessORL) algorithm to actively lead the agent back to the area where it is familiar by manipulating the value function. We focus on problems caused by out-of-distribution (OOD) states, and deliberately penalize high values at states that are absent in the training dataset, so that the learned pessimistic value function lower bounds the true value anywhere within the state space. 
We evaluate the PessORL algorithm on various benchmark tasks, where we show that our method gains better performance by explicitly handling OOD states, when compared to those methods merely considering OOD actions.", "keywords": "Offline Reinforcement Learning;Out-of-Distribution States", "primary_area": "", "supplementary_material": "/attachment/3e6da2cddf754bca4325465b512ad9e08f359f84.zip", "author": "Jinning Li;Chen Tang;Masayoshi Tomizuka;Wei Zhan", "authorids": "~Jinning_Li1;~Chen_Tang2;~Masayoshi_Tomizuka1;~Wei_Zhan2", "gender": ";M;M;", "homepage": "https://jinning-li.github.io/;https://chentangmark.github.io;https://me.berkeley.edu/people/masayoshi-tomizuka/;", "dblp": ";71/7642;10/4434;", "google_scholar": "VbNwxKYAAAAJ;x78TL58AAAAJ;;", "orcid": ";;;", "linkedin": "jinningli/;chen-tang-08377b5b/;;", "or_profile": "~Jinning_Li1;~Chen_Tang2;~Masayoshi_Tomizuka1;~Wei_Zhan2", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;", "position": "PhD student;PhD student;Full Professor;", "bibtex": "@inproceedings{\nli2021dealing,\ntitle={Dealing with the Unknown: Pessimistic Offline Reinforcement Learning},\nauthor={Jinning Li and Chen Tang and Masayoshi Tomizuka and Wei Zhan},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=ftOqDUeLPn3}\n}", "github": "", "project": "", "reviewers": "Qikv;YDuP;2d4v;DVzS", "site": "https://openreview.net/forum?id=ftOqDUeLPn3", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9642928956513866122&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", 
"aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "fy4ZBWxYbIo", "title": "A Workflow for Offline Model-Free Robotic Reinforcement Learning", "track": "main", "status": "Oral", "tldr": "", "abstract": "Offline reinforcement learning (RL) enables learning control policies by utilizing only prior experience, without any online interaction. This can allow robots to acquire generalizable skills from large and diverse datasets, without any costly or unsafe online data collection. Despite recent algorithmic advances in offline RL, applying these methods to real-world problems has proven challenging. Although offline RL methods can learn from prior data, there is no clear and well-understood process for making various design choices, from model ar- architecture to algorithm hyperparameters, without actually evaluating the learned policies online. In this paper, our aim is to develop a practical workflow for using offline RL analogous to the relatively well-understood workflows for supervised learning problems. To this end, we devise a set of metrics and conditions that can be tracked over the course of offline training and can inform the practitioner about how the algorithm and model architecture should be adjusted to improve final performance. Our workflow is derived from a conceptual understanding of the behavior of conservative offline RL algorithms and cross-validation in supervised learning. We demonstrate the efficacy of this workflow in producing effective policies without any online tuning, both in several simulated robotic learning scenarios and for three tasks on two distinct real robots, focusing on learning manipulation skills with raw image observations with sparse binary rewards. 
Explanatory video and additional content can be found at https://sites.google.com/view/offline-rl-workflow ", "keywords": "workflow;offline RL;no online tuning", "primary_area": "", "supplementary_material": "/attachment/3ff49234bb268b8981682e958e6a38c9e82b39c7.zip", "author": "Aviral Kumar;Anikait Singh;Stephen Tian;Chelsea Finn;Sergey Levine", "authorids": "~Aviral_Kumar2;~Anikait_Singh1;~Stephen_Tian1;~Chelsea_Finn1;~Sergey_Levine1", "gender": "M;M;M;F;M", "homepage": "https://aviralkumar2907.github.io/;https://asap7772.github.io/;http://s-tian.github.io;https://ai.stanford.edu/~cbfinn/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "202/7961;302/3876;237/9780;131/1783;80/7594", "google_scholar": ";lPaISmIAAAAJ;l19pn2sAAAAJ;vfPE6hgAAAAJ;8R35rCwAAAAJ", "orcid": ";;;;", "linkedin": ";asap7772/;;;", "or_profile": "~Aviral_Kumar2;~Anikait_Singh1;~Stephen_Tian1;~Chelsea_Finn1;~Sergey_Levine1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google;Google", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;google.com;google.com", "position": "PhD student;Undergrad student;Undergrad student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nkumar2021a,\ntitle={A Workflow for Offline Model-Free Robotic Reinforcement Learning},\nauthor={Aviral Kumar and Anikait Singh and Stephen Tian and Chelsea Finn and Sergey Levine},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=fy4ZBWxYbIo}\n}", "github": "", "project": "", "reviewers": "J1Lk;bymo;D6Ls;rBLy", "site": "https://openreview.net/forum?id=fy4ZBWxYbIo", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 23, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 105, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13443145557314290233&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, 
"aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;0;1;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ht3aHpc1hUt", "title": "Structure from Silence: Learning Scene Structure from Ambient Sound", "track": "main", "status": "Oral", "tldr": "", "abstract": "From whirling ceiling fans to ticking clocks, the sounds that we hear subtly vary as we move through a scene. We ask whether these ambient sounds convey information about 3D scene structure and, if so, whether they provide a useful learning signal for multimodal models. To study this, we collect a dataset of paired audio and RGB-D recordings from a variety of quiet indoor scenes. We then train models that estimate the distance to nearby walls, given only audio as input. We also use these recordings to learn multimodal representations through self-supervision, by training a network to associate images with their corresponding sounds. 
These results suggest that ambient sound conveys a surprising amount of information about scene structure, and that it is a useful signal for learning multimodal features.", "keywords": "audio perception;multi-modal learning;self-supervision;navigation", "primary_area": "", "supplementary_material": "/attachment/0640d3d9a2e6674ebfa67a1aa9d1c4e474c64a23.zip", "author": "Ziyang Chen;Xixi Hu;Andrew Owens", "authorids": "~Ziyang_Chen2;~Xixi_Hu2;~Andrew_Owens1", "gender": "M;;M", "homepage": "https://ificl.github.io/;https://hxixixh.github.io/;http://andrewowens.com", "dblp": ";234/1710;85/2697", "google_scholar": "PbsR83sAAAAJ;;9hX-JksAAAAJ", "orcid": ";;", "linkedin": "ziyang-chen-701982200/;xixi-hu-210200160/;", "or_profile": "~Ziyang_Chen2;~Xixi_Hu2;~Andrew_Owens1", "aff": "University of Michigan;University of Michigan;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nchen2021structure,\ntitle={Structure from Silence: Learning Scene Structure from Ambient Sound},\nauthor={Ziyang Chen and Xixi Hu and Andrew Owens},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=ht3aHpc1hUt}\n}", "github": "", "project": "", "reviewers": "jRS5;8xhP;13Bf;Vy16", "site": "https://openreview.net/forum?id=ht3aHpc1hUt", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7878167118189718675&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { 
"id": "hu7b7MPCqiC", "title": "Multimodal Trajectory Prediction Conditioned on Lane-Graph Traversals", "track": "main", "status": "Poster", "tldr": "", "abstract": "Accurately predicting the future motion of surrounding vehicles requires reasoning about the inherent uncertainty in driving behavior. This uncertainty can be loosely decoupled into lateral (e.g., keeping lane, turning) and longitudinal (e.g., accelerating, braking). We present a novel method that combines learned discrete policy rollouts with a focused decoder on subsets of the lane graph. The policy rollouts explore different goals given current observations, ensuring that the model captures lateral variability. Longitudinal variability is captured by our latent variable model decoder that is conditioned on various subsets of the lane graph. Our model achieves state-of-the-art performance on the nuScenes motion prediction dataset, and qualitatively demonstrates excellent scene compliance. Detailed ablations highlight the importance of the policy rollouts and the decoder architecture.", "keywords": "Motion prediction;autonomous vehicles;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/ed66ad8a6e333851cc3d30fda5ff2b846a8e7a01.zip", "author": "Nachiket Deo;Eric Wolff;Oscar Beijbom", "authorids": "~Nachiket_Deo1;eric.wolff@motional.com;~Oscar_Beijbom1", "gender": "M;;M", "homepage": ";;https://beijbom.github.io/", "dblp": "173/6507;;117/4802", "google_scholar": "gfQW70IAAAAJ;;XP_Hxm4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nachiket_Deo1;eric.wolff@motional.com;~Oscar_Beijbom1", "aff": "University of California, San Diego;;", "aff_domain": "ucsd.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\ndeo2021multimodal,\ntitle={Multimodal Trajectory Prediction Conditioned on Lane-Graph Traversals},\nauthor={Nachiket Deo and Eric Wolff and Oscar Beijbom},\nbooktitle={5th Annual Conference on Robot Learning 
},\nyear={2021},\nurl={https://openreview.net/forum?id=hu7b7MPCqiC}\n}", "github": "", "project": "", "reviewers": "qYAi;HKDg;hmkE;NjKJ", "site": "https://openreview.net/forum?id=hu7b7MPCqiC", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 224, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5033391406490548328&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "ibktAcINCaj", "title": "Urban Driver: Learning to Drive from Real-world Demonstrations Using Policy Gradients", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this work we are the first to present an offline policy gradient method for learning imitative policies for complex urban driving from a large corpus of real-world demonstrations. This is achieved by building a differentiable data-driven simulator on top of perception outputs and high-fidelity HD maps of the area. It allows us to synthesize new driving experiences from existing demonstrations using mid-level representations. Using this simulator we then train a policy network in closed-loop employing policy gradients.\nWe train our proposed method on 100 hours of expert demonstrations on urban roads and show that it learns complex driving policies that generalize well and can perform a variety of driving maneuvers. We demonstrate this in simulation as well as deploy our model to self-driving vehicles in the real-world. 
Our method outperforms previously demonstrated state-of-the-art for urban driving scenarios - all this without the need for complex state perturbations or collecting additional on-policy data during training. We make code and data publicly available.", "keywords": "Self-driving;Learning from Demonstrations;Simulation", "primary_area": "", "supplementary_material": "/attachment/8d371cd6955fc4b1a6e7e36a79e3c6cf957cbf8c.zip", "author": "Oliver Scheel;Luca Bergamini;Maciej Wolczyk;B\u0142a\u017cej Osi\u0144ski;Peter Ondruska", "authorids": "~Oliver_Scheel1;~Luca_Bergamini1;~Maciej_Wolczyk1;~B\u0142a\u017cej_Osi\u0144ski1;~Peter_Ondruska2", "gender": ";M;M;;M", "homepage": "https://www.linkedin.com/in/oliver-scheel-98a048176/;;;;http://www.ondruska.com", "dblp": ";;236/5956;218/5547;", "google_scholar": "I_mbUCQAAAAJ;;;WuWWdKcAAAAJ;https://scholar.google.co.uk/citations?user=UuJrkKkAAAAJ", "orcid": ";;;;", "linkedin": ";;;;pondruska/", "or_profile": "~Oliver_Scheel1;~Luca_Bergamini1;~Maciej_Wolczyk1;~B\u0142a\u017cej_Osi\u0144ski1;~Peter_Ondruska2", "aff": "Lyft Inc.;;Jagiellonian University Cracow;Lyft Inc.;Lyft Inc.", "aff_domain": "lyft.com;unimore.it;uj.edu.pl;lyft.com;lyft.com", "position": "Researcher;PhD student;PhD student;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nscheel2021urban,\ntitle={Urban Driver: Learning to Drive from Real-world Demonstrations Using Policy Gradients},\nauthor={Oliver Scheel and Luca Bergamini and Maciej Wolczyk and B{\\l}a{\\.z}ej Osi{\\'n}ski and Peter Ondruska},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=ibktAcINCaj}\n}", "github": "", "project": "", "reviewers": "oEVn;KT1F;wo2G;q8Fh", "site": "https://openreview.net/forum?id=ibktAcINCaj", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 120, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=17616175296771168435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Lyft;Jagiellonian University", "aff_unique_dep": ";", "aff_unique_url": "https://www.lyft.com;https://www.uj.edu.pl", "aff_unique_abbr": "Lyft;UJ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cracow", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Poland" }, { "id": "j3Rguo81Yi_", "title": "Specializing Versatile Skill Libraries using Local Mixture of Experts", "track": "main", "status": "Poster", "tldr": "", "abstract": "A long-cherished vision in robotics is to equip robots with skills that match the versatility and precision of humans.\nFor example, when playing table tennis, a robot should be capable of returning the ball in various ways while precisely placing it at the desired location. \nA common approach to model such versatile behavior is to use a Mixture of Experts (MoE) model, where each expert is a contextual motion primitive.\nHowever, learning such MoEs is challenging as most objectives force the model to cover the entire context space, which prevents specialization of the primitives resulting in rather low-quality components. \nStarting from maximum entropy reinforcement learning (RL), we decompose the objective into optimizing an individual lower bound per mixture component.\nFurther, we introduce a curriculum by allowing the components to focus\non a local context region, enabling the model to learn highly accurate skill representations.\nTo this end, we use local context distributions that are adapted jointly with the expert primitives. 
Our lower bound advocates an iterative addition of new components, where new components will concentrate on local context regions not covered by the current MoE.\nThis local and incremental learning results in a modular MoE model of high accuracy and versatility, where both properties can be scaled by adding more components on the fly. \nWe demonstrate this by an extensive ablation and on two challenging simulated robot skill learning tasks. We compare our achieved performance to LaDiPS and HiREPS, a known hierarchical policy search method for learning diverse skills. ", "keywords": "Episodic Policy Search;Versatile Skill Learning;Hierarchical RL;Curriculum Learning", "primary_area": "", "supplementary_material": "/attachment/0623089bea2b10a14e39783a5d6d1d5a64cea952.zip", "author": "Onur Celik;Dongzhuoran Zhou;Ge Li;Philipp Becker;Gerhard Neumann", "authorids": "~Onur_Celik1;~Dongzhuoran_Zhou1;~Ge_Li3;~Philipp_Becker1;~Gerhard_Neumann2", "gender": "M;M;M;M;M", "homepage": "https://alr.anthropomatik.kit.edu/21_69.php;https://github.com/TOA-ZR;;;https://alr.anthropomatik.kit.edu/", "dblp": "243/5913;308/6036;;66/1316;60/4878", "google_scholar": "9jqaTcAAAAAJ;TbZH2gUAAAAJ;;https://scholar.google.de/citations?user=jXx-LuQAAAAJ;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;;", "linkedin": ";dongzhuoran-zhou-99858a142/;geli-bruce/;;", "or_profile": "~Onur_Celik1;~Dongzhuoran_Zhou1;~Ge_Li3;~Philipp_Becker1;~Gerhard_Neumann1", "aff": "Karlsruhe Institute of Technology;Karlsruhe Institute of Technology;Karlsruhe Institute of Technology;Karlsruhe Institute of Technology;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "position": "PhD student;MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ncelik2021specializing,\ntitle={Specializing Versatile Skill Libraries using Local Mixture of Experts},\nauthor={Onur Celik and Dongzhuoran Zhou and Ge Li and Philipp Becker and Gerhard 
Neumann},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=j3Rguo81Yi_}\n}", "github": "", "project": "", "reviewers": "Kbdi;vGwW;xPhY;as3F", "site": "https://openreview.net/forum?id=j3Rguo81Yi_", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 20, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=935095214533004622&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Karlsruhe Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kit.edu", "aff_unique_abbr": "KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "jOSWHddP1fZ", "title": "Goal-Auxiliary Actor-Critic for 6D Robotic Grasping with Point Clouds", "track": "main", "status": "Poster", "tldr": "", "abstract": "6D robotic grasping beyond top-down bin-picking scenarios is a challenging task. Previous solutions based on 6D grasp synthesis with robot motion planning usually operate in an open-loop setting, which are sensitive to grasp synthesis errors. In this work, we propose a new method for learning closed-loop control policies for 6D grasping. Our policy takes a segmented point cloud of an object from an egocentric camera as input, and outputs continuous 6D control actions of the robot gripper for grasping the object. We combine imitation learning and reinforcement learning and introduce a goal-auxiliary actor-critic algorithm for policy learning. We demonstrate that our learned policy can be integrated into a tabletop 6D grasping system and a human-robot handover system to improve the grasping performance of unseen objects. 
Videos and code are available at https://sites.google.com/view/gaddpg.", "keywords": "6D Robotic Grasping;Imitation Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/bf71eb9723201e4973e639f283b70c3418c4e06a.zip", "author": "Lirui Wang;Yu Xiang;Wei Yang;Arsalan Mousavian;Dieter Fox", "authorids": "~Lirui_Wang1;~Yu_Xiang3;~Wei_Yang2;~Arsalan_Mousavian1;~Dieter_Fox1", "gender": "M;M;M;M;M", "homepage": "https://liruiw.github.io/;http://wyang.me/;https://cs.gmu.edu/~amousavi/;https://homes.cs.washington.edu/~fox/;https://yuxng.github.io/", "dblp": "221/9612;03/1094-19;164/8572;f/DieterFox;00/6716-1", "google_scholar": "EM9YhH0AAAAJ;6QQX88UAAAAJ;fcA9m88AAAAJ;DqXsbPAAAAAJ;", "orcid": ";0000-0003-3975-2472;;;0000-0001-9431-5131", "linkedin": ";;;;", "or_profile": "~Lirui_Wang1;~Wei_Yang2;~Arsalan_Mousavian1;~Dieter_Fox1;~Yu_Xiang1", "aff": "University of Washington, Seattle;NVIDIA;NVIDIA;Department of Computer Science;NVIDIA", "aff_domain": "uw.edu;nvidia.com;nvidia.com;cs.washington.edu;nvidia.com", "position": "MS student;Research Scientist;Research Scientist;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nwang2021goalauxiliary,\ntitle={Goal-Auxiliary Actor-Critic for 6D Robotic Grasping with Point Clouds},\nauthor={Lirui Wang and Yu Xiang and Wei Yang and Arsalan Mousavian and Dieter Fox},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=jOSWHddP1fZ}\n}", "github": "", "project": "", "reviewers": "GYJh;nhX8;i4Zo;7sEr", "site": "https://openreview.net/forum?id=jOSWHddP1fZ", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6010967143703049350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of 
Washington;NVIDIA;Unknown Institution", "aff_unique_dep": ";NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www.washington.edu;https://www.nvidia.com;", "aff_unique_abbr": "UW;NVIDIA;", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "jV0n2wJocXr", "title": "CLASP: Constrained Latent Shape Projection for Refining Object Shape from Robot Contact", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robots need both visual and contact sensing to effectively estimate the state of their environment. Camera RGBD data provides rich information of the objects surrounding the robot, and shape priors can help correct noise and fill in gaps and occluded regions. However, when the robot senses unexpected contact, the estimate should be updated to explain the contact. To address this need, we propose CLASP: Constrained Latent Shape Projection. This approach consists of a shape completion network that generates a prior from RGBD data and a procedure to generate shapes consistent with both the network prior and robot contact observations. 
We find CLASP consistently decreases the Chamfer Distance between the predicted and ground truth scenes, while other approaches do not benefit from contact information.", "keywords": "Shape Completion;Contact Sensing", "primary_area": "", "supplementary_material": "/attachment/b00a90c23f2063708fe443207baf75467b66968d.zip", "author": "Brad Saund;Dmitry Berenson", "authorids": "~Brad_Saund1;~Dmitry_Berenson1", "gender": ";M", "homepage": "https://www.bradsaund.com;http://web.eecs.umich.edu/~dmitryb/", "dblp": ";", "google_scholar": ";x-n9rIMAAAAJ", "orcid": ";0000-0002-9712-109X", "linkedin": ";", "or_profile": "~Brad_Saund1;~Dmitry_Berenson1", "aff": "University of Michigan;University of Michigan", "aff_domain": "umich.edu;umich.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nsaund2021clasp,\ntitle={{CLASP}: Constrained Latent Shape Projection for Refining Object Shape from Robot Contact},\nauthor={Brad Saund and Dmitry Berenson},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=jV0n2wJocXr}\n}", "github": "", "project": "", "reviewers": "82yA;wjGX;URGm", "site": "https://openreview.net/forum?id=jV0n2wJocXr", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14990457124505057439&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "kR8UWCty--O", "title": "Visual Learning Towards Soft Robot Force Control using a 3D Metamaterial with Differential Stiffness", "track": "main", 
"status": "Poster", "tldr": "", "abstract": "This paper explores the feasibility of learning robot force control and interaction using soft metamaterial and machine vision. We start by investigating the differential stiffness of a hollow, cone-shaped, 3D metamaterial made from soft rubber, achieving a large stiffness ratio between the axial and radial directions that leads to an adaptive form response in omni-directions during physical interaction. Then, using image data collected from its internal deformation during various interactions, we explored two similar designs but different learning strategies to estimate force control and interactions on the end-effector of a UR10 e-series robot arm. One is to directly learn the force and torque response from raw images of the metamaterial's internal deformation. The other is to indirectly estimate the 6D force and torque using a neural network by visually tracking the 6D pose of a marker fixed inside the 3D metamaterial. Finally, we integrated the two proposed systems and achieved similar force feedback and control interactions in simple tasks such as circle following and text writing. 
Our results show that the learning method holds the potential to support the concept of soft robot force control, providing an intuitive interface at a low cost for robotic systems, generating comparable and capable performances against classical force and torque sensors.", "keywords": "Visual Learning;Soft Robot;Force Control", "primary_area": "", "supplementary_material": "/attachment/2d26d8f67993ff4f52bb16c79fb28c6a4541b9d8.zip", "author": "Fang Wan;Xiaobo Liu;Ning Guo;Xudong Han;Feng Tian;Chaoyang Song", "authorids": "~Fang_Wan2;~Xiaobo_Liu3;~Ning_Guo3;~Xudong_Han1;~Feng_Tian3;~Chaoyang_Song1", "gender": ";;;M;;", "homepage": "https://ancorasir.com/;;https://gabriel-ning.github.io/GuoNing.github.io/;;;", "dblp": "01/845-2;;;;;", "google_scholar": ";https://scholar.google.com.hk/citations?view_op=list_works;;nfoqsHMAAAAJ;;", "orcid": ";;;0000-0001-9986-8990;;", "linkedin": ";;;;;", "or_profile": "~Fang_Wan2;~Xiaobo_Liu3;~Ning_Guo3;~Xudong_Han1;~Feng_Tian3;~Chaoyang_Song1", "aff": "Southern University of Science and Technology;Southern University of Science and Technology;Southern University of Science and Technology;Southern University of Science and Technology;;", "aff_domain": "sustech.edu.cn;sustech.edu.cn;mail.sustech.edu.cn;sustech.edu.cn;;", "position": "Visiting Scholar;PhD student;PhD student;Undergrad student;;", "bibtex": "@inproceedings{\nwan2021visual,\ntitle={Visual Learning Towards Soft Robot Force Control using a 3D Metamaterial with Differential Stiffness},\nauthor={Fang Wan and Xiaobo Liu and Ning Guo and Xudong Han and Feng Tian and Chaoyang Song},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=kR8UWCty--O}\n}", "github": "", "project": "", "reviewers": "mjxa;avKF;Wc5U;h8MY", "site": "https://openreview.net/forum?id=kR8UWCty--O", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 21, "authors#_avg": 6, "corr_rating_confidence": 
0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11938322726050843755&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Southern University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.sustech.edu.cn", "aff_unique_abbr": "SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "kSnfpHJBJOt", "title": "DexVIP: Learning Dexterous Grasping with Human Hand Pose Priors from Video", "track": "main", "status": "Poster", "tldr": "", "abstract": "Dexterous multi-fingered robotic hands have a formidable action space, yet their morphological similarity to the human hand holds immense potential to accelerate robot learning. We propose DexVIP, an approach to learn dexterous robotic grasping from human-object interactions present in in-the-wild YouTube videos. We do this by curating grasp images from human-object interaction videos and imposing a prior over the agent's hand pose when learning to grasp with deep reinforcement learning. A key advantage of our method is that the learned policy is able to leverage free-form in-the-wild visual data. As a result, it can easily scale to new objects, and it sidesteps the standard practice of collecting human demonstrations in a lab---a much more expensive and indirect way to capture human expertise. 
Through experiments on 27 objects with a 30-DoF simulated robot hand, we demonstrate that DexVIP compares favorably to existing approaches that lack a hand pose prior or rely on specialized tele-operation equipment to obtain human demonstrations, while also being faster to train.", "keywords": "dexterous manipulation;learning from observations;learning from demonstrations;computer vision", "primary_area": "", "supplementary_material": "/attachment/227ab1acc0e4cfff1fbb0daeb85959207fb6fc02.zip", "author": "Priyanka Mandikal;Kristen Grauman", "authorids": "~Priyanka_Mandikal2;~Kristen_Grauman1", "gender": "F;F", "homepage": "https://priyankamandikal.github.io/;http://www.cs.utexas.edu/~grauman/", "dblp": "223/5951;57/4553", "google_scholar": "NRQftjIAAAAJ;Jp6Mz1sAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Priyanka_Mandikal2;~Kristen_Grauman1", "aff": "University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nmandikal2021dexvip,\ntitle={Dex{VIP}: Learning Dexterous Grasping with Human Hand Pose Priors from Video},\nauthor={Priyanka Mandikal and Kristen Grauman},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=kSnfpHJBJOt}\n}", "github": "", "project": "", "reviewers": "HJKw;Vrhm;8fsL", "site": "https://openreview.net/forum?id=kSnfpHJBJOt", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8372342004753625396&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": 
"Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "kgoWLlA33-U", "title": "LS3: Latent Space Safe Sets for Long-Horizon Visuomotor Control of Sparse Reward Iterative Tasks", "track": "main", "status": "Poster", "tldr": "", "abstract": "Reinforcement learning (RL) has shown impressive success in exploring high-dimensional environments to learn complex tasks, but can often exhibit unsafe behaviors and require extensive environment interaction when exploration is unconstrained. A promising strategy for learning in dynamically uncertain environments is requiring that the agent can robustly return to learned Safe Sets, where task success (and therefore safety) can be guaranteed. While this approach has been successful in low-dimensions, enforcing this constraint in environments with visual observation spaces is exceedingly challenging. We present a novel continuous representation for Safe Sets framed as a binary classification problem in a learned latent space, which flexibly scales to high-dimensional image observations. We then present a new algorithm, Latent Space Safe Sets (LS3), which uses this representation for long-horizon control. We evaluate LS3 on 4 domains, including a challenging sequential pushing task in simulation and a physical cable routing task. We find that LS3 can use prior task successes to restrict exploration and learn more efficiently than prior algorithms while satisfying constraints. See https://tinyurl.com/latent-safe-sets for supplementary material.", "keywords": "Reinforcement Learning;Imitation Learning;Safety", "primary_area": "", "supplementary_material": "/attachment/80fcece87ad6a72731212604d647d858d84b3ead.zip", "author": "Albert Wilcox;Ashwin Balakrishna;Brijen Thananjeyan;Joseph E. 
Gonzalez;Ken Goldberg", "authorids": "~Albert_Wilcox1;~Ashwin_Balakrishna1;~Brijen_Thananjeyan1;~Joseph_E._Gonzalez1;~Ken_Goldberg1", "gender": "M;M;M;M;M", "homepage": "https://albertwilcox.github.io/;https://abalakrishna123.github.io/;http://bthananjeyan.github.io/;http://eecs.berkeley.edu/~jegonzal;http://goldberg.berkeley.edu/", "dblp": ";218/5246.html;203/5466;61/8262;g/KennethYGoldberg", "google_scholar": "bj628LsAAAAJ;tfN6V84AAAAJ;fftO_HsAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ", "orcid": ";;;0000-0003-2921-956X;0000-0001-6747-9499", "linkedin": "albert-wilcox-314898184/;ashwin-balakrishna-9b71a357/;;;goldbergken/", "or_profile": "~Albert_Wilcox1;~Ashwin_Balakrishna1;~Brijen_Thananjeyan1;~Joseph_E._Gonzalez1;~Ken_Goldberg1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "Undergrad student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwilcox2021ls,\ntitle={{LS}3: Latent Space Safe Sets for Long-Horizon Visuomotor Control of Sparse Reward Iterative Tasks},\nauthor={Albert Wilcox and Ashwin Balakrishna and Brijen Thananjeyan and Joseph E. 
Gonzalez and Ken Goldberg},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=kgoWLlA33-U}\n}", "github": "", "project": "", "reviewers": "phDr;pzwD;5joi;aHEL", "site": "https://openreview.net/forum?id=kgoWLlA33-U", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6171241967300604653&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "knObbYqSowX", "title": "Group-based Motion Prediction for Navigation in Crowded Environments", "track": "main", "status": "Oral", "tldr": "", "abstract": "We focus on the problem of planning the motion of a robot in a dynamic multiagent environment such as a pedestrian scene. Enabling the robot to navigate safely and in a socially compliant fashion in such scenes requires a representation that accounts for the unfolding multiagent dynamics. Existing approaches to this problem tend to employ microscopic models of motion prediction that reason about the individual behavior of other agents. While such models may achieve high tracking accuracy in trajectory prediction benchmarks, they often lack an understanding of the group structures unfolding in crowded scenes. Inspired by the Gestalt theory from psychology, we build a Model Predictive Control framework (G-MPC) that leverages group-based prediction for robot motion planning. 
We conduct an extensive simulation study involving a series of challenging navigation tasks in scenes extracted from two real-world pedestrian datasets. We illustrate that G-MPC enables a robot to achieve statistically significantly higher safety and lower number of group intrusions than a series of baselines featuring individual pedestrian motion prediction models. Finally, we show that G-MPC can handle noisy lidar-scan estimates without significant performance losses.", "keywords": "Social Navigation;Group-based Navigation;Applications of Robot Learning in Navigation", "primary_area": "", "supplementary_material": "/attachment/77e3cfc69a2f28ab027f2279bdf1a848cbd47693.zip", "author": "Allan Wang;Christoforos Mavrogiannis;Aaron Steinfeld", "authorids": "~Allan_Wang1;~Christoforos_Mavrogiannis1;~Aaron_Steinfeld1", "gender": "M;;", "homepage": "https://allanwangliqian.com;https://www.chrismavrogiannis.com;", "dblp": ";135/8549;", "google_scholar": "te0G04oAAAAJ;dTV6Zj4AAAAJ;", "orcid": "0000-0002-8253-2742;;", "linkedin": "allanwangliqian/;;", "or_profile": "~Allan_Wang1;~Christoforos_Mavrogiannis1;~Aaron_Steinfeld1", "aff": "Carnegie Mellon University;University of Washington;Carnegie-Mellon University", "aff_domain": "cmu.edu;cs.washington.edu;", "position": "PhD student;Postdoc;", "bibtex": "@inproceedings{\nwang2021groupbased,\ntitle={Group-based Motion Prediction for Navigation in Crowded Environments},\nauthor={Allan Wang and Christoforos Mavrogiannis and Aaron Steinfeld},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=knObbYqSowX}\n}", "github": "", "project": "", "reviewers": "rGtm;6KBt;UWWw;qTAW", "site": "https://openreview.net/forum?id=knObbYqSowX", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 38, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=2137717649752272387&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.washington.edu", "aff_unique_abbr": "CMU;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "lAtePxetBNb", "title": "Multi-Agent Trajectory Prediction by Combining Egocentric and Allocentric Views", "track": "main", "status": "Poster", "tldr": "", "abstract": "Trajectory prediction of road participants such as vehicles and pedestrians is crucial for autonomous driving. Recently, graph neural network (GNN) is widely adopted to capture the social interactions among the agents. Many GNN-based models formulate the prediction task as a single-agent prediction problem where multiple inference is needed for multi-agent prediction (which is common in practice), which leads to fundamental inconsistency in terms of homotopy as well as inefficiency for the memory and time. Moreover, even for models that do perform joint prediction, typically one centric agent is selected and all other agents\u2019 information is normalized based on that. Such centric-only normalization leads to asymmetric encoding of different agents in GNN, which might harm its performance. In this work, we propose a efficient multi-agent prediction framework that can predict all agents' trajectories jointly by normalizing and processing all agents' information symmetrically and homogeneously with combined egocentirc and allocentric views. Experiments are conducted on two interaction-rich behavior datasets: INTERACTION (vehicles) and TrajNet++ (pedestrian). The results show that the proposed framework can significantly boost the inference speed of the GNN-based model for multi-agent prediction and achieve better performance. 
In the INTERACTION dataset's challenge, the proposed model achieved the 1st place in the regular track and generalization track.", "keywords": "Autonomous Driving;Joint Trajectory Prediction;Multi-Agent Interaction", "primary_area": "", "supplementary_material": "/attachment/90c3407b7dcd5d9f0793f4fa2e8dc3bbff80aa94.zip", "author": "Xiaosong Jia;Liting Sun;Hang Zhao;Masayoshi Tomizuka;Wei Zhan", "authorids": "~Xiaosong_Jia1;litingsun@berkeley.edu;~Hang_Zhao1;~Masayoshi_Tomizuka2;~Wei_Zhan2", "gender": "M;;M;;", "homepage": "https://jiaxiaosong1002.github.io/;;http://www.mit.edu/~hangzhao/;;", "dblp": "274/6360;;;;", "google_scholar": "JeFQwxUAAAAJ;;DmahiOYAAAAJ;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xiaosong_Jia1;litingsun@berkeley.edu;~Hang_Zhao1;~Masayoshi_Tomizuka2;~Wei_Zhan2", "aff": "University of California, Berkeley;;Tsinghua University;;", "aff_domain": "berkeley.edu;;tsinghua.edu.cn;;", "position": "PhD student;;Assistant Professor;;", "bibtex": "@inproceedings{\njia2021multiagent,\ntitle={Multi-Agent Trajectory Prediction by Combining Egocentric and Allocentric Views},\nauthor={Xiaosong Jia and Liting Sun and Hang Zhao and Masayoshi Tomizuka and Wei Zhan},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=lAtePxetBNb}\n}", "github": "", "project": "", "reviewers": "ddzJ;CH6w;Wu5R;zwfy", "site": "https://openreview.net/forum?id=lAtePxetBNb", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, "confidence_avg": 0, "replies_avg": 22, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10216326092863337528&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Berkeley;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UC 
Berkeley;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "id": "m5k1XdK5nI2", "title": "Exploring Adversarial Robustness of Multi-sensor Perception Systems in Self Driving", "track": "main", "status": "Poster", "tldr": "", "abstract": "Modern self-driving perception systems have been shown to improve upon processing complementary inputs such as LiDAR with images. In isolation, 2D images have been found to be extremely vulnerable to adversarial attacks. Yet, there are limited studies on the adversarial robustness of multi-modal models that fuse LiDAR and image features. Furthermore, existing works do not consider physically realizable perturbations that are consistent across the input modalities. In this paper, we showcase practical susceptibilities of multi-sensor detection by inserting an adversarial object on a host vehicle. We focus on physically realizable and input-agnostic attacks that are feasible to execute in practice, and show that a single universal adversary can hide different host vehicles from state-of-the-art multi-modal detectors. Our experiments demonstrate that successful attacks are primarily caused by easily corrupted image features. Furthermore, in modern sensor fusion methods which project image features into 3D, adversarial attacks can exploit the projection process to generate false positives in distant regions in 3D. 
Towards more robust multi-modal perception systems, we show that adversarial training with feature denoising can boost robustness to such attacks significantly.", "keywords": "Adversarial;Self-Driving;Detection;Multimodal", "primary_area": "", "supplementary_material": "/attachment/b38c4b457dedc4ce4553ce5dc7b17c4fabde871b.zip", "author": "James Tu;Huichen Li;Xinchen Yan;Mengye Ren;Yun Chen;Ming Liang;Eilyan Bitar;Ersin Yumer;Raquel Urtasun", "authorids": "~James_Tu1;~Huichen_Li1;~Xinchen_Yan2;~Mengye_Ren1;~Yun_Chen3;~Ming_Liang2;~Eilyan_Bitar1;~Ersin_Yumer1;~Raquel_Urtasun1", "gender": "M;;;;;;;M;F", "homepage": ";;;http://www.cs.toronto.edu/~mren;;;https://bitar.engineering.cornell.edu/;http://www.meyumer.com;http://www.cs.toronto.edu/~urtasun/", "dblp": ";218/5232;;163/1952;;87/4697;;http://dblp.uni-trier.de/pers/hd/y/Yumer:Ersin;u/RaquelUrtasun", "google_scholar": "https://scholar.google.ca/citations?user=x6gPeg4AAAAJ;Vf3dpOgAAAAJ;;XcQ9WqMAAAAJ;;I5kGk98AAAAJ;;s4Q8hbUAAAAJ;https://scholar.google.ca/citations?user=jyxO2akAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;meyumer/;", "or_profile": "~James_Tu1;~Huichen_Li1;~Xinchen_Yan2;~Mengye_Ren1;~Yun_Chen3;~Ming_Liang2;~Eilyan_Bitar1;~Ersin_Yumer1;~Raquel_Urtasun1", "aff": "Department of Computer Science, University of Toronto;University of Illinois, Urbana Champaign;;University of Toronto;;;Cornell University;Adobe;Department of Computer Science, University of Toronto", "aff_domain": "cs.toronto.edu;illinois.edu;;toronto.edu;;;cornell.edu; ;cs.toronto.edu", "position": "MS student;PhD student;;PhD student;;;Associate Professor;Research Scientist;Full Professor", "bibtex": "@inproceedings{\ntu2021exploring,\ntitle={Exploring Adversarial Robustness of Multi-sensor Perception Systems in Self Driving},\nauthor={James Tu and Huichen Li and Xinchen Yan and Mengye Ren and Yun Chen and Ming Liang and Eilyan Bitar and Ersin Yumer and Raquel Urtasun},\nbooktitle={5th Annual Conference on Robot Learning 
},\nyear={2021},\nurl={https://openreview.net/forum?id=m5k1XdK5nI2}\n}", "github": "", "project": "", "reviewers": "2ySC;4ZtU;2uDN", "site": "https://openreview.net/forum?id=m5k1XdK5nI2", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0, "gs_citation": 94, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8432404360743212878&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "University of Toronto;University of Illinois Urbana-Champaign;Cornell University;Adobe", "aff_unique_dep": "Department of Computer Science;;;Adobe Inc.", "aff_unique_url": "https://www.utoronto.ca;https://illinois.edu;https://www.cornell.edu;https://www.adobe.com", "aff_unique_abbr": "U of T;UIUC;Cornell;Adobe", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Toronto;Urbana-Champaign;", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "Canada;United States" }, { "id": "mOLu2rODIJF", "title": "SORNet: Spatial Object-Centric Representations for Sequential Manipulation", "track": "main", "status": "Oral", "tldr": "", "abstract": "Sequential manipulation tasks require a robot to perceive the state of an environment and plan a sequence of actions leading to a desired goal state, where the ability to reason about spatial relationships among object entities from raw sensor inputs is crucial. Prior works relying on explicit state estimation or end-to-end learning struggle with novel objects or new tasks. In this work, we propose SORNet (Spatial Object-Centric Representation Network), which extracts object-centric representations from RGB images conditioned on canonical views of the objects of interest. 
We show that the object embeddings learned by SORNet generalize zero-shot to unseen object entities on three spatial reasoning tasks: spatial relationship classification, skill precondition classification and relative direction regression, significantly outperforming baselines. Further, we present real-world robotic experiments demonstrating the usage of the learned object embeddings in task planning for sequential manipulation.", "keywords": "Object-centric Representation;Spatial Reasoning;Manipulation", "primary_area": "", "supplementary_material": "/attachment/728778a3e86f036e1f934631bb47f63d18b735ff.zip", "author": "Wentao Yuan;Chris Paxton;Karthik Desingh;Dieter Fox", "authorids": "~Wentao_Yuan1;~Chris_Paxton1;~Karthik_Desingh1;~Dieter_Fox1", "gender": "M;M;M;M", "homepage": "https://wentaoyuan.github.io;https://cpaxton.github.io/;;https://homes.cs.washington.edu/~fox/", "dblp": "225/4795.html;;124/2740;f/DieterFox", "google_scholar": "PZZZG6YAAAAJ;I1mOQpAAAAAJ;zgezSpQAAAAJ;DqXsbPAAAAAJ", "orcid": "0000-0002-3836-8877;;;", "linkedin": ";;;", "or_profile": "~Wentao_Yuan1;~Chris_Paxton1;~Karthik_Desingh1;~Dieter_Fox1", "aff": "University of Washington, Seattle;NVIDIA;University of Washington;Department of Computer Science", "aff_domain": "uw.edu;nvidia.com;washington.edu;cs.washington.edu", "position": "PhD student;Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyuan2021sornet,\ntitle={{SORN}et: Spatial Object-Centric Representations for Sequential Manipulation},\nauthor={Wentao Yuan and Chris Paxton and Karthik Desingh and Dieter Fox},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=mOLu2rODIJF}\n}", "github": "", "project": "", "reviewers": "eyTp;zJy5;nXEV", "site": "https://openreview.net/forum?id=mOLu2rODIJF", "pdf_size": 0, "rating": "10;10;10", "confidence": "", "rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 
86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8596053764885562458&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Washington;NVIDIA;Unknown Institution", "aff_unique_dep": ";NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www.washington.edu;https://www.nvidia.com;", "aff_unique_abbr": "UW;NVIDIA;", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "n1hDe9iK6ms", "title": "Learning Visible Connectivity Dynamics for Cloth Smoothing", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robotic manipulation of cloth remains challenging due to the complex dynamics of cloth, lack of a low-dimensional state representation, and self-occlusions. In contrast to previous model-based approaches that learn a pixel-based dynamics model or a compressed latent vector dynamics, we propose to learn a particle-based dynamics model from a partial point cloud observation. To overcome the challenges of partial observability, we infer which visible points are connected on the underlying cloth mesh. We then learn a dynamics model over this visible connectivity graph. Compared to previous learning-based approaches, our model poses strong inductive bias with its particle based representation for learning the underlying cloth physics; it can generalize to cloths with novel shapes; it is invariant to visual features; and the predictions can be more easily visualized. We show that our method greatly outperforms previous state-of-the-art model-based and model-free reinforcement learning methods in simulation. Furthermore, we demonstrate zero-shot sim-to-real transfer where we deploy the model trained in simulation on a Franka arm and show that the model can successfully smooth cloths of different materials, geometries and colors from crumpled configurations. 
Videos can be found in the supplement and on our anonymous project website: https://sites.google.com/view/vcd-cloth.", "keywords": "Deformable object manipulation;dynamics model learning", "primary_area": "", "supplementary_material": "/attachment/cbaa5fb495fda2aa04382286cf82454fa8e4a6d1.zip", "author": "Xingyu Lin;Yufei Wang;Zixuan Huang;David Held", "authorids": "~Xingyu_Lin1;~Yufei_Wang4;~Zixuan_Huang3;~David_Held1", "gender": "M;;;M", "homepage": "https://xingyu-lin.github.io;https://yufeiwang63.github.io/;https://zxhuang97.github.io/;http://davheld.github.io/", "dblp": ";;;22/11147", "google_scholar": ";HQl9718AAAAJ;;0QtU-NsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xingyu_Lin1;~Yufei_Wang4;~Zixuan_Huang3;~David_Held1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nlin2021learning,\ntitle={Learning Visible Connectivity Dynamics for Cloth Smoothing},\nauthor={Xingyu Lin and Yufei Wang and Zixuan Huang and David Held},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=n1hDe9iK6ms}\n}", "github": "", "project": "", "reviewers": "Lzhn;ViDy;9vXf;hhGm", "site": "https://openreview.net/forum?id=n1hDe9iK6ms", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 124, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8924952869455538507&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": 
";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "n6xYib0irVR", "title": "Influencing Towards Stable Multi-Agent Interactions", "track": "main", "status": "Oral", "tldr": "", "abstract": "Learning in multi-agent environments is difficult due to the non-stationarity introduced by an opponent's or partner's changing behaviors. Instead of reactively adapting to the other agent's (opponent or partner) behavior, we propose an algorithm to proactively influence the other agent's strategy to stabilize -- which can restrain the non-stationarity caused by the other agent. We learn a low-dimensional latent representation of the other agent's strategy and the dynamics of how the latent strategy evolves with respect to our robot's behavior. With this learned dynamics model, we can define an unsupervised stability reward to train our robot to deliberately influence the other agent to stabilize towards a single strategy. We demonstrate the effectiveness of stabilizing in improving efficiency of maximizing the task reward in a variety of simulated environments, including autonomous driving, emergent communication, and robotic manipulation.", "keywords": "multi-agent interactions;human-robot interaction;non-stationarity", "primary_area": "", "supplementary_material": "/attachment/eb0e3cc56b0e13b9d1026e1c5b2c6b69dee94a7e.zip", "author": "Woodrow Zhouyuan Wang;Andy Shih;Annie Xie;Dorsa Sadigh", "authorids": "~Woodrow_Zhouyuan_Wang1;~Andy_Shih1;~Annie_Xie1;~Dorsa_Sadigh1", "gender": "M;;;F", "homepage": "https://woodywang153.github.io/;https://cs.stanford.edu/~andyshih/;https://cs.stanford.edu/~anniexie/;https://dorsa.fyi/", "dblp": ";https://dblp.uni-trier.de/pers/hd/s/Shih:Andy;215/3608;117/3174", "google_scholar": ";G85kxUUAAAAJ;;ZaJEZpYAAAAJ", "orcid": ";;;", "linkedin": "woodrow-wang-214043150/;;;", "or_profile": "~Woodrow_Zhouyuan_Wang1;~Andy_Shih1;~Annie_Xie1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford 
University;Meta Facebook;Stanford University", "aff_domain": "stanford.edu;cs.stanford.edu;fb.com;stanford.edu", "position": "Undergrad student;PhD student;Research Intern;Assistant Professor", "bibtex": "@inproceedings{\nwang2021influencing,\ntitle={Influencing Towards Stable Multi-Agent Interactions},\nauthor={Woodrow Zhouyuan Wang and Andy Shih and Annie Xie and Dorsa Sadigh},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=n6xYib0irVR}\n}", "github": "", "project": "", "reviewers": "mhit;naZQ;CoBB;whdT", "site": "https://openreview.net/forum?id=n6xYib0irVR", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3824058912207840396&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.stanford.edu;https://meta.com", "aff_unique_abbr": "Stanford;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "nWLt35BU1z_", "title": "Taskography: Evaluating robot task planning over large 3D scene graphs", "track": "main", "status": "Poster", "tldr": "", "abstract": "3D scene graphs (3DSGs) are an emerging description; unifying symbolic, topological, and metric scene representations. However, typical 3DSGs contain hundreds of objects and symbols even for small environments; rendering task planning on the \\emph{full} graph impractical. We construct \\textbf{Taskography}, the first large-scale robotic task planning benchmark over 3DSGs. 
While most benchmarking efforts in this area focus on \\emph{vision-based planning}, we systematically study \\emph{symbolic} planning, to decouple planning performance from visual representation learning. We observe that, among existing methods, neither classical nor learning-based planners are capable of real-time planning over \\emph{full} 3DSGs. Enabling real-time planning demands progress on \\emph{both} (a) sparsifying 3DSGs for tractable planning and (b) designing planners that better exploit 3DSG hierarchies. Towards the former goal, we propose \\textbf{Scrub}, a task-conditioned 3DSG sparsification method; enabling classical planners to match (and surpass) state-of-the-art learning-based planners. Towards the latter goal, we propose \\textbf{Seek}, a procedure enabling learning-based planners to exploit 3DSG structure, reducing the number of replanning queries required by current best approaches by an order of magnitude. We will open-source all code and baselines to spur further research along the intersections of robot task planning, learning and 3DSGs.", "keywords": "Robot task planning;3D scene graphs;learning to plan;benchmarks;planning", "primary_area": "", "supplementary_material": "/attachment/4538bb3adb8101042221dd91e8629d3c1ab30711.zip", "author": "Christopher Agia;Krishna Murthy Jatavallabhula;Mohamed Khodeir;Ondrej Miksik;Vibhav Vineet;Mustafa Mukadam;Liam Paull;Florian Shkurti", "authorids": "~Christopher_Agia1;~Krishna_Murthy_Jatavallabhula1;~Mohamed_Khodeir1;~Ondrej_Miksik1;~Vibhav_Vineet5;~Mustafa_Mukadam1;~Liam_Paull1;~Florian_Shkurti1", "gender": "M;;M;;;M;;M", "homepage": "https://www.chrisagia.com/;;;https://miksik.co.uk/;;http://www.mustafamukadam.com;;http://www.cs.toronto.edu/~florian/", "dblp": "268/3555;;;85/9964;;;;21/10333", "google_scholar": "t8Em5FwAAAAJ;;;Q5CBlNcAAAAJ;;yYpm9LoAAAAJ;;https://scholar.google.ca/citations?hl=en", "orcid": "0000-0002-1208-2539;;;;;;;", "linkedin": 
"agiachris/;;khodeir/;ondrej-miksik-40a401a/;;mhmukadam/;;", "or_profile": "~Christopher_Agia1;~Krishna_Murthy_Jatavallabhula1;~Mohamed_Khodeir1;~Ondrej_Miksik1;~Vibhav_Vineet5;~Mustafa_Mukadam1;~Liam_Paull1;~Florian_Shkurti1", "aff": "University of Toronto;;Department of Computer Science, University of Toronto;Microsoft;;Meta AI;;University of Toronto", "aff_domain": "toronto.edu;;cs.toronto.edu;microsoft.com;;meta.com;;cs.toronto.edu", "position": "Undergrad student;;MS student;Scientist;;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nagia2021taskography,\ntitle={Taskography: Evaluating robot task planning over large 3D scene graphs},\nauthor={Christopher Agia and Krishna Murthy Jatavallabhula and Mohamed Khodeir and Ondrej Miksik and Vibhav Vineet and Mustafa Mukadam and Liam Paull and Florian Shkurti},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=nWLt35BU1z_}\n}", "github": "", "project": "", "reviewers": "aMNE;TxRd;PJVU;TvP2", "site": "https://openreview.net/forum?id=nWLt35BU1z_", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12175435985147586085&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Toronto;Microsoft;Meta", "aff_unique_dep": ";Microsoft Corporation;Meta AI", "aff_unique_url": "https://www.utoronto.ca;https://www.microsoft.com;https://meta.com", "aff_unique_abbr": "U of T;Microsoft;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Canada;United States" }, { "id": "ofioIEZvJRG", "title": "Decentralized Control of Quadrotor Swarms with End-to-end Deep Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", 
"abstract": "We demonstrate the possibility of learning drone swarm controllers that are zero-shot transferable to real quadrotors via large-scale multi-agent end-to-end reinforcement learning. We train policies parameterized by neural networks that are capable of controlling individual drones in a swarm in a fully decentralized manner. Our policies, trained in simulated environments with realistic quadrotor physics, demonstrate advanced flocking behaviors, perform aggressive maneuvers in tight formations while avoiding collisions with each other, break and re-establish formations to avoid collisions with moving obstacles, and efficiently coordinate in pursuit-evasion tasks. We analyze, in simulation, how different model architectures and parameters of the training regime influence the final performance of neural swarms. We demonstrate the successful deployment of the model learned in simulation to highly resource-constrained physical quadrotors performing station keeping and goal swapping behaviors. Video demonstrations and source code are available at the project website https://sites.google.com/view/swarm-rl.", "keywords": "Deep Reinforcement Learning;Robot Learning;Multi-Agent Systems;Quadrotors", "primary_area": "", "supplementary_material": "/attachment/d63e775ba2529591dfe38a7b66cdb79443fd03d9.zip", "author": "Sumeet Batra;Zhehui Huang;Aleksei Petrenko;Tushar Kumar;Artem Molchanov;Gaurav S. 
Sukhatme", "authorids": "~Sumeet_Batra1;~Zhehui_Huang1;~Aleksei_Petrenko1;~Tushar_Kumar2;~Artem_Molchanov2;~Gaurav_S._Sukhatme1", "gender": "M;M;M;;M;M", "homepage": "https://sumeetbatra.github.io/;https://zhehui-huang.github.io/;https://alex-petrenko.github.io/;;https://amolchanov86.github.io/;http://www-robotics.usc.edu/~gaurav/", "dblp": "255/5461;267/9259;267/9701;;164/8497;s/GauravSSukhatme", "google_scholar": "https://scholar.google.com/citations?hl=ja;vLUrmdMAAAAJ;G2zXCNkAAAAJ;6vZneqcAAAAJ;BSJyuqQAAAAJ;https://scholar.google.com.tw/citations?user=lRUi-A8AAAAJ", "orcid": ";0000-0001-9620-1699;;;;0000-0003-2408-474X", "linkedin": "sumeetbatra/;zhehui-huang/?locale=en_US;apetrenko-nn/;tushartk/;artem-molchanov-7153986a/;gaurav-sukhatme-9b6420b/", "or_profile": "~Sumeet_Batra1;~Zhehui_Huang1;~Aleksei_Petrenko1;~Tushar_Kumar2;~Artem_Molchanov2;~Gaurav_S._Sukhatme1", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California;NVIDIA;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu;nvidia.com;usc.edu", "position": "PhD student;MS student;PhD student;MS student;Senior Deep Learning Scientist;Full Professor", "bibtex": "@inproceedings{\nbatra2021decentralized,\ntitle={Decentralized Control of Quadrotor Swarms with End-to-end Deep Reinforcement Learning},\nauthor={Sumeet Batra and Zhehui Huang and Aleksei Petrenko and Tushar Kumar and Artem Molchanov and Gaurav S. 
Sukhatme},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=ofioIEZvJRG}\n}", "github": "", "project": "", "reviewers": "s99C;YbEi;g2nt", "site": "https://openreview.net/forum?id=ofioIEZvJRG", "pdf_size": 0, "rating": "1;6;6", "confidence": "", "rating_avg": 4.333333333333333, "confidence_avg": 0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15235136301991180547&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Southern California;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.usc.edu;https://www.nvidia.com", "aff_unique_abbr": "USC;NVIDIA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "oqZrUx-PRqb", "title": "Look Before You Leap: Safe Model-Based Reinforcement Learning with Human Intervention", "track": "main", "status": "Poster", "tldr": "", "abstract": "Safety has become one of the main challenges of applying deep reinforcement learning to real world systems. Currently, the incorporation of external knowledge such as human oversight is the only means to prevent the agent from visiting the catastrophic state. In this paper, we propose MBHI, a novel framework for safe model-based reinforcement learning, which ensures safety in the state-level and can effectively avoid both local and non-local catastrophes. An ensemble of supervised learners are trained in MBHI to imitate human blocking decisions. Similar to human decision-making process, MBHI will roll out an imagined trajectory in the dynamics model before executing actions to the environment, and estimate its safety. 
When the imagination encounters a catastrophe, MBHI will block the current action and use an efficient MPC method to output a safety policy. We evaluate our method on several safety tasks, and the results show that MBHI achieved better performance in terms of sample efficiency and number of catastrophes compared to the baselines.\n", "keywords": "Safety RL;Model-based RL;Model Predict Control", "primary_area": "", "supplementary_material": "/attachment/48572530c11840bbfebb69d092e621dd9d79850a.zip", "author": "Yunkun Xu;Zhenyu Liu;Guifang Duan;Jiangcheng Zhu;Xiaolong Bai;Jianrong Tan", "authorids": "~Yunkun_Xu1;liuzy@zju.edu.cn;gfduan@zju.edu.cn;~Jiangcheng_Zhu1;baixiaolong1@huawei.com;egi@zju.edu.cn", "gender": ";;;M;;", "homepage": ";;;;;", "dblp": "250/3975.html;;;202/5904.html;;", "google_scholar": "E-xdIwkAAAAJ;;;ZosT8hcAAAAJ;;", "orcid": ";;;;;", "linkedin": "%E4%BA%91%E6%98%86-%E5%BE%90-779387116/;;;https://cn.linkedin.com/in/%E7%96%86%E6%88%90-%E6%9C%B1-85672b169;;", "or_profile": "~Yunkun_Xu1;liuzy@zju.edu.cn;gfduan@zju.edu.cn;~Jiangcheng_Zhu1;baixiaolong1@huawei.com;egi@zju.edu.cn", "aff": "Huawei Technologies Ltd.;;;Huawei Technologies Ltd.;;", "aff_domain": "huawei.com;;;huawei.com;;", "position": "Research intern;;;Researcher;;", "bibtex": "@inproceedings{\nxu2021look,\ntitle={Look Before You Leap: Safe Model-Based Reinforcement Learning with Human Intervention},\nauthor={Yunkun Xu and Zhenyu Liu and Guifang Duan and Jiangcheng Zhu and Xiaolong Bai and Jianrong Tan},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=oqZrUx-PRqb}\n}", "github": "", "project": "", "reviewers": "VQ9u;QSAz;wEtd;rwsy", "site": "https://openreview.net/forum?id=oqZrUx-PRqb", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 15, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10307473226603994174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "ovRdr3FOIIm", "title": "Geometry-aware Bayesian Optimization in Robotics using Riemannian Mat\u00e9rn Kernels", "track": "main", "status": "Poster", "tldr": "", "abstract": "Bayesian optimization is a data-efficient technique which can be used for control parameter tuning, parametric policy adaptation, and structure design in robotics. Many of these problems require optimization of functions defined on non-Euclidean domains like spheres, rotation groups, or spaces of positive-definite matrices. To do so, one must place a Gaussian process prior, or equivalently define a kernel, on the space of interest. Effective kernels typically reflect the geometry of the spaces they are defined on, but designing them is generally non-trivial. Recent work on the Riemannian Mat\u00e9rn kernels, based on stochastic partial differential equations and spectral theory of the Laplace--Beltrami operator, offers promising avenues towards constructing such geometry-aware kernels. 
In this paper, we study techniques for implementing these kernels on manifolds of interest in robotics, demonstrate their performance on a set of artificial benchmark functions, and illustrate geometry-aware Bayesian optimization for a variety of robotic applications, covering orientation control, manipulability optimization, and motion planning, while showing its improved performance.", "keywords": "Bayesian optimization;Mat\u00e9rn kernels;Riemannian manifolds", "primary_area": "", "supplementary_material": "/attachment/9b985ed45e5a138cf96c78bb83a69967e6ca4559.zip", "author": "No\u00e9mie Jaquier;Viacheslav Borovitskiy;Andrei Smolensky;Alexander Terenin;Tamim Asfour;Leonel Rozo", "authorids": "~No\u00e9mie_Jaquier1;~Viacheslav_Borovitskiy1;~Andrei_Smolensky1;~Alexander_Terenin1;~Tamim_Asfour1;~Leonel_Rozo1", "gender": "M;;M;M;M;F", "homepage": "https://vab.im/;;https://avt.im/;http://www.humanoids.kit.edu/;https://leonelrozo.weebly.com/;https://njaquier.ch/", "dblp": "259/3201;;185/1040;34/6686.html;10/9515;", "google_scholar": "https://scholar.google.ru/citations?user=1KqNyNMAAAAJ;https://scholar.google.ru/citations?user=xPb6-CsAAAAJ;6Qa-wXMAAAAJ;https://scholar.google.de/citations?user=65bIT4oAAAAJ;https://scholar.google.it/citations?user=vLWgi-YAAAAJ;j3rJXU4AAAAJ", "orcid": ";;0000-0001-5292-3104;;0000-0001-5970-9135;", "linkedin": ";;;;leonelrozo/;", "or_profile": "~Viacheslav_Borovitskiy1;~Andrei_Smolensky1;~Alexander_Terenin1;~Tamim_Asfour1;~Leonel_Dario_Rozo1;~Noemie_Jaquier1", "aff": "St. Petersburg State University;St. 
Petersburg State University;Imperial College London;Karlsruhe Institute of Technology;Robert Bosch GmbH, Bosch;Karlsruhe Institute of Technology", "aff_domain": "spbu.ru;spbu.ru;imperial.ac.uk;kit.edu;de.bosch.com;kit.edu", "position": "Researcher;Assistant Professor;PhD student;Full Professor;Researcher;Postdoc", "bibtex": "@inproceedings{\njaquier2021geometryaware,\ntitle={Geometry-aware Bayesian Optimization in Robotics using Riemannian Mat\\'ern Kernels},\nauthor={No{\\'e}mie Jaquier and Viacheslav Borovitskiy and Andrei Smolensky and Alexander Terenin and Tamim Asfour and Leonel Rozo},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=ovRdr3FOIIm}\n}", "github": "", "project": "", "reviewers": "f3EN;3LxP;6VZw;znr9", "site": "https://openreview.net/forum?id=ovRdr3FOIIm", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15891840477373466734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2;3;2", "aff_unique_norm": "St. Petersburg State University;Imperial College London;Karlsruhe Institute of Technology;Robert Bosch GmbH", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.spbu.ru;https://www.imperial.ac.uk;https://www.kit.edu;https://www.bosch.com", "aff_unique_abbr": "SPbU;ICL;KIT;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;2", "aff_country_unique": "Russian Federation;United Kingdom;Germany" }, { "id": "p-TBwVowXRH", "title": "Learning Multi-Stage Tasks with One Demonstration via Self-Replay", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this work, we introduce a novel method to learn everyday-like multi-stage tasks from a single human demonstration, without requiring any prior object knowledge. 
Inspired by the recent Coarse-to-Fine Imitation Learning, we model imitation learning as a learned object reaching phase followed by an open-loop replay of the operator's actions. We build upon this for multi-stage tasks where, following the human demonstration, the robot can autonomously collect image data for the entire multi-stage task, by reaching the next object in the sequence and then replaying the demonstration, repeating in a loop for all stages of the task. We evaluate with real-world experiments on a set of everyday multi-stage tasks, which we show that our method can solve from a single demonstration.\nVideos and supplementary material can be found at this webpage: https://www.robot-learning.uk/self-replay.", "keywords": "Imitation Learning;Multi-Stage Imitation Learning;Robot Manipulation", "primary_area": "", "supplementary_material": "/attachment/9d1f0d4f04275f87595147e4672108afad7eb800.zip", "author": "Norman Di Palo;Edward Johns", "authorids": "~Norman_Di_Palo1;~Edward_Johns1", "gender": "M;M", "homepage": ";https://www.robot-learning.uk", "dblp": ";68/9968", "google_scholar": "8tjrGBUAAAAJ;https://scholar.google.co.uk/citations?user=sMIUkiQAAAAJ", "orcid": ";0000-0002-8914-8786", "linkedin": ";https://uk.linkedin.com/in/edward-johns-1b24845a", "or_profile": "~Norman_Di_Palo1;~Edward_Johns1", "aff": "Imperial College London;Imperial College London", "aff_domain": "ic.ac.uk;imperial.ac.uk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\npalo2021learning,\ntitle={Learning Multi-Stage Tasks with One Demonstration via Self-Replay},\nauthor={Norman Di Palo and Edward Johns},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=p-TBwVowXRH}\n}", "github": "", "project": "", "reviewers": "yNGA;qL1E;ViCo;tkPW", "site": "https://openreview.net/forum?id=p-TBwVowXRH", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 16, 
"authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14811348063115880186&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "p9Pe-l9MMEq", "title": "Scaling Up Multi-Task Robotic Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "General-purpose robotic systems must master a large repertoire of diverse skills. While reinforcement learning provides a powerful framework for acquiring individual behaviors, the time needed to acquire each skill makes the prospect of a generalist robot trained with RL daunting. In this paper, we study how a large-scale collective robotic learning system can acquire a repertoire of behaviors simultaneously, sharing exploration, experience, and representations across tasks. In this framework, new tasks can be continuously instantiated from previously learned tasks improving overall performance and capabilities of the system. To instantiate this system, we develop a scalable and intuitive framework for specifying new tasks through user-provided examples of desired outcomes, devise a multi-robot collective learning system for data collection that simultaneously collects experience for multiple tasks, and develop a scalable and generalizable multi-task deep reinforcement learning method, which we call MT-Opt. We demonstrate how MT-Opt can learn a wide range of skills, including semantic picking (i.e., picking an object from a particular category), placing into various fixtures (e.g., placing a food item onto a plate), covering, aligning, and rearranging. 
We train and evaluate our system on a set of 12 real-world tasks with data collected from 7 robots, and demonstrate the performance of our system both in terms of its ability to generalize to structurally similar new tasks, and acquire distinct new tasks more quickly by leveraging past experience. We recommend viewing the videos at https://karolhausman.github.io/mt-opt/.", "keywords": "Multi-Task Reinforcement Learning;Robot Learning", "primary_area": "", "supplementary_material": "/attachment/8c1fa09293b2bc58946635e7851fe2768da1f3fd.zip", "author": "Dmitry Kalashnikov;Jake Varley;Yevgen Chebotar;Benjamin Swanson;Rico Jonschkowski;Chelsea Finn;Sergey Levine;Karol Hausman", "authorids": "~Dmitry_Kalashnikov1;~Jake_Varley1;~Yevgen_Chebotar1;~Benjamin_Swanson2;~Rico_Jonschkowski1;~Chelsea_Finn1;~Sergey_Levine1;~Karol_Hausman2", "gender": ";M;M;M;;F;M;M", "homepage": ";http://www.cs.columbia.edu/~jvarley/;;https://www.linkedin.com/in/benjamin-swanson-23516a117;;https://ai.stanford.edu/~cbfinn/;https://people.eecs.berkeley.edu/~svlevine/;https://karolhausman.github.io/", "dblp": "222/2882;;01/11424;;165/1321;131/1783;80/7594;135/8164", "google_scholar": ";UJcm1MoAAAAJ;ADkiClQAAAAJ;;5ErX8dMAAAAJ;vfPE6hgAAAAJ;8R35rCwAAAAJ;yy0UFOwAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;karolhausman/", "or_profile": "~Dmitry_Kalashnikov1;~Jake_Varley1;~Yevgen_Chebotar1;~Benjamin_Swanson2;~Rico_Jonschkowski1;~Chelsea_Finn1;~Sergey_Levine1;~Karol_Hausman1", "aff": "Google;Google;Google;;Google;Google;Google;Google Brain", "aff_domain": "google.com;google.com;google.com;;google.com;google.com;google.com;google.com", "position": "Researcher;Engineer;Research Scientist;;Research Scientist;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nkalashnikov2021scaling,\ntitle={Scaling Up Multi-Task Robotic Reinforcement Learning},\nauthor={Dmitry Kalashnikov and Jake Varley and Yevgen Chebotar and Benjamin Swanson and Rico Jonschkowski and Chelsea Finn and 
Sergey Levine and Karol Hausman},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=p9Pe-l9MMEq}\n}", "github": "", "project": "", "reviewers": "xsVH;UVT2;k7mA;vENi", "site": "https://openreview.net/forum?id=p9Pe-l9MMEq", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, "confidence_avg": 0, "replies_avg": 25, "authors#_avg": 8, "corr_rating_confidence": 0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9930128373964612806&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "pXpytHo_GC1", "title": "Learning to Plan Optimistically: Uncertainty-Guided Deep Exploration via Latent Model Ensembles", "track": "main", "status": "Poster", "tldr": "", "abstract": "Learning complex robot behaviors through interaction requires structured exploration. Planning should target interactions with the potential to optimize long-term performance, while only reducing uncertainty where conducive to this objective. This paper presents Latent Optimistic Value Exploration (LOVE), a strategy that enables deep exploration through optimism in the face of uncertain long-term rewards. We combine latent world models with value function estimation to predict infinite-horizon returns and recover associated uncertainty via ensembling. The policy is then trained on an upper confidence bound (UCB) objective to identify and select the interactions most promising to improve long-term performance. 
We apply LOVE to visual robot control tasks in continuous action spaces and demonstrate on average more than 20% improved sample efficiency in comparison to state-of-the-art and other exploration objectives. In sparse and hard to explore environments we achieve an average improvement of over 30%.", "keywords": "Learning Control;Sample Efficiency;Exploration", "primary_area": "", "supplementary_material": "/attachment/0ea92c3cc5412d720bbcfdc4ee20af4308caec6d.zip", "author": "Tim Seyde;Wilko Schwarting;Sertac Karaman;Daniela Rus", "authorids": "~Tim_Seyde1;~Wilko_Schwarting1;~Sertac_Karaman1;~Daniela_Rus1", "gender": ";;M;F", "homepage": ";;https://karaman.mit.edu;https://www.csail.mit.edu/person/daniela-rus", "dblp": "226/6408;191/0268;45/1718;r/DanielaRus", "google_scholar": "FJ7ILzkAAAAJ;;Vu-Zb7EAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tim_Seyde1;~Wilko_Schwarting1;~Sertac_Karaman1;~Daniela_Rus1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu;mit.edu", "position": "Student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nseyde2021learning,\ntitle={Learning to Plan Optimistically: Uncertainty-Guided Deep Exploration via Latent Model Ensembles},\nauthor={Tim Seyde and Wilko Schwarting and Sertac Karaman and Daniela Rus},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=pXpytHo_GC1}\n}", "github": "", "project": "", "reviewers": "Z9mA;KfAh;BJGj;5s1s", "site": "https://openreview.net/forum?id=pXpytHo_GC1", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6389480677998364023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, 
"aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "rif3a5NAxU6", "title": "Implicit Behavioral Cloning", "track": "main", "status": "Poster", "tldr": "", "abstract": "We find that across a wide range of robot policy learning scenarios, treating supervised policy learning with an implicit model generally performs better, on average, than commonly used explicit models. We present extensive experiments on this finding, and we provide both intuitive insight and theoretical arguments distinguishing the properties of implicit models compared to their explicit counterparts, particularly with respect to approximating complex, potentially discontinuous and multi-valued (set-valued) functions. On robotic policy learning tasks we show that implicit behavior-cloning policies with energy-based models (EBM) often outperform common explicit (Mean Square Error, or Mixture Density) behavior-cloning policies, including on tasks with high-dimensional action spaces and visual image inputs. We find these policies provide competitive results or outperform state-of-the-art offline reinforcement learning methods on the challenging human-expert tasks from the D4RL benchmark suite, despite using no reward information. In the real world, robots with implicit policies can learn complex and remarkably subtle behaviors on contact-rich tasks from human demonstrations, including tasks with high combinatorial complexity and tasks requiring 1mm precision. 
", "keywords": "Implicit Models;Energy-Based Models;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/cf1037cf0c271932ea94f66b734c3df5bf018992.zip", "author": "Pete Florence;Corey Lynch;Andy Zeng;Oscar A Ramirez;Ayzaan Wahid;Laura Downs;Adrian Wong;Johnny Lee;Igor Mordatch;Jonathan Tompson", "authorids": "~Pete_Florence1;~Corey_Lynch1;~Andy_Zeng3;~Oscar_A_Ramirez1;~Ayzaan_Wahid1;ldowns@google.com;~Adrian_Wong1;johnnylee@google.com;~Igor_Mordatch4;~Jonathan_Tompson1", "gender": ";M;;M;M;;;;;M", "homepage": "http://www.peteflorence.com/;https://coreylynch.github.io/;;;https://ayzaan.com;;http://almostsquare.com/;;;http://jonathantompson.com", "dblp": ";155/3141;;145/7596;;;;;;139/0769", "google_scholar": ";CYWO-oAAAAAJ;;LLnrH8IAAAAJ;;;9MjZO8wAAAAJ;;;U_Jw8DUAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";;;oscar-ramirez-905913b9;;;almostsquare;;;", "or_profile": "~Pete_Florence1;~Corey_Lynch1;~Andy_Zeng3;~Oscar_A_Ramirez1;~Ayzaan_Wahid1;ldowns@google.com;~Adrian_Wong1;johnnylee@google.com;~Igor_Mordatch4;~Jonathan_Tompson1", "aff": "Google;Google;;Google;Robotics at Google;;Google;;;Google DeepMind", "aff_domain": "google.com;google.com;;google.com;google.com;;google.com;;;google.com", "position": "Research Scientist;Researcher;;Researcher;Software Engineer;;Researcher;;;Researcher", "bibtex": "@inproceedings{\nflorence2021implicit,\ntitle={Implicit Behavioral Cloning},\nauthor={Pete Florence and Corey Lynch and Andy Zeng and Oscar A Ramirez and Ayzaan Wahid and Laura Downs and Adrian Wong and Johnny Lee and Igor Mordatch and Jonathan Tompson},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=rif3a5NAxU6}\n}", "github": "", "project": "", "reviewers": "BwVg;iNFj;o4TR", "site": "https://openreview.net/forum?id=rif3a5NAxU6", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 18, "authors#_avg": 10, 
"corr_rating_confidence": 0, "gs_citation": 467, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=278145167063497759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "s3tu6Ma1jga", "title": "Motivating Physical Activity via Competitive Human-Robot Interaction", "track": "main", "status": "Oral", "tldr": "", "abstract": "This project aims to motivate research in competitive human-robot interaction by creating a robot competitor that can challenge human users in certain scenarios such as physical exercise and games. With this goal in mind, we introduce the Fencing Game, a human-robot competition used to evaluate both the capabilities of the robot competitor and user experience. We develop the robot competitor through iterative multi-agent reinforcement learning and show that it can perform well against human competitors. Our user study additionally found that our system was able to continuously create challenging and enjoyable interactions that significantly increased human subjects' heart rates. 
The majority of human subjects considered the system to be entertaining and desirable for improving the quality of their exercise.", "keywords": "Competitive Human-robot Interaction;Reinforcement Learning;HRI;Adversarial Learning;Multi-agent System", "primary_area": "", "supplementary_material": "/attachment/187291f0554b6c3a7468be1f88e89b836f70adc7.zip", "author": "Boling Yang;Golnaz Habibi;Patrick Lancaster;Byron Boots;Joshua Smith", "authorids": "~Boling_Yang1;~Golnaz_Habibi1;planc509@cs.washington.edu;~Byron_Boots1;~Joshua_Smith2", "gender": "M;F;;;M", "homepage": "https://homes.cs.washington.edu/~bolingy/;https://airou.cs.ou.edu/airou/;;;http://sensor.cs.washington.edu", "dblp": "203/5117.html;;;;s/JoshuaRSmith.html", "google_scholar": "sw__JwIAAAAJ;hU-LeNEAAAAJ;;;LnAus20AAAAJ", "orcid": "0000-0002-6211-122X;0000-0002-9130-9323;;;0000-0002-5331-4770", "linkedin": "boling-yang-104534123/;golnaz-habibi-66ba05a/;;;joshua-smith-b8a0b61/", "or_profile": "~Boling_Yang1;~Golnaz_Habibi1;planc509@cs.washington.edu;~Byron_Boots1;~Joshua_Smith2", "aff": "Department of Computer Science, University of Washington;Massachusetts Institute of Technology;;;University of Washington", "aff_domain": "cs.washington.edu;mit.edu;;;cs.washington.edu", "position": "Graduate Research Assistant;Researcher;;;Full Professor", "bibtex": "@inproceedings{\nyang2021motivating,\ntitle={Motivating Physical Activity via Competitive Human-Robot Interaction},\nauthor={Boling Yang and Golnaz Habibi and Patrick Lancaster and Byron Boots and Joshua Smith},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=s3tu6Ma1jga}\n}", "github": "", "project": "", "reviewers": "DiX9;BAQR;h8aJ", "site": "https://openreview.net/forum?id=s3tu6Ma1jga", "pdf_size": 0, "rating": "4;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 12, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=7895828013361325933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Washington;Massachusetts Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.washington.edu;https://web.mit.edu", "aff_unique_abbr": "UW;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "s8xjoLghedM", "title": "Learning Multimodal Rewards from Rankings", "track": "main", "status": "Oral", "tldr": "", "abstract": "Learning from human feedback has shown to be a useful approach in acquiring robot reward functions. However, expert feedback is often assumed to be drawn from an underlying unimodal reward function. This assumption does not always hold including in settings where multiple experts provide data or when a single expert provides data for different tasks---we thus go beyond learning a unimodal reward and focus on learning a multimodal reward function. We formulate the multimodal reward learning as a mixture learning problem and develop a novel ranking-based learning approach, where the experts are only required to rank a given set of trajectories. Furthermore, as access to interaction data is often expensive in robotics, we develop an active querying approach to accelerate the learning process. We conduct experiments and user studies using a multi-task variant of OpenAI's LunarLander and a real Fetch robot, where we collect data from multiple users with different preferences. 
The results suggest that our approach can efficiently learn multimodal reward functions, and improve data-efficiency over benchmark methods that we adapt to our learning problem.", "keywords": "HRI;reward learning;multi-modality;rankings;active learning", "primary_area": "", "supplementary_material": "/attachment/ac1d1bedcefc99add72b1b9a864d5977d9f17468.zip", "author": "Vivek Myers;Erdem Biyik;Nima Anari;Dorsa Sadigh", "authorids": "~Vivek_Myers1;~Erdem_Biyik1;~Nima_Anari1;~Dorsa_Sadigh1", "gender": ";M;M;F", "homepage": "https://people.eecs.berkeley.edu/~vmyers/;http://people.eecs.berkeley.edu/~ebiyik/;https://nimaanari.com;https://dorsa.fyi/", "dblp": "270/8694;194/2736;60/8821;117/3174", "google_scholar": "5NGAbT4AAAAJ;https://scholar.google.com.tr/citations?user=P-G3sjYAAAAJ;kmeUhO8AAAAJ;ZaJEZpYAAAAJ", "orcid": ";0000-0002-9516-3130;0000-0002-4394-3530;", "linkedin": ";https://linkedin.com/in/ebiyik;;", "or_profile": "~Vivek_Myers1;~Erdem_Biyik1;~Nima_Anari1;~Dorsa_Sadigh1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Undergrad student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nmyers2021learning,\ntitle={Learning Multimodal Rewards from Rankings},\nauthor={Vivek Myers and Erdem Biyik and Nima Anari and Dorsa Sadigh},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=s8xjoLghedM}\n}", "github": "", "project": "", "reviewers": "zn9H;H6SU;94gh;4NNL", "site": "https://openreview.net/forum?id=s8xjoLghedM", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10336106492102104472&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff_unique_index": 
"0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "sIVC-oZN1PQ", "title": "Visually-Grounded Library of Behaviors for Manipulating Diverse Objects across Diverse Configurations and Views", "track": "main", "status": "Poster", "tldr": "", "abstract": "We propose a visually-grounded library of behaviors approach for learning to manipulate diverse objects across varying initial and goal configurations and camera placements. Our key innovation is to disentangle the standard image-to-action mapping into two separate modules that use different types of perceptual input: (1) a behavior selector which conditions on intrinsic and semantically-rich object appearance features to select the behaviors that can successfully perform the desired tasks on the object in hand, and (2) a library of behaviors each of which conditions on extrinsic and abstract object properties, such as object location and pose, to predict actions to execute over time. The selector uses a semantically-rich 3D object feature representation extracted from images in a differential end-to-end manner. This representation is trained to be view-invariant and affordance-aware using self-supervision, by predicting varying views and successful object manipulations. We test our framework on pushing and grasping diverse objects in simulation as well as transporting rigid, granular, and liquid food ingredients in a real robot setup. Our model outperforms image-to-action mappings that do not factorize static and dynamic object properties. 
We further ablate the contribution of the selector's input and show the benefits of the proposed view-predictive, affordance-aware 3D visual object representations.", "keywords": "robot learning;visual representation;interactive perception", "primary_area": "", "supplementary_material": "/attachment/ac69c937828c3909c0a6001da2d5129c43bbfe71.zip", "author": "Jingyun Yang;Hsiao-Yu Tung;Yunchu Zhang;Gaurav Pathak;Ashwini Pokle;Christopher G Atkeson;Katerina Fragkiadaki", "authorids": "~Jingyun_Yang1;~Hsiao-Yu_Tung1;~Yunchu_Zhang1;~Gaurav_Pathak1;~Ashwini_Pokle1;~Christopher_G_Atkeson1;~Katerina_Fragkiadaki1", "gender": "M;M;M;F;F;M;F", "homepage": "https://yjy0625.github.io;https://yunchuzhang.github.io/;;https://ashwinipokle.github.io/;https://www.cs.cmu.edu/~katef/;http://www.cs.cmu.edu/~cga/;", "dblp": ";;;228/5527;21/8780;https://dblp.uni-trier.de/pers/a/Atkeson:Christopher_G=.html;199/1661", "google_scholar": "7XBAa2QAAAAJ;;;o_1YtVoAAAAJ;FWp7728AAAAJ;https://scholar.google.ch/citations?user=NB4pgZYAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;gauravpathak1/;;;;", "or_profile": "~Jingyun_Yang1;~Yunchu_Zhang1;~Gaurav_Pathak1;~Ashwini_Pokle1;~Katerina_Fragkiadaki1;~Christopher_Atkeson1;~Hsiao-Yu_Fish_Tung1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;andrew.cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "MS student;MS student;Research Associate;PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nyang2021visuallygrounded,\ntitle={Visually-Grounded Library of Behaviors for Manipulating Diverse Objects across Diverse Configurations and Views},\nauthor={Jingyun Yang and Hsiao-Yu Tung and Yunchu Zhang and Gaurav Pathak and Ashwini Pokle and Christopher G Atkeson and Katerina Fragkiadaki},\nbooktitle={5th Annual Conference on Robot Learning 
},\nyear={2021},\nurl={https://openreview.net/forum?id=sIVC-oZN1PQ}\n}", "github": "", "project": "", "reviewers": "XqcZ;UjV5;GybB;7MZX", "site": "https://openreview.net/forum?id=sIVC-oZN1PQ", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6434792671190894921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "sWBqOL5Nh4P", "title": "Learning Density Distribution of Reachable States for Autonomous Systems", "track": "main", "status": "Poster", "tldr": "", "abstract": "State density distribution, in contrast to worst-case reachability, can be leveraged for safety-related problems to better quantify the likelihood of the risk for potentially hazardous situations. In this work, we propose a data-driven method to compute the density distribution of reachable states for nonlinear and even black-box systems. Our semi-supervised approach learns system dynamics and the state density jointly from trajectory data, guided by the fact that the state density evolution follows the Liouville partial differential equation. With the help of neural network reachability tools, our approach can estimate the set of all possible future states as well as their density. Moreover, we could perform online safety verification with probability ranges for unsafe behaviors to occur. 
We use an extensive set of experiments to show that our learned solution can produce a much more accurate estimate on density distribution, and can quantify risks less conservatively and flexibly comparing with worst-case analysis.", "keywords": "Reachability Density Distribution;Learning Density Distribution;Liouville Theorem", "primary_area": "", "supplementary_material": "/attachment/e956ed54baf8f1879ee4586b3bc6867ce7bf6e00.zip", "author": "Yue Meng;Dawei Sun;Zeng Qiu;Md Tawhid Bin Waez;Chuchu Fan", "authorids": "~Yue_Meng1;~Dawei_Sun3;cqiu1@ford.com;mwaez@ford.com;~Chuchu_Fan2", "gender": "M;M;;;F", "homepage": "https://mengyuest.github.io;https://www.daweisun.me;;;https://chuchu.mit.edu", "dblp": ";;;;127/1756", "google_scholar": "HQHZKyQAAAAJ;JwuiGckAAAAJ;;;J-dq_8EAAAAJ", "orcid": "0000-0003-0204-4819;;;;", "linkedin": "yuemeng95/;;;;chuchu-fan/", "or_profile": "~Yue_Meng1;~Dawei_Sun3;cqiu1@ford.com;mwaez@ford.com;~Chuchu_Fan2", "aff": "Massachusetts Institute of Technology;University of Illinois, Urbana Champaign;;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;illinois.edu;;;mit.edu", "position": "PhD student;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nmeng2021learning,\ntitle={Learning Density Distribution of Reachable States for Autonomous Systems},\nauthor={Yue Meng and Dawei Sun and Zeng Qiu and Md Tawhid Bin Waez and Chuchu Fan},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=sWBqOL5Nh4P}\n}", "github": "", "project": "", "reviewers": "mxEJ;HgHK;bVFw", "site": "https://openreview.net/forum?id=sWBqOL5Nh4P", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 7, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=72455670975125729&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;0", 
"aff_unique_norm": "Massachusetts Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://illinois.edu", "aff_unique_abbr": "MIT;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "tCfLLiP7vje", "title": "Seeing Glass: Joint Point-Cloud and Depth Completion for Transparent Objects", "track": "main", "status": "Oral", "tldr": "", "abstract": "The basis of many object manipulation algorithms is RGB-D input. Yet, commodity RGB-D sensors can only provide distorted depth maps for a wide range of transparent objects due light refraction and absorption. To tackle the perception challenges posed by transparent objects, we propose TranspareNet, a joint point cloud and depth completion method, with the ability to complete the depth of transparent objects in cluttered and complex scenes, even with partially filled fluid contents within the vessels. To address the shortcomings of existing transparent object data collection schemes in literature, we also propose an automated dataset creation workflow that consists of robot-controlled image collection and vision-based automatic annotation. Through this automated workflow, we created Transparent Object Depth Dataset (TODD), which consists of nearly 15000 RGB-D images. Our experimental evaluation demonstrates that TranspareNet outperforms existing state-of-the-art depth completion methods on multiple datasets, including ClearGrasp, and that it also handles cluttered scenes when trained on TODD. 
Code and dataset will be released at https://www.pair.toronto.edu/TranspareNet/", "keywords": "Transparent Objects;Depth Completion;3D Perception;Data Collection", "primary_area": "", "supplementary_material": "/attachment/86e2a2ca8e4350cf9b69c009dcc31ac6ea7d535b.zip", "author": "Haoping Xu;Yi Ru Wang;Sagi Eppel;Alan Aspuru-Guzik;Florian Shkurti;Animesh Garg", "authorids": "~Haoping_Xu1;~Yi_Ru_Wang1;sagieppel@gmail.com;~Alan_Aspuru-Guzik2;~Florian_Shkurti1;~Animesh_Garg1", "gender": "M;;;M;M;M", "homepage": ";;;http://matter.toronto.edu;http://www.cs.toronto.edu/~florian/;http://animesh.garg.tech", "dblp": ";302/0208;;;21/10333;123/5728", "google_scholar": "9mD-LUMAAAAJ;OTL-u30AAAAJ;;Ag_6KEgAAAAJ;https://scholar.google.ca/citations?hl=en;zp8V7ZMAAAAJ", "orcid": ";;;0000-0002-8277-4434;;0000-0003-0482-4296", "linkedin": ";yi-ru-helen-wang/;;;;animeshgarg/", "or_profile": "~Haoping_Xu1;~Yi_Ru_Wang1;sagieppel@gmail.com;~Alan_Aspuru-Guzik2;~Florian_Shkurti1;~Animesh_Garg1", "aff": "Toronto University;University of Toronto;;University of Toronto;University of Toronto;University of Toronto", "aff_domain": "utoronto.ca;utoronto.ca;;utoronto.ca;cs.toronto.edu;toronto.edu", "position": "PhD student;Undergrad student;;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2021seeing,\ntitle={Seeing Glass: Joint Point-Cloud and Depth Completion for Transparent Objects},\nauthor={Haoping Xu and Yi Ru Wang and Sagi Eppel and Alan Aspuru-Guzik and Florian Shkurti and Animesh Garg},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=tCfLLiP7vje}\n}", "github": "", "project": "", "reviewers": "ieyv;vich;UETv", "site": "https://openreview.net/forum?id=tCfLLiP7vje", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 62, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=10376210519695633116&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "id": "tL24gvaTwhb", "title": "Skill Preferences: Learning to Extract and Execute Robotic Skills from Human Feedback", "track": "main", "status": "Poster", "tldr": "", "abstract": "A promising approach to solving challenging long-horizon tasks has been to extract behavior priors (skills) by fitting generative models to large offline datasets of demonstrations. However, such generative models inherit the biases of the underlying data and result in poor and unusable skills when trained on imperfect demonstration data. To better align skill extraction with human intent we present Skill Preferences (SkiP), an algorithm that learns a model over human preferences and uses it to extract human-aligned skills from offline data. After extracting human-preferred skills, SkiP also utilizes human feedback to solve downstream tasks with RL. 
We show that SkiP enables a simulated kitchen robot to solve complex multi-step manipulation tasks and substantially outperforms prior leading RL algorithms with human preferences as well as leading skill extraction algorithms without human preferences.", "keywords": "Reinforcement Learning;Skill Extraction;Human Preferences", "primary_area": "", "supplementary_material": "/attachment/3c30db438d96b0c288229a5be4c13f0a2c453aa9.zip", "author": "Xiaofei Wang;Kimin Lee;Kourosh Hakhamaneshi;Pieter Abbeel;Michael Laskin", "authorids": "~Xiaofei_Wang5;~Kimin_Lee1;~Kourosh_Hakhamaneshi1;~Pieter_Abbeel2;~Michael_Laskin1", "gender": ";M;M;M;M", "homepage": "https://www.linkedin.com/in/xiaofei-wang-b01743149/;https://sites.google.com/view/kiminlee;https://kouroshhakha.github.io/;https://people.eecs.berkeley.edu/~pabbeel/;http://mishalaskin.com", "dblp": ";183/6849;;;", "google_scholar": ";92M8xv4AAAAJ;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;DOGDnwsAAAAJ", "orcid": ";;;;", "linkedin": ";;;;mishalaskin", "or_profile": "~Xiaofei_Wang5;~Kimin_Lee1;~Kourosh_Hakhamaneshi1;~Pieter_Abbeel2;~Michael_Laskin1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Covariant;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;covariant.ai;berkeley.edu", "position": "Undergrad student;Postdoc;PhD student;Founder;Postdoc", "bibtex": "@inproceedings{\nwang2021skill,\ntitle={Skill Preferences: Learning to Extract and Execute Robotic Skills from Human Feedback},\nauthor={Xiaofei Wang and Kimin Lee and Kourosh Hakhamaneshi and Pieter Abbeel and Michael Laskin},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=tL24gvaTwhb}\n}", "github": "", "project": "", "reviewers": "XBne;6ucT;hYAX;noDs", "site": "https://openreview.net/forum?id=tL24gvaTwhb", "pdf_size": 0, "rating": "4;6;10;10", "confidence": "", "rating_avg": 7.5, 
"confidence_avg": 0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10817062050482384311&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Covariant", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "tfLu5W6SW5J", "title": "Learning Language-Conditioned Robot Behavior from Offline Data and Crowd-Sourced Annotation", "track": "main", "status": "Poster", "tldr": "", "abstract": "We study the problem of learning a range of vision-based manipulation tasks from a large offline dataset of robot interaction. In order to accomplish this, humans need easy and effective ways of specifying tasks to the robot. Goal images are one popular form of task specification, as they are already grounded in the robot's observation space. However, goal images also have a number of drawbacks: they are inconvenient for humans to provide, they can over-specify the desired behavior leading to a sparse reward signal, or under-specify task information in the case of non-goal reaching tasks. Natural language provides a convenient and flexible alternative for task specification, but comes with the challenge of grounding language in the robot's observation space. To scalably learn this grounding we propose to leverage offline pre-collected robotic datasets (including highly sub-optimal, autonomously-collected data) with crowd-sourced natural language labels. With this data, we learn a simple classifier which predicts if a change in state completes a language instruction. This provides a language-conditioned reward function that can then be used for offline multi-task RL. 
In our experiments, we find that on language-conditioned manipulation tasks our approach outperforms both goal-image specifications and language conditioned imitation techniques by more than 25%, and is able to perform a range of visuomotor tasks from natural language, such as \u201copen the right drawer\u201d and \u201cmove the stapler\u201d, on a Franka Emika Panda robot.", "keywords": "Natural Language;Offline Reinforcement Learning;Visuomotor Manipulation", "primary_area": "", "supplementary_material": "/attachment/05d0c2fc8f1ca23cfcb753e1372d932803179683.zip", "author": "Suraj Nair;Eric Mitchell;Kevin Chen;brian ichter;Silvio Savarese;Chelsea Finn", "authorids": "~Suraj_Nair1;~Eric_Mitchell1;~Kevin_Chen2;~brian_ichter1;~Silvio_Savarese1;~Chelsea_Finn1", "gender": "M;M;M;;M;F", "homepage": "https://suraj-nair-1.github.io/;https://ericmitchell.ai;;;;https://ai.stanford.edu/~cbfinn/", "dblp": ";238/0419;39/1303-1;;50/3578;131/1783", "google_scholar": "EHSuFcwAAAAJ;q77J4fgAAAAJ;VCpSh3gAAAAJ;-w5DuHgAAAAJ;ImpbxLsAAAAJ;vfPE6hgAAAAJ", "orcid": ";0000-0002-7487-1744;;;;", "linkedin": ";;;;;", "or_profile": "~Suraj_Nair1;~Eric_Mitchell1;~Kevin_Chen2;~brian_ichter1;~Silvio_Savarese1;~Chelsea_Finn1", "aff": "Meta Facebook;Stanford University;Stanford University;Google;Stanford University;Google", "aff_domain": "facebook.com;stanford.edu;stanford.edu;google.com;stanford.edu;google.com", "position": "Student Researcher;PhD student;Graduate Student;Research Scientist;Associate professor;Research Scientist", "bibtex": "@inproceedings{\nnair2021learning,\ntitle={Learning Language-Conditioned Robot Behavior from Offline Data and Crowd-Sourced Annotation},\nauthor={Suraj Nair and Eric Mitchell and Kevin Chen and brian ichter and Silvio Savarese and Chelsea Finn},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=tfLu5W6SW5J}\n}", "github": "", "project": "", "reviewers": "P4BX;avX2;aTgV;k4Ao", "site": 
"https://openreview.net/forum?id=tfLu5W6SW5J", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 170, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12364060098964778019&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;2;1;2", "aff_unique_norm": "Meta;Stanford University;Google", "aff_unique_dep": "Meta Platforms, Inc.;;Google", "aff_unique_url": "https://meta.com;https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Meta;Stanford;Google", "aff_campus_unique_index": "1;1;2;1;2", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "tjdXRqKaz5Y", "title": "Aligning an optical interferometer with beam divergence control and continuous action space", "track": "main", "status": "Poster", "tldr": "", "abstract": "Reinforcement learning is finding its way to real-world problem application, transferring from simulated environments to physical setups. In this work, we implement vision-based alignment of an optical Mach-Zehnder interferometer with a confocal telescope in one arm, which controls the diameter and divergence of the corresponding beam. We use a continuous action space; exponential scaling enables us to handle actions within a range of over two orders of magnitude. Our agent trains only in a simulated environment with domain randomizations. 
In an experimental evaluation, the agent significantly outperforms an existing solution and a human expert.", "keywords": "sim-to-real;robotics;optical interferometer", "primary_area": "", "supplementary_material": "/attachment/13a58119d0b853b21874dc93c5c6af5c8f985de8.zip", "author": "Stepan Makarenko;Dmitry Igorevich Sorokin;Alexander Ulanov;Alexander Lvovsky", "authorids": "~Stepan_Makarenko1;~Dmitry_Igorevich_Sorokin1;~Alexander_Ulanov2;~Alexander_Lvovsky1", "gender": "M;M;;Not Specified", "homepage": ";;;https://users.physics.ox.ac.uk/~lvovsky/", "dblp": ";https://dblp.uni-trier.de/pid/203/4483;;", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ru/citations?user=9vlPZJoAAAAJ;vhkS2c4AAAAJ", "orcid": ";;;", "linkedin": "stepan-makarenko-9bb96a1b7;;;", "or_profile": "~Stepan_Makarenko1;~Dmitry_Igorevich_Sorokin1;~Alexander_Ulanov2;~Alexander_Lvovsky1", "aff": "Moscow Institute of Physics and Technology;Russian Quantum Center;Russian Quantum Center;University of Oxford", "aff_domain": "phystech.edu;rqc.ru;rqc.ru;ox.ac.uk", "position": "MS student;Researcher;Senior researcher;Full Professor", "bibtex": "@inproceedings{\nmakarenko2021aligning,\ntitle={Aligning an optical interferometer with beam divergence control and continuous action space},\nauthor={Stepan Makarenko and Dmitry Igorevich Sorokin and Alexander Ulanov and Alexander Lvovsky},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=tjdXRqKaz5Y}\n}", "github": "", "project": "", "reviewers": "ENy6;GxfU;PgwD;rhsP", "site": "https://openreview.net/forum?id=tjdXRqKaz5Y", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5974345157089464139&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": 
"0;1;1;2", "aff_unique_norm": "Moscow Institute of Physics and Technology;Russian Quantum Center;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mipt.ru/en;https://russianquantumcenter.ru;https://www.ox.ac.uk", "aff_unique_abbr": "MIPT;RQC;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Russian Federation;United Kingdom" }, { "id": "tmZsuYPmQ9m", "title": "Anomaly Detection in Multi-Agent Trajectories for Automated Driving", "track": "main", "status": "Poster", "tldr": "", "abstract": "Human drivers can recognise fast abnormal driving situations to avoid accidents. Similar to humans, automated vehicles are supposed to perform anomaly detection. In this work, we propose the spatio-temporal graph auto-encoder for learning normal driving behaviours. Our innovation is the ability to jointly learn multiple trajectories of a dynamic number of agents. To perform anomaly detection, we first estimate a density function of the learned trajectory feature representation and then detect anomalies in low-density regions. Due to the lack of multi-agent trajectory datasets for anomaly detection in automated driving, we introduce our dataset using a driving simulator for normal and abnormal manoeuvres. Our evaluations show that our approach learns the relation between different agents and delivers promising results compared to the related works. 
The code, simulation and the dataset are publicly available.", "keywords": "Anomaly Detection;Multi-Agent Trajectory;Graph Neural Networks;Automated Driving", "primary_area": "", "supplementary_material": "/attachment/99633c42769ba0f03471d80244b9e97aef91b1f3.zip", "author": "Julian Wiederer;Arij Bouazizi;Marco Troina;Ulrich Kressel;Vasileios Belagiannis", "authorids": "~Julian_Wiederer1;~Arij_Bouazizi1;~Marco_Troina1;~Ulrich_Kressel2;~Vasileios_Belagiannis1", "gender": "M;;M;M;M", "homepage": ";;;;https://www.lms.tf.fau.eu/person/vasileios-belagiannis/", "dblp": "271/4656;271/4748.html;;11/4128.html;75/7627", "google_scholar": "https://scholar.google.de/citations?user=YRAtWesAAAAJ;;;https://scholar.google.de/citations?user=5vrUdB4AAAAJ;4IlWd90AAAAJ", "orcid": ";;;;0000-0003-0960-8453", "linkedin": "julian-wiederer/;;marco-troina/;;", "or_profile": "~Julian_Wiederer1;~Arij_Bouazizi1;~Marco_Troina1;~Ulrich_Kressel2;~Vasileios_Belagiannis1", "aff": "Ulm University;;Swiss Federal Institute of Technology Lausanne;Mercedes Benz Research & Development;Ulm University", "aff_domain": "uni-ulm.de;;epfl.ch;daimler.com;uni-ulm.de", "position": "PhD student;;MS student;Manager;Assistant Professor", "bibtex": "@inproceedings{\nwiederer2021anomaly,\ntitle={Anomaly Detection in Multi-Agent Trajectories for Automated Driving},\nauthor={Julian Wiederer and Arij Bouazizi and Marco Troina and Ulrich Kressel and Vasileios Belagiannis},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=tmZsuYPmQ9m}\n}", "github": "", "project": "", "reviewers": "tMsu;McLa;BxGJ", "site": "https://openreview.net/forum?id=tmZsuYPmQ9m", "pdf_size": 0, "rating": "6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11830114033223212917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, 
"aff_unique_index": "0;1;2;0", "aff_unique_norm": "Ulm University;Swiss Federal Institute of Technology Lausanne;Mercedes-Benz Research & Development", "aff_unique_dep": ";;Research & Development", "aff_unique_url": "https://www.uni-ulm.de/;https://www.epfl.ch;https://www.mercedes-benz.com", "aff_unique_abbr": "U Ulm;EPFL;MB R&D", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;Switzerland" }, { "id": "uJi8OvaanP6", "title": "Redundancy Resolution as Action Bias in Policy Search for Robotic Manipulation", "track": "main", "status": "Poster", "tldr": "", "abstract": "We propose a novel approach that biases actions during policy search by lifting the concept of redundancy resolution from multi-DoF robot kinematics to the level of the reward in deep reinforcement learning and evolution strategies. The key idea is to bias the distribution of executed actions in the sense that the immediate reward remains unchanged. The resulting biased actions favor secondary objectives yielding policies that are safer to apply on the real robot. We demonstrate the feasibility of our method, considered as policy search with redundant action bias (PSRAB), in a reaching and a pick-and-lift task with a 7-DoF Franka robot arm trained in RLBench - a recently introduced benchmark for robotic manipulation - using state-of-the-art TD3 deep reinforcement learning and OpenAI's evolutionary strategy. We show that it is a flexible approach without the need of significant fine-tuning and interference with the main objective even across different policy search methods and tasks of different complexity. We evaluate our approach in simulation and on the real robot. 
Our project website with videos and further results can be found at: https://sites.google.com/view/redundant-action-bias", "keywords": "Deep Reinforcement Learning;Evolution Strategies;Redundancy Resolution;Action Bias;Velocity Control", "primary_area": "", "supplementary_material": "/attachment/6e71f20aa5458c10aee658eec03e659e61a80a17.zip", "author": "Firas Al-Hafez;Jochen J. Steil", "authorids": "~Firas_Al-Hafez1;~Jochen_J._Steil1", "gender": "M;", "homepage": "https://firasalhafez.com/;", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": ";", "or_profile": "~Firas_Al-Hafez1;~Jochen_J._Steil1", "aff": "Technische Universit\u00e4t Braunschweig;", "aff_domain": "tu-braunschweig.de;", "position": "MS student;", "bibtex": "@inproceedings{\nal-hafez2021redundancy,\ntitle={Redundancy Resolution as Action Bias in Policy Search for Robotic Manipulation},\nauthor={Firas Al-Hafez and Jochen J. Steil},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=uJi8OvaanP6}\n}", "github": "", "project": "", "reviewers": "pAEv;9a3E;g7Ri;S6P5", "site": "https://openreview.net/forum?id=uJi8OvaanP6", "pdf_size": 0, "rating": "6;6;6;6", "confidence": "", "rating_avg": 6.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9283747006615394238&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Technische Universit\u00e4t Braunschweig", "aff_unique_dep": "", "aff_unique_url": "https://tu-braunschweig.de", "aff_unique_abbr": "TU Braunschweig", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "id": "udFuJTvlhsJ", "title": "Learning Reward Functions from Scale Feedback", "track": "main", "status": "Poster", "tldr": "", "abstract": "Today's robots are increasingly interacting with people and need to 
efficiently learn inexperienced user's preferences. A common framework is to iteratively query the user about which of two presented robot trajectories they prefer. While this minimizes the users effort, a strict choice does not yield any information on how much one trajectory is preferred. We propose scale feedback, where the user utilizes a slider to give more nuanced information. We introduce a probabilistic model on how users would provide feedback and derive a learning framework for the robot. We demonstrate the performance benefit of slider feedback in simulations, and validate our approach in two user studies suggesting that scale feedback enables more effective learning in practice.", "keywords": "HRI;reward learning;learning from choice;active learning", "primary_area": "", "supplementary_material": "/attachment/5937fa255e318b57eb2a6e503f21f9715fde751d.zip", "author": "Nils Wilde;Erdem Biyik;Dorsa Sadigh;Stephen L. Smith", "authorids": "~Nils_Wilde1;~Erdem_Biyik1;~Dorsa_Sadigh1;~Stephen_L._Smith1", "gender": "M;M;F;M", "homepage": "https://sites.google.com/view/nwilde/home;http://people.eecs.berkeley.edu/~ebiyik/;https://dorsa.fyi/;https://ece.uwaterloo.ca/~sl2smith/", "dblp": ";194/2736;117/3174;80/6078-1", "google_scholar": ";https://scholar.google.com.tr/citations?user=P-G3sjYAAAAJ;ZaJEZpYAAAAJ;https://scholar.google.ca/citations?user=_gfwCNwAAAAJ", "orcid": "0000-0003-3238-8153;0000-0002-9516-3130;;0000-0002-8636-407X", "linkedin": ";https://linkedin.com/in/ebiyik;;stephen-l-smith-7088756b/", "or_profile": "~Nils_Wilde1;~Erdem_Biyik1;~Dorsa_Sadigh1;~Stephen_L._Smith1", "aff": "University of Waterloo;Stanford University;Stanford University;University of Waterloo", "aff_domain": "uwaterloo.ca;stanford.edu;stanford.edu;uwaterloo.ca", "position": "Postdoc;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwilde2021learning,\ntitle={Learning Reward Functions from Scale Feedback},\nauthor={Nils Wilde and Erdem Biyik and Dorsa 
Sadigh and Stephen L. Smith},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=udFuJTvlhsJ}\n}", "github": "", "project": "", "reviewers": "cvuG;2eoP;pmpv;Z5iG", "site": "https://openreview.net/forum?id=udFuJTvlhsJ", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 19, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7239587387692200666&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Waterloo;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://uwaterloo.ca;https://www.stanford.edu", "aff_unique_abbr": "UW;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;United States" }, { "id": "vm8Hr9YJHZ1", "title": "Fast and Efficient Locomotion via Learned Gait Transitions", "track": "main", "status": "Oral", "tldr": "", "abstract": "We focus on the problem of developing energy efficient controllers for quadrupedal robots. Animals can actively switch gaits at different speeds to lower their energy consumption. In this paper, we devise a hierarchical learning framework, in which distinctive locomotion gaits and natural gait transitions emerge automatically with a simple reward of energy minimization. We use evolutionary strategies (ES) to train a high-level gait policy that specifies gait patterns of each foot, while the low-level convex MPC controller optimizes the motor commands so that the robot can walk at a desired velocity using that gait pattern. We test our learning framework on a quadruped robot and demonstrate automatic gait transitions, from walking to trotting and to fly-trotting, as the robot increases its speed. 
We show that the learned hierarchical controller consumes much less energy across a wide range of locomotion speed than baseline controllers.", "keywords": "Legged Locomotion;Hierarchical Control;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/dc10ec364c590bb011906c631dca9692501b6bb8.zip", "author": "Yuxiang Yang;Tingnan Zhang;Erwin Coumans;Jie Tan;Byron Boots", "authorids": "~Yuxiang_Yang2;~Tingnan_Zhang1;~Erwin_Coumans1;~Jie_Tan1;~Byron_Boots1", "gender": "M;M;M;M;", "homepage": "https://yxyang.github.io;;;http://www.jie-tan.net;", "dblp": ";https://dblp.uni-trier.de/pers/hd/z/Zhang:Tingnan;;81/7419;", "google_scholar": "2NQKmzIAAAAJ;RM2vMNcAAAAJ;-aapzdEAAAAJ;neGbgzYAAAAJ;", "orcid": ";;;;", "linkedin": ";;;jie-tan/;", "or_profile": "~Yuxiang_Yang2;~Tingnan_Zhang1;~Erwin_Coumans1;~Jie_Tan1;~Byron_Boots1", "aff": "Google;Google;Google;Google;", "aff_domain": "google.com;google.com;google.com;google.com;", "position": "Researcher;Software Engineer;Researcher;Research Scientist;", "bibtex": "@inproceedings{\nyang2021fast,\ntitle={Fast and Efficient Locomotion via Learned Gait Transitions},\nauthor={Yuxiang Yang and Tingnan Zhang and Erwin Coumans and Jie Tan and Byron Boots},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=vm8Hr9YJHZ1}\n}", "github": "", "project": "", "reviewers": "ED3s;22ve;J9Ji;Zoo3", "site": "https://openreview.net/forum?id=vm8Hr9YJHZ1", "pdf_size": 0, "rating": "6;10;10;10", "confidence": "", "rating_avg": 9.0, "confidence_avg": 0, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2698644321080050263&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", 
"aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "vrU4d2yjjUn", "title": "Assisted Robust Reward Design", "track": "main", "status": "Poster", "tldr": "", "abstract": "Real-world robotic tasks require complex reward functions. When we define the problem the robot needs to solve, we pretend that a designer specifies this complex reward exactly, and it is set in stone from then on. In practice, however, reward design is an iterative process: the designer chooses a reward, eventually encounters an ``````''edge-case'' environment where the reward incentivizes the wrong behavior, revises the reward, and repeats. What would it mean to rethink robotics problems to formally account for this iterative nature of reward design? We propose that the robot not take the specified reward for granted, but rather have uncertainty about it, and account for the future design iterations as future evidence. We contribute an Assisted Reward Design method that speeds up the design process by anticipating and influencing this future evidence: rather than letting the designer eventually encounter failure cases and revise the reward then, the method actively exposes the designer to such environments during the development phase. 
We test this method in an autonomous driving task and find that it more quickly improves the car's behavior in held-out environments by proposing environments that are ``edge cases'' for the current reward.", "keywords": "Reward Design;Safety;Human-in-the-loop;Active Learning", "primary_area": "", "supplementary_material": "/attachment/2b4140f756d8798cd8fd5e75738dd650d3fcf588.zip", "author": "Jerry Zhi-Yang He;Anca Dragan", "authorids": "~Jerry_Zhi-Yang_He1;~Anca_Dragan1", "gender": "M;F", "homepage": "https://herobotics.me;http://www.ancadragan.com/", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Jerry_Zhi-Yang_He1;~Anca_Dragan1", "aff": "University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nhe2021assisted,\ntitle={Assisted Robust Reward Design},\nauthor={Jerry Zhi-Yang He and Anca Dragan},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=vrU4d2yjjUn}\n}", "github": "", "project": "", "reviewers": "pnb1;5qJa;oemY;J4qS", "site": "https://openreview.net/forum?id=vrU4d2yjjUn", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 6, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18301206459174445786&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "wBT0lZJAJ0V", "title": "Learn2Assemble with Structured Representations and Search for Robotic Architectural 
Construction", "track": "main", "status": "Poster", "tldr": "", "abstract": "Autonomous robotic assembly requires a well-orchestrated sequence of high-level actions and smooth manipulation executions. Learning to assemble complex 3D structures remains a challenging problem that requires drawing connections between target designs and building blocks, and creating valid assembly sequences considering structural stability and feasibility. To address the combinatorial complexity of the assembly tasks, we propose a multi-head attention graph representation that can be trained with reinforcement learning (RL) to encode the spatial relations and provide meaningful assembly actions. Combining structured representations with model-free RL and Monte-Carlo planning allows agents to operate with various target shapes and building block types. We design a hierarchical control framework that learns to sequence the building blocks to construct arbitrary 3D designs and ensures their feasibility, as we plan the geometric execution with the robot-in-the-loop. We demonstrate the flexibility of the proposed structured representation and our algorithmic solution in a series of simulated 3D assembly tasks with robotic evaluation, which showcases our method's ability to learn to construct stable structures with a large number of building blocks. 
Code and videos are available at: https://sites.google.com/view/learn2assemble", "keywords": "Structured representations;Autonomous assembly;Manipulation", "primary_area": "", "supplementary_material": "/attachment/c400d10746402a4ec0b08935f08b08c8b22e5147.zip", "author": "Niklas Funk;Georgia Chalvatzaki;Boris Belousov;Jan Peters", "authorids": "~Niklas_Funk1;~Georgia_Chalvatzaki1;~Boris_Belousov1;~Jan_Peters3", "gender": "M;F;M;M", "homepage": "https://niklasfunk.de;https://www.ias.informatik.tu-darmstadt.de/Team/GeorgiaChalvatzaki;https://www.jan-peters.net;https://www.ias.informatik.tu-darmstadt.de/Team/BorisBelousov", "dblp": "209/9556;145/3334;p/JanPeters1;191/6726", "google_scholar": "zhsWjy8AAAAJ;https://scholar.google.gr/citations?user=mlho5FkAAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ;https://scholar.google.de/citations?user=XjNbRVYAAAAJ", "orcid": ";;0000-0002-5266-8091;0000-0001-7172-9104", "linkedin": ";;janrpeters/;boris-belousov/", "or_profile": "~Niklas_Funk1;~Georgia_Chalvatzaki1;~Jan_Peters3;~Boris_Belousov2", "aff": "TU Darmstadt;TU Darmstadt;Max Planck Institute for Intelligent Systems;Technical University of Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;tue.mpg.de;tu-darmstadt.de", "position": "PhD student;Postdoc;Researcher;PhD student", "bibtex": "@inproceedings{\nfunk2021learnassemble,\ntitle={Learn2Assemble with Structured Representations and Search for Robotic Architectural Construction},\nauthor={Niklas Funk and Georgia Chalvatzaki and Boris Belousov and Jan Peters},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=wBT0lZJAJ0V}\n}", "github": "", "project": "", "reviewers": "a8Bf;2DWg;7iDb;o7Zw;RVUr", "site": "https://openreview.net/forum?id=wBT0lZJAJ0V", "pdf_size": 0, "rating": "4;6;6;6;10", "confidence": "", "rating_avg": 6.4, "confidence_avg": 0, "replies_avg": 32, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 56, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=4058966258134971782&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Max Planck Institute for Intelligent Systems;Technical University of Darmstadt", "aff_unique_dep": ";Intelligent Systems;", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.mpi-is.mpg.de;https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt;MPI-IS;TUD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "wK2fDDJ5VcF", "title": "Learning to Walk in Minutes Using Massively Parallel Deep Reinforcement Learning", "track": "main", "status": "Poster", "tldr": "", "abstract": "In this work, we present and study a training set-up that achieves fast policy generation for real-world robotic tasks by using massive parallelism on a single workstation GPU. We analyze and discuss the impact of different training algorithm components in the massively parallel regime on the final policy performance and training times. In addition, we present a novel game-inspired curriculum that is well suited for training with thousands of simulated robots in parallel. We evaluate the approach by training the quadrupedal robot ANYmal to walk on challenging terrain. The parallel approach allows training policies for flat terrain in under four minutes, and in twenty minutes for uneven terrain. This represents a speedup of multiple orders of magnitude compared to previous work. 
Finally, we transfer the policies to the real robot to validate the approach.", "keywords": "Reinforcement Learning;Legged Robots;Sim-to-Real", "primary_area": "", "supplementary_material": "/attachment/0a98f9aec9e11095e89821864c695c1d89164ae3.zip", "author": "Nikita Rudin;David Hoeller;Philipp Reist;Marco Hutter", "authorids": "~Nikita_Rudin1;dhoeller@ethz.ch;preist@nvidia.com;mahutter@ethz.ch", "gender": "M;;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0000-0001-5893-0348;;;", "linkedin": "nikita-rudin-bb3199121;;;", "or_profile": "~Nikita_Rudin1;dhoeller@ethz.ch;preist@nvidia.com;mahutter@ethz.ch", "aff": "Swiss Federal Institute of Technology;;;", "aff_domain": "ethz.ch;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nrudin2021learning,\ntitle={Learning to Walk in Minutes Using Massively Parallel Deep Reinforcement Learning},\nauthor={Nikita Rudin and David Hoeller and Philipp Reist and Marco Hutter},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=wK2fDDJ5VcF}\n}", "github": "", "project": "", "reviewers": "Y3ip;cSHN;2XwF;d4o6", "site": "https://openreview.net/forum?id=wK2fDDJ5VcF", "pdf_size": 0, "rating": "6;6;6;10", "confidence": "", "rating_avg": 7.0, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 668, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8503164023891275626&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0", "aff_unique_norm": "Swiss Federal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "wMoHIYBsj2_", "title": "V-MAO: Generative Modeling for Multi-Arm Manipulation of Articulated Objects", "track": "main", "status": "Poster", "tldr": "", "abstract": "Manipulating articulated objects requires 
multiple robot arms in general. It is challenging to enable multiple robot arms to collaboratively complete manipulation tasks on articulated objects. In this paper, we present V-MAO, a framework for learning multi-arm manipulation of articulated objects. Our framework includes a variational generative model that learns contact point distribution over object rigid parts for each robot arm. The training signal is obtained from interaction with the simulation environment which is enabled by planning and a novel formulation of object-centric control for articulated objects. We deploy our framework in a customized MuJoCo simulation environment and demonstrate that our framework achieves a high success rate on six different objects and two different robots. We also show that generative modeling can effectively learn the contact point distribution on articulated objects.", "keywords": "Articulated Object;Generative Model;Variational Inference", "primary_area": "", "supplementary_material": "/attachment/794bee2fb71b755a11cf24092ec9d583150526eb.zip", "author": "Xingyu Liu;Kris M. Kitani", "authorids": "~Xingyu_Liu1;~Kris_M._Kitani1", "gender": "M;M", "homepage": "https://xingyul.github.io;http://www.cs.cmu.edu/~kkitani/", "dblp": ";42/163", "google_scholar": "ZVABLi8AAAAJ;yv3sH74AAAAJ", "orcid": ";0000-0002-9389-4060", "linkedin": "xing-yu-liu/;", "or_profile": "~Xingyu_Liu1;~Kris_M._Kitani1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nliu2021vmao,\ntitle={V-{MAO}: Generative Modeling for Multi-Arm Manipulation of Articulated Objects},\nauthor={Xingyu Liu and Kris M. 
Kitani},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=wMoHIYBsj2_}\n}", "github": "", "project": "", "reviewers": "Fyb6;War1;E39s", "site": "https://openreview.net/forum?id=wMoHIYBsj2_", "pdf_size": 0, "rating": "4;6;10", "confidence": "", "rating_avg": 6.666666666666667, "confidence_avg": 0, "replies_avg": 7, "authors#_avg": 2, "corr_rating_confidence": 0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8576660888713777418&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "wVIqlSqKu2D", "title": "DiffImpact: Differentiable Rendering and Identification of Impact Sounds", "track": "main", "status": "Oral", "tldr": "", "abstract": "Rigid objects make distinctive sounds during manipulation. These sounds are a function of object features, such as shape and material, and of contact forces during manipulation. Being able to infer from sound an object's acoustic properties, how it is being manipulated, and what events it is participating in could augment and complement what robots can perceive from vision, especially in case of occlusion, low visual resolution, poor lighting, or blurred focus. Annotations on sound data are rare. Therefore, existing inference systems mostly include a sound renderer in the loop, and use analysis-by-synthesis to optimize for object acoustic properties. Optimizing parameters with respect to a non-differentiable renderer is slow and hard to scale to complex scenes. 
We present DiffImpact, a fully differentiable model for sounds rigid objects make during impacts, based on physical principles of impact forces, rigid object vibration, and other acoustic effects. Its differentiability enables gradient-based, efficient joint inference of acoustic properties of the objects and characteristics and timings of each individual impact. DiffImpact can also be plugged in as the decoder of an autoencoder, and trained end-to-end on real audio data, so that the encoder can learn to solve the inverse problem in a self-supervised way. Experiments demonstrate that our model's physics-based inductive biases make it more resource efficient and expressive than state-of-the-art pure learning-based alternatives, on both forward rendering of impact sounds and inverse tasks such as acoustic property inference and blind source separation of impact sounds.", "keywords": "Differentiable Sound Rendering;Auditory Scene Analysis", "primary_area": "", "supplementary_material": "/attachment/cb1dc1c42330389f4f181f619f1b5b42ffac6a19.zip", "author": "Samuel Clarke;Negin Heravi;Mark Rau;Ruohan Gao;Jiajun Wu;Doug James;Jeannette Bohg", "authorids": "~Samuel_Clarke1;~Negin_Heravi1;mrau@stanford.edu;~Ruohan_Gao2;~Jiajun_Wu1;~Doug_James1;~Jeannette_Bohg1", "gender": ";;;M;M;M;", "homepage": ";;;https://ruohangao.github.io/;https://jiajunwu.com;https://profiles.stanford.edu/doug-james/;https://web.stanford.edu/~bohg/", "dblp": ";249/8257;;176/5787;117/4768;;52/7377", "google_scholar": ";;;i02oEgMAAAAJ;2efgcS0AAAAJ;https://scholar.google.com.tw/citations?user=77JHlSkAAAAJ;rjnJnEkAAAAJ", "orcid": ";;;0000-0002-8346-1114;0000-0002-4176-343X;0000-0003-3532-8383;0000-0002-4921-7193", "linkedin": ";negin-heravi-40a919a2;;;jiajunwu/;;", "or_profile": "~Samuel_Clarke1;~Negin_Heravi1;mrau@stanford.edu;~Ruohan_Gao2;~Jiajun_Wu1;~Doug_James1;~Jeannette_Bohg1", "aff": ";Stanford University;;University of Texas, Austin;Stanford University;Computer Science Department, Stanford 
University;Stanford University", "aff_domain": ";stanford.edu;;cs.utexas.edu;stanford.edu;cs.stanford.edu;stanford.edu", "position": ";PhD student;;PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nclarke2021diffimpact,\ntitle={DiffImpact: Differentiable Rendering and Identification of Impact Sounds},\nauthor={Samuel Clarke and Negin Heravi and Mark Rau and Ruohan Gao and Jiajun Wu and Doug James and Jeannette Bohg},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=wVIqlSqKu2D}\n}", "github": "", "project": "", "reviewers": "FKcr;8sMD;7DAN", "site": "https://openreview.net/forum?id=wVIqlSqKu2D", "pdf_size": 0, "rating": "10;10;10", "confidence": "", "rating_avg": 10.0, "confidence_avg": 0, "replies_avg": 9, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15814276261327745021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Stanford University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.utexas.edu", "aff_unique_abbr": "Stanford;UT Austin", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Stanford;Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "xHnJS2GYFDz", "title": "DETR3D: 3D Object Detection from Multi-view Images via 3D-to-2D Queries", "track": "main", "status": "Poster", "tldr": "", "abstract": "We introduce a framework for multi-camera 3D object detection. In contrast to existing works, which estimate 3D bounding boxes directly from monocular images or use depth prediction networks to generate input for 3D object detection from 2D information, our method manipulates predictions directly in 3D space. 
Our architecture extracts 2D features from multiple camera images and then uses a sparse set of 3D object queries to index into these 2D features, linking 3D positions to multi-view images using camera transformation matrices. Finally, our model makes a bounding box prediction per object query, using a set-to-set loss to measure the discrepancy between the ground-truth and the prediction. This top-down approach outperforms its bottom-up counterpart in which object bounding box prediction follows per-pixel depth estimation, since it does not suffer from the compounding error introduced by a depth prediction model. Moreover, our method does not require post-processing such as non-maximum suppression, dramatically improving inference speed. We achieve state-of-the-art performance on the nuScenes autonomous driving benchmark.", "keywords": "multi-camera;3D object detection;autonomous driving", "primary_area": "", "supplementary_material": "/attachment/1f90e051916d46c34509a63454582b00339702ef.zip", "author": "Yue Wang;Vitor Campagnolo Guizilini;Tianyuan Zhang;Yilun Wang;Hang Zhao;Justin Solomon", "authorids": "~Yue_Wang2;~Vitor_Campagnolo_Guizilini2;~Tianyuan_Zhang2;~Yilun_Wang1;~Hang_Zhao1;~Justin_Solomon1", "gender": "M;M;;M;M;M", "homepage": "https://yuewang.xyz;;;http://www.mit.edu/~hangzhao/;http://people.csail.mit.edu/jsolomon/;http://tianyuanzhang.com", "dblp": "33/4822-41;;;;80/5094;145/6286-2", "google_scholar": "v-AEFIEAAAAJ;UH9tP6QAAAAJ;https://scholar.google.com.hk/citations?hl=en;DmahiOYAAAAJ;pImSVwoAAAAJ;uJocZjkAAAAJ", "orcid": ";;;;0000-0002-7701-7586;", "linkedin": ";vitorguizilini/;yilunw/;;justin-solomon-8a587914/;", "or_profile": "~Yue_Wang2;~Vitor_Campagnolo_Guizilini2;~Yilun_Wang1;~Hang_Zhao1;~Justin_Solomon1;~Zhang_Tianyuan1", "aff": "Massachusetts Institute of Technology;Toyota Research Institute;Li Auto;Tsinghua University;Massachusetts Institute of Technology;Carnegie Mellon University", "aff_domain": 
"mit.edu;tri.global;lixiang.com;tsinghua.edu.cn;mit.edu;cmu.edu", "position": "PhD student;Staff Research Scientist;Researcher;Assistant Professor;Associate Professor;MS student", "bibtex": "@inproceedings{\nwang2021detrd,\ntitle={{DETR}3D: 3D Object Detection from Multi-view Images via 3D-to-2D Queries},\nauthor={Yue Wang and Vitor Campagnolo Guizilini and Tianyuan Zhang and Yilun Wang and Hang Zhao and Justin Solomon},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=xHnJS2GYFDz}\n}", "github": "", "project": "", "reviewers": "Gg7X;KAPs;tKqW", "site": "https://openreview.net/forum?id=xHnJS2GYFDz", "pdf_size": 0, "rating": "4;6;6", "confidence": "", "rating_avg": 5.333333333333333, "confidence_avg": 0, "replies_avg": 5, "authors#_avg": 6, "corr_rating_confidence": 0, "gs_citation": 899, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1986765630077984342&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Massachusetts Institute of Technology;Toyota Research Institute;Li Auto;Tsinghua University;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://web.mit.edu;https://www.tri.global;https://www.liauto.com;https://www.tsinghua.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "MIT;TRI;Li Auto;THU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "United States;China" }, { "id": "xQ8rr3-zpiH", "title": "Just Label What You Need: Fine-Grained Active Selection for P&P through Partially Labeled Scenes", "track": "main", "status": "Poster", "tldr": "", "abstract": "Self-driving vehicles must perceive and predict the future positions of nearby actors to avoid collisions and drive safely. A deep learning module is often responsible for this task, requiring large-scale, high-quality training datasets. 
Due to high labeling costs, active learning approaches are an appealing solution to maximizing model performance for a given labeling budget. However, despite its appeal, there has been little scientific analysis of active learning approaches for the perception and prediction (P&P) problem. In this work, we study active learning techniques for P&P and find that the traditional active learning formulation is ill-suited. We thus introduce generalizations that ensure that our approach is both cost-aware and allows for fine-grained selection of examples through partially labeled scenes. Extensive experiments on a real-world dataset suggest significant improvements across perception, prediction, and downstream planning tasks. ", "keywords": "Self-Driving;Active Learning", "primary_area": "", "supplementary_material": "/attachment/b7ebb0ef06cd80f4b7fa3dd7e5fabcc5386116d1.zip", "author": "Sean Segal;Nishanth Kumar;Sergio Casas;Wenyuan Zeng;Mengye Ren;Jingkang Wang;Raquel Urtasun", "authorids": "~Sean_Segal1;~Nishanth_Kumar1;~Sergio_Casas2;~Wenyuan_Zeng1;~Mengye_Ren1;~Jingkang_Wang1;~Raquel_Urtasun1", "gender": "M;M;M;;M;F;M", "homepage": ";http://nishanthjkumar.com/;;http://www.cs.toronto.edu/~mren;http://www.cs.toronto.edu/~wangjk/;http://www.cs.toronto.edu/~urtasun/;http://www.cs.toronto.edu/~sergio/", "dblp": "251/3238;211/7595;186/7854;163/1952;223/9910;u/RaquelUrtasun;46/6535-2", "google_scholar": "DfhYi2QAAAAJ;FE512o4AAAAJ;;XcQ9WqMAAAAJ;c0BTYC4AAAAJ;https://scholar.google.ca/citations?user=jyxO2akAAAAJ;Vgo1x9YAAAAJ", "orcid": ";0000-0001-9291-3728;;;;;", "linkedin": ";nishanth-kumar;;;;;sergio-casas/", "or_profile": "~Sean_Segal1;~Nishanth_Kumar1;~Wenyuan_Zeng1;~Mengye_Ren1;~Jingkang_Wang1;~Raquel_Urtasun1;~Sergio_Casas_Romero1", "aff": "Department of Computer Science, University of Toronto;Brown University;University of Toronto;University of Toronto;University of Toronto;Department of Computer Science, University of Toronto;Uber ATG", "aff_domain": 
"cs.toronto.edu;brown.edu;toronto.edu;toronto.edu;toronto.edu;cs.toronto.edu;uber.com", "position": "PhD student;Undergrad student;PhD student;PhD student;PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nsegal2021just,\ntitle={Just Label What You Need: Fine-Grained Active Selection for P\\&P through Partially Labeled Scenes},\nauthor={Sean Segal and Nishanth Kumar and Sergio Casas and Wenyuan Zeng and Mengye Ren and Jingkang Wang and Raquel Urtasun},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=xQ8rr3-zpiH}\n}", "github": "", "project": "", "reviewers": "TXdJ;KVfT;PW9h;TDyJ", "site": "https://openreview.net/forum?id=xQ8rr3-zpiH", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 7, "corr_rating_confidence": 0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2290038614597560247&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;0;0;0;2", "aff_unique_norm": "University of Toronto;Brown University;Uber", "aff_unique_dep": "Department of Computer Science;;Advanced Technologies Group", "aff_unique_url": "https://www.utoronto.ca;https://www.brown.edu;https://www.uber.com", "aff_unique_abbr": "U of T;Brown;Uber ATG", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Toronto;", "aff_country_unique_index": "0;1;0;0;0;0;1", "aff_country_unique": "Canada;United States" }, { "id": "xwEaXgFa0MR", "title": "AW-Opt: Learning Robotic Skills with Imitation andReinforcement at Scale", "track": "main", "status": "Poster", "tldr": "", "abstract": "Robotic skills can be learned via imitation learning (IL) using user-provided demonstrations, or via reinforcement learning (RL) using large amounts of autonomously collected experience. 
Both methods have complementary strengths and weaknesses: RL can reach a high level of performance, but requires exploration, which can be very time consuming and unsafe; IL does not require exploration, but only learns skills that are as good as the provided demonstrations. Can a single method combine the strengths of both approaches? A number of prior methods have aimed to address this question, proposing a variety of techniques that integrate elements of IL and RL. However, scaling up such methods to complex robotic skills that integrate diverse offline data and generalize meaningfully to real-world scenarios still presents a major challenge. In this paper, our aim is to test the scalability of prior IL + RL algorithms and devise a system based on detailed empirical experimentation that combines existing components in the most effective and scalable way. \nTo that end, we present a series of experiments aimed at understanding the implications of each design decision, so as to develop a combined approach that can utilize demonstrations and heterogeneous prior data to attain the best performance on a range of real-world and realistic simulated robotic problems. 
Our complete method, which we call AW-Opt, combines elements of advantage-weighted regression and QT-Opt, providing a unified approach for integrating demonstrations and offline data for robotic manipulation.", "keywords": "reinforcement learning;imitation learning", "primary_area": "", "supplementary_material": "/attachment/412c46c545f48e62be3728f48406d30cc4bbeaa7.zip", "author": "Yao Lu;Karol Hausman;Yevgen Chebotar;Mengyuan Yan;Eric Jang;Alexander Herzog;Ted Xiao;Alex Irpan;Mohi Khansari;Dmitry Kalashnikov;Sergey Levine", "authorids": "~Yao_Lu13;~Karol_Hausman2;~Yevgen_Chebotar1;~Mengyuan_Yan1;~Eric_Jang1;~Alexander_Herzog2;~Ted_Xiao1;~Alex_Irpan1;~Mohi_Khansari1;~Dmitry_Kalashnikov1;~Sergey_Levine1", "gender": ";M;F;M;M;M;M;;;M;M", "homepage": ";;;http://evjang.com;;https://www.tedxiao.me;http://www.alexirpan.com;https://cs.stanford.edu/people/khansari/;;https://people.eecs.berkeley.edu/~svlevine/;https://karolhausman.github.io/", "dblp": "26/5662-6;01/11424;164/5672;190/7794;;198/0598;202/2063;;222/2882;80/7594;135/8164", "google_scholar": "OI7zFmwAAAAJ;ADkiClQAAAAJ;https://scholar.google.com/citations?hl=en;Izhkp4YAAAAJ;jrfFYAIAAAAJ;;;Z3dxz9IAAAAJ;;8R35rCwAAAAJ;yy0UFOwAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": ";;;;alexander-herzog-154030a5/;;;;;;karolhausman/", "or_profile": "~Yao_Lu13;~Yevgen_Chebotar1;~Mengyuan_Yan1;~Eric_Jang1;~Alexander_Herzog2;~Ted_Xiao1;~Alex_Irpan1;~Mohi_Khansari1;~Dmitry_Kalashnikov1;~Sergey_Levine1;~Karol_Hausman1", "aff": "Google;Google;Google;Google;Google;Google;Google DeepMind;Google;Google;Google;Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Researcher;Researcher;Researcher;Researcher;Sr. 
Roboticist;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlu2021awopt,\ntitle={{AW}-Opt: Learning Robotic Skills with Imitation andReinforcement at Scale},\nauthor={Yao Lu and Karol Hausman and Yevgen Chebotar and Mengyuan Yan and Eric Jang and Alexander Herzog and Ted Xiao and Alex Irpan and Mohi Khansari and Dmitry Kalashnikov and Sergey Levine},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=xwEaXgFa0MR}\n}", "github": "", "project": "", "reviewers": "qeFD;3em5;qTLQ", "site": "https://openreview.net/forum?id=xwEaXgFa0MR", "pdf_size": 0, "rating": "6;6;10", "confidence": "", "rating_avg": 7.333333333333333, "confidence_avg": 0, "replies_avg": 13, "authors#_avg": 11, "corr_rating_confidence": 0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7658205562679733085&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "yhy25u-DrjR", "title": "Broadly-Exploring, Local-Policy Trees for Long-Horizon Task Planning", "track": "main", "status": "Poster", "tldr": "", "abstract": "Long-horizon planning in realistic environments requires the ability to reason over sequential tasks in high-dimensional state spaces with complex dynamics. Classical motion planning algorithms, such as rapidly-exploring random trees, are capable of efficiently exploring large state spaces and computing long-horizon, sequential plans. 
However, these algorithms are generally challenged with complex, stochastic, and high-dimensional state spaces as well as in the presence of small, topologically complex goal regions, which naturally emerge in tasks that interact with the environment. Machine learning offers a promising solution for its ability to learn general policies that can handle complex interactions and high-dimensional observations. However, these policies are generally limited in horizon length. Our approach, Broadly-Exploring, Local-policy Trees (BELT), merges these two approaches to leverage the strengths of both through a task-conditioned, model-based tree search. BELT uses an RRT-inspired tree search to efficiently explore the state space. Locally, the exploration is guided by a task-conditioned, learned policy capable of performing general short-horizon tasks. This task space can be quite general and abstract; its only requirements are to be sampleable and to well-cover the space of useful tasks. This search is aided by a task-conditioned model that temporally extends dynamics propagation to allow long-horizon search and sequential reasoning over tasks. 
BELT is demonstrated experimentally to be able to plan long-horizon, sequential trajectories with a goal conditioned policy and generate plans that are robust.", "keywords": "RRT;Task and Motion Planning;Model-based Planning;Tree-search", "primary_area": "", "supplementary_material": "/attachment/62ac7310ce2447a2da3080b035871ecd939af019.zip", "author": "brian ichter;Pierre Sermanet;Corey Lynch", "authorids": "~brian_ichter1;~Pierre_Sermanet1;~Corey_Lynch1", "gender": ";;M", "homepage": ";https://sermanet.github.io/;https://coreylynch.github.io/", "dblp": ";28/6457;155/3141", "google_scholar": "-w5DuHgAAAAJ;0nPi5YYAAAAJ;CYWO-oAAAAAJ", "orcid": ";;", "linkedin": ";sermanet/;", "or_profile": "~brian_ichter1;~Pierre_Sermanet1;~Corey_Lynch1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nichter2021broadlyexploring,\ntitle={Broadly-Exploring, Local-Policy Trees for Long-Horizon Task Planning},\nauthor={brian ichter and Pierre Sermanet and Corey Lynch},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=yhy25u-DrjR}\n}", "github": "", "project": "", "reviewers": "yRzc;FQLy;Da4d;qgUz", "site": "https://openreview.net/forum?id=yhy25u-DrjR", "pdf_size": 0, "rating": "4;6;6;10", "confidence": "", "rating_avg": 6.5, "confidence_avg": 0, "replies_avg": 17, "authors#_avg": 3, "corr_rating_confidence": 0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1914830096336831669&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "yt3tDB67lc5", "title": 
"Legged Robot State Estimation using Invariant Kalman Filtering and Learned Contact Events", "track": "main", "status": "Poster", "tldr": "", "abstract": "This work develops a learning-based contact estimator for legged robots that bypasses the need for physical sensors and takes multi-modal proprioceptive sensory data as input. Unlike vision-based state estimators, proprioceptive state estimators are agnostic to perceptually degraded situations such as dark or foggy scenes. While some robots are equipped with dedicated physical sensors to detect necessary contact data for state estimation, some robots do not have dedicated contact sensors, and the addition of such sensors is non-trivial without redesigning the hardware. The trained network can estimate contact events on different terrains. The experiments show that a contact-aided invariant extended Kalman filter can generate accurate odometry trajectories compared to a state-of-the-art visual SLAM system, enabling robust proprioceptive odometry.", "keywords": "State Estimation;Deep Learning;Legged Robot;Invariant EKF", "primary_area": "", "supplementary_material": "/attachment/be52d940dccc5267442e4c46dca6cf87d672eeac.zip", "author": "Tzu-Yuan Lin;Ray Zhang;Justin Yu;Maani Ghaffari", "authorids": "~Tzu-Yuan_Lin1;rzh@umich.edu;yujustin@umich.edu;~Maani_Ghaffari1", "gender": "M;;;M", "homepage": "https://tzuyuan.github.io/;;;https://curly.engin.umich.edu/", "dblp": "116/9958;;;", "google_scholar": "1HY3TXcAAAAJ;;;l2jdSb8AAAAJ", "orcid": ";;;0000-0002-4734-4295", "linkedin": ";;;maani-ghaffari-19b017203/", "or_profile": "~Tzu-Yuan_Lin1;rzh@umich.edu;yujustin@umich.edu;~Maani_Ghaffari1", "aff": "University of Michigan;;;University of Michigan", "aff_domain": "umich.edu;;;umich.edu", "position": "PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nlin2021legged,\ntitle={Legged Robot State Estimation using Invariant Kalman Filtering and Learned Contact Events},\nauthor={Tzu-Yuan Lin and Ray Zhang and Justin 
Yu and Maani Ghaffari},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=yt3tDB67lc5}\n}", "github": "", "project": "", "reviewers": "TWUQ;RPT4;aydR;Ax1o", "site": "https://openreview.net/forum?id=yt3tDB67lc5", "pdf_size": 0, "rating": "4;4;4;6", "confidence": "", "rating_avg": 4.5, "confidence_avg": 0, "replies_avg": 29, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13933905627700301679&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "zOjU2vZzhCk", "title": "Dex-NeRF: Using a Neural Radiance Field to Grasp Transparent Objects", "track": "main", "status": "Poster", "tldr": "", "abstract": "The ability to grasp and manipulate transparent objects is a major challenge for robots. Existing depth cameras have difficulty detecting, localizing, and inferring the geometry of such objects. We propose using neural radiance fields (NeRF) to detect, localize, and infer the geometry of transparent objects with sufficient accuracy to find and grasp them securely. We leverage NeRF's view-independent learned density, place lights to increase specular reflections, and perform a transparency-aware depth-rendering that we feed into the Dex-Net grasp planner. We show how additional lights create specular reflections that improve the quality of the depth map, and test a setup for a robot workcell equipped with an array of cameras to perform transparent object manipulation. We also create synthetic and real datasets of transparent objects in real-world settings, including singulated objects, cluttered tables, and the top rack of a dishwasher. 
In each setting we show that NeRF and Dex-Net are able to reliably compute robust grasps on transparent objects, achieving 90% and 100% grasp-success rates in physical experiments on an ABB YuMi, on objects where baseline methods fail.\n", "keywords": "neural radiance fields;grasp planning;transparent objects", "primary_area": "", "supplementary_material": "", "author": "Jeffrey Ichnowski;Yahav Avigal;Justin Kerr;Ken Goldberg", "authorids": "~Jeffrey_Ichnowski1;~Yahav_Avigal1;~Justin_Kerr1;~Ken_Goldberg1", "gender": "M;M;M;M", "homepage": "https://ichnow.ski;https://yahavigal.github.io/;https://kerrj.github.io/;http://goldberg.berkeley.edu/", "dblp": "89/1741;;;g/KennethYGoldberg", "google_scholar": "1OdtfywAAAAJ;CCAaFCQAAAAJ;;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ", "orcid": "0000-0003-4874-9478;0000-0003-2062-5983;;0000-0001-6747-9499", "linkedin": ";;;goldbergken/", "or_profile": "~Jeffrey_Ichnowski1;~Yahav_Avigal1;~Justin_Kerr1;~Ken_Goldberg1", "aff": "University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;cmu.edu;berkeley.edu", "position": "Postdoc;PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nichnowski2021dexnerf,\ntitle={Dex-Ne{RF}: Using a Neural Radiance Field to Grasp Transparent Objects},\nauthor={Jeffrey Ichnowski and Yahav Avigal and Justin Kerr and Ken Goldberg},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=zOjU2vZzhCk}\n}", "github": "", "project": "", "reviewers": "nhV7;21Zr;hn8F;qjfP", "site": "https://openreview.net/forum?id=zOjU2vZzhCk", "pdf_size": 0, "rating": "4;6;6;6", "confidence": "", "rating_avg": 5.5, "confidence_avg": 0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 196, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=12894996577099829317&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.cmu.edu", "aff_unique_abbr": "UC Berkeley;CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "zv3NYgRZ7Qo", "title": "3D Neural Scene Representations for Visuomotor Control", "track": "main", "status": "Oral", "tldr": "", "abstract": "Humans have a strong intuitive understanding of the 3D environment around us. The mental model of the physics in our brain applies to objects of different materials and enables us to perform a wide range of manipulation tasks that are far beyond the reach of current robots. In this work, we desire to learn models for dynamic 3D scenes purely from 2D visual observations. Our model combines Neural Radiance Fields (NeRF) and time contrastive learning with an autoencoding framework, which learns viewpoint-invariant 3D-aware scene representations. We show that a dynamics model, constructed over the learned representation space, enables visuomotor control for challenging manipulation tasks involving both rigid bodies and fluids, where the target is specified in a viewpoint different from what the robot operates on. When coupled with an auto-decoding framework, it can even support goal specification from camera viewpoints that are outside the training distribution. We further demonstrate the richness of the learned 3D dynamics model by performing future prediction and novel view synthesis. 
Finally, we provide detailed ablation studies regarding different system designs and qualitative analysis of the learned representations.", "keywords": "learning-based dynamics modeling;3d-aware representation learning;neural radiance field;robotic manipulation", "primary_area": "", "supplementary_material": "/attachment/2b24559f72919f3123417e9436e27e6ca644036d.zip", "author": "Yunzhu Li;Shuang Li;Vincent Sitzmann;Pulkit Agrawal;Antonio Torralba", "authorids": "~Yunzhu_Li1;~Shuang_Li5;~Vincent_Sitzmann1;~Pulkit_Agrawal1;~Antonio_Torralba1", "gender": "M;;M;M;M", "homepage": "https://yunzhuli.github.io/;;https://vsitzmann.github.io;https://people.eecs.berkeley.edu/~pulkitag/;http://web.mit.edu/torralba/www//", "dblp": "182/1831;;192/1958;149/2672;t/AntonioBTorralba", "google_scholar": "WlA92lcAAAAJ;;X44QVV4AAAAJ;UpZmJI0AAAAJ;https://scholar.google.com.tw/citations?user=8cxDHS4AAAAJ", "orcid": ";;0000-0002-0107-5704;;", "linkedin": ";;vincentsitzmann/;;", "or_profile": "~Yunzhu_Li1;~Shuang_Li5;~Vincent_Sitzmann1;~Pulkit_Agrawal1;~Antonio_Torralba1", "aff": "Massachusetts Institute of Technology;;Preferred Networks, Inc.;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;preferred.jp;mit.edu;mit.edu", "position": "PhD student;;Academic Advisor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2021d,\ntitle={3D Neural Scene Representations for Visuomotor Control},\nauthor={Yunzhu Li and Shuang Li and Vincent Sitzmann and Pulkit Agrawal and Antonio Torralba},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=zv3NYgRZ7Qo}\n}", "github": "", "project": "", "reviewers": "i7GY;BFGN;1uVz;5NEJ", "site": "https://openreview.net/forum?id=zv3NYgRZ7Qo", "pdf_size": 0, "rating": "6;6;10;10", "confidence": "", "rating_avg": 8.0, "confidence_avg": 0, "replies_avg": 15, "authors#_avg": 5, "corr_rating_confidence": 0, "gs_citation": 155, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=9305170759363894656&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Preferred Networks, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.preferred-networks.com", "aff_unique_abbr": "MIT;PFN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Japan" }, { "id": "zwo1-MdMl1P", "title": "Robot Reinforcement Learning on the Constraint Manifold", "track": "main", "status": "Oral", "tldr": "", "abstract": "Reinforcement learning in robotics is extremely challenging due to many practical issues, including safety, mechanical constraints, and wear and tear. Typically, these issues are not considered in the machine learning literature. One crucial problem in applying reinforcement learning in the real world is Safe Exploration, which requires physical and safety constraints satisfaction throughout the learning process. To explore in such a safety-critical environment, leveraging known information such as robot models and constraints is beneficial to provide more robust safety guarantees. 
Exploiting this knowledge, we propose a novel method to learn robotics tasks in simulation efficiently while satisfying the constraints during the learning process.", "keywords": "Robot Learning;Reinforcement Learning;Constrained Markov Decision Process;Safe Exploration", "primary_area": "", "supplementary_material": "/attachment/a0efc6e92bcf8790427624256c78e11b645d553c.zip", "author": "Puze Liu;Davide Tateo;Haitham Bou Ammar;Jan Peters", "authorids": "~Puze_Liu1;~Davide_Tateo2;~Haitham_Bou_Ammar1;~Jan_Peters3", "gender": "M;M;M;M", "homepage": "https://puzeliu.github.io/;https://www.ias.informatik.tu-darmstadt.de/Team/DavideTateo;;https://www.jan-peters.net", "dblp": "292/4069;214/0808;;p/JanPeters1", "google_scholar": "zg-FMloAAAAJ;https://scholar.google.it/citations?user=LGnu3SEAAAAJ;https://scholar.google.co.uk/citations?user=AE5suDoAAAAJ;https://scholar.google.de/citations?user=-kIVAcAAAAAJ", "orcid": "0000-0001-6887-7704;0000-0002-7193-923X;;0000-0002-5266-8091", "linkedin": ";;;janrpeters/", "or_profile": "~Puze_Liu1;~Davide_Tateo2;~Haitham_Bou_Ammar1;~Jan_Peters3", "aff": "TU Darmstadt;Technische Universit\u00e4t Darmstadt;Huawei R&D UK;Max Planck Institute for Intelligent Systems", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;huawei.com;tue.mpg.de", "position": "PhD student;Postdoc;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nliu2021robot,\ntitle={Robot Reinforcement Learning on the Constraint Manifold},\nauthor={Puze Liu and Davide Tateo and Haitham Bou Ammar and Jan Peters},\nbooktitle={5th Annual Conference on Robot Learning },\nyear={2021},\nurl={https://openreview.net/forum?id=zwo1-MdMl1P}\n}", "github": "", "project": "", "reviewers": "c13Z;XePn;n6jm", "site": "https://openreview.net/forum?id=zwo1-MdMl1P", "pdf_size": 0, "rating": "6;10;10", "confidence": "", "rating_avg": 8.666666666666666, "confidence_avg": 0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0, "gs_citation": 63, "gs_cited_by_link": 
"https://scholar.google.com/scholar?cites=1011760972193529761&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Huawei;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";R&D;Intelligent Systems", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.huawei.com/uk;https://www.mpi-is.mpg.de", "aff_unique_abbr": "TU Darmstadt;Huawei;MPI-IS", "aff_campus_unique_index": "0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United Kingdom" } ]