[ { "title": "Misspecified Phase Retrieval with Generative Priors", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55168", "id": "--aQNMdJc9x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/211ab571cc9f3802afa6ffff52ae3e5b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=--aQNMdJc9x", "openreview": "https://openreview.net/forum?id=--aQNMdJc9x", "poster": "/media/PosterPDFs/NeurIPS%202022/1bc0249a6412ef49b07fe6f62e6dc8de.png?t=1667441675.3649", "slides": "https://nips.cc/virtual/2022/poster/55168", "video": "https://nips.cc/virtual/2022/poster/55168", "author_site": "Zhaoqiang Liu, Xinshao Wang, Jiulong Liu", "tldr": "We propose a novel two-step approach with provable guarantees for misspecified phase retrieval with generative priors.", "abstract": "In this paper, we study phase retrieval under model misspecification and generative priors. In particular, we aim to estimate an $n$-dimensional signal $\\mathbf{x}$ from $m$ i.i.d.~realizations of the single index model $y = f(\\mathbf{a}^T\\mathbf{x})$, where $f$ is an unknown and possibly random nonlinear link function and $\\mathbf{a} \\in \\mathbb{R}^n$ is a standard Gaussian vector. We make the assumption $\\mathrm{Cov}[y,(\\mathbf{a}^T\\mathbf{x})^2] \\ne 0$, which corresponds to the misspecified phase retrieval problem. In addition, the underlying signal $\\mathbf{x}$ is assumed to lie in the range of an $L$-Lipschitz continuous generative model with bounded $k$-dimensional inputs. We propose a two-step approach, for which the first step plays the role of spectral initialization and the second step refines the estimated vector produced by the first step iteratively. We show that both steps enjoy a statistical rate of order $\\sqrt{(k\\log L)\\cdot (\\log m)/m}$ under suitable conditions. Experiments on image datasets are performed to demonstrate that our approach performs on par with or even significantly outperforms several competing methods. ", "keywords": "Phase retrieval;generative priors;model misspecification;single index model;near-optimal statistical rate", "primary_area": "", "supplementary_material": "/attachment/9b02445d9152d9d53c9ddbe10e0557765a78bc32.zip", "author": "Zhaoqiang Liu;Xinshao Wang;Jiulong Liu", "authorids": "~Zhaoqiang_Liu1;~Xinshao_Wang1;~Jiulong_Liu1", "gender": "M;M;M", "homepage": ";https://xinshaoamoswang.github.io/about/;", "dblp": "198/1405;230/3751;", "google_scholar": "EmGrPbIAAAAJ;yOBhB7UAAAAJ;de0zoQ4AAAAJ", "orcid": ";0000-0001-8907-8258;0000-0001-7199-4581", "linkedin": ";xinshaowang/;", "or_profile": "~Zhaoqiang_Liu1;~Xinshao_Wang1;~Jiulong_Liu1", "aff": ";Zenith Ai;Chinese Academy of Sciences", "aff_domain": ";zenithai.co.uk;ac.cn", "position": ";Senior researcher;Associate Professor", "bibtex": "@inproceedings{\nliu2022misspecified,\ntitle={Misspecified Phase Retrieval with Generative Priors},\nauthor={Zhaoqiang Liu and Xinshao Wang and Jiulong Liu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=--aQNMdJc9x}\n}", "github": "", "project": "", "reviewers": "YJ4f;nWLU;c4oX;NiZz;VWWv", "pdf_size": 911702, "rating": "5;5;6;6;6", "confidence": "3;3;3;3;3", "soundness": "3;4;4;3;3", "novelty": "3;3;2;3;3", "presentation": "2;3;3;2;3", "contribution": "3;3;2;3;3", "wc_summary": "240;82;253;156;238", "wc_strengths_and_weaknesses": "390;111;361;283;254", "wc_questions": "248;55;744;120;105", "wc_limitations": "48;24;21;6;63", "wc_review": "926;272;1379;565;660", "wc_reply_reviewers": "0;0;250;0;0", "wc_reply_authors": "510;504;669;379;306", "reply_reviewers": "0;0;1;0;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "contribution_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 193.8, 65.6060972776159 ], "wc_strengths_and_weaknesses_avg": [ 279.8, 97.88442164103539 ], "wc_questions_avg": [ 254.4, 252.93999288368772 ], "wc_limitations_avg": [ 32.4, 20.382345301755635 ], "wc_review_avg": [ 760.4, 373.33234523678766 ], "wc_reply_reviewers_avg": [ 50.0, 100.0 ], "wc_reply_authors_avg": [ 473.6, 124.4742543661138 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1648135207641613717&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";zenithai.co.uk;ac.cn", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Zenith Ai;Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": ";https://www.cas.cn", "aff_unique_abbr": ";CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "title": "Reinforcement Learning in a Birth and Death Process: Breaking the Dependence on the State Space", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53426", "id": "--fdtqo-iKM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5d2781cc34f459618a9a504761043055-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=--fdtqo-iKM", "openreview": "https://openreview.net/forum?id=--fdtqo-iKM", "poster": "/media/PosterPDFs/NeurIPS%202022/53426.png?t=1669506066.8243692", "slides": "https://nips.cc/virtual/2022/poster/53426", "video": "https://nips.cc/virtual/2022/poster/53426", "author_site": "Jonatha Anselmi, Bruno Gaujal, Louis-S\u00e9bastien Rebuffi", "tldr": "Our main insight is that efficient reinforcement learning can be achieved in the context of queueing systems independently of the diameter of the underlying Markov decision process even when this is large.", "abstract": "In this paper, we revisit the regret of undiscounted reinforcement learning in MDPs with a birth and death structure. Specifically, we consider a controlled queue with impatient jobs and the main objective is to optimize a trade-off between energy consumption and user-perceived performance. Within this setting, the diameter $D$ of the MDP is $\\Omega(S^S)$, where $S$ is the number of states. 
Therefore, the existing lower and upper bounds on the regret at time $T$, of order $O (\\sqrt{DSAT})$ for MDPs with $S$ states and $A$ actions, may suggest that reinforcement learning is inefficient here. \nIn our main result, however, we exploit the structure of our MDPs to show that the regret of a slightly tweaked version of the classical learning algorithm UCRL2 is in fact upper bounded by $\\tilde{\\mathcal{O}} (\\sqrt{E_2AT})$ where $E_2$ is a weighted second moment of the stationary measure of a reference policy. Importantly, $E_2$ is bounded independently of $S$. Thus, our bound is asymptotically independent of the number of states and of the diameter. This result is based on a careful study of the number of visits performed by the learning algorithm to the states of the MDP, which is highly non-uniform.", "keywords": "Markov decision processes;structured reinforcement learning;regret analysis;queueing systems", "primary_area": "", "supplementary_material": "/attachment/b9355e9cd4cec64f27dc9ac9be2dc377d3db3210.zip", "author": "Jonatha Anselmi;Bruno Gaujal;Louis-S\u00e9bastien Rebuffi", "authorids": "~Jonatha_Anselmi1;~Bruno_Gaujal1;~Louis-S\u00e9bastien_Rebuffi1", "gender": ";M;M", "homepage": "http://polaris.imag.fr/jonatha.anselmi/;https://team.inria.fr/polaris/members/bruno-gaujal/;https://team.inria.fr/polaris/louis-sebastien-rebuffi/", "dblp": ";67/1197;", "google_scholar": "https://scholar.google.it/citations?user=B_QNgB8AAAAJ;;", "orcid": ";;", "linkedin": "jonatha-anselmi/;;", "or_profile": "~Jonatha_Anselmi1;~Bruno_Gaujal1;~Louis-S\u00e9bastien_Rebuffi1", "aff": "INRIA;INRIA;Universit\u00e9 Grenoble Alpes", "aff_domain": "inria.fr;inria.fr;univ-grenoble-alpes.fr", "position": "Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\nanselmi2022reinforcement,\ntitle={Reinforcement Learning in a Birth and Death Process: Breaking the Dependence on the State Space},\nauthor={Jonatha Anselmi and Bruno Gaujal and Louis-S{\\'e}bastien Rebuffi},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=--fdtqo-iKM}\n}", "github": "", "project": "", "reviewers": "Sx4m;NyfN;sAR5;SMUj", "pdf_size": 331473, "rating": "5;5;6;7", "confidence": "4;2;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;3;4", "contribution": "2;3;3;4", "wc_summary": "51;51;81;80", "wc_strengths_and_weaknesses": "206;134;136;338", "wc_questions": "20;35;27;8", "wc_limitations": "1;10;15;4", "wc_review": "278;230;259;430", "wc_reply_reviewers": "35;17;0;0", "wc_reply_authors": "357;253;144;34", "reply_reviewers": "1;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.75, 14.7542366796795 ], "wc_strengths_and_weaknesses_avg": [ 203.5, 82.88998733260851 ], "wc_questions_avg": [ 22.5, 9.912113800799505 ], "wc_limitations_avg": [ 7.5, 5.408326913195984 ], "wc_review_avg": [ 299.25, 77.39953165232978 ], "wc_reply_reviewers_avg": [ 13.0, 14.474114826130128 ], "wc_reply_authors_avg": [ 197.0, 120.53422750405795 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15013019579205228370&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 15, "email": "inria.fr;inria.fr;univ-grenoble-alpes.fr", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "INRIA;Universit\u00e9 Grenoble Alpes", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.univ-grenoble-alpes.fr", "aff_unique_abbr": "INRIA;UGA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "An Embarrassingly Simple Approach to Semi-Supervised Few-Shot Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55024", "id": "-3Pg7QNIF1S", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5d3b57e06e3fc45f077eb5c9f28156d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-3Pg7QNIF1S", "openreview": "https://openreview.net/forum?id=-3Pg7QNIF1S", "poster": "/media/PosterPDFs/NeurIPS%202022/13111c20aee51aeb480ecbd988cd8cc9.png?t=1665212184.8788068", "slides": "https://nips.cc/virtual/2022/poster/55024", "video": "https://nips.cc/virtual/2022/poster/55024", "author_site": "Xiu-Shen Wei, H.-Y. Xu, Faen Zhang, Yuxin Peng, Wei Zhou", "tldr": "In this paper, we propose a simple but quite effective approach to predict accurate negative pseudo-labels of unlabeled data from an indirect learning perspective.", "abstract": "Semi-supervised few-shot learning consists in training a classifier to adapt to new tasks with limited labeled data and a fixed quantity of unlabeled data. Many sophisticated methods have been developed to address the challenges this problem comprises. In this paper, we propose a simple but quite effective approach to predict accurate negative pseudo-labels of unlabeled data from an indirect learning perspective, and then augment the extremely label-constrained support set in few-shot classification tasks. 
Our approach can be implemented in just a few lines of code by only using off-the-shelf operations, yet it is able to outperform state-of-the-art methods on four benchmark datasets.", "keywords": "Semi-Supervised Few-Shot Learning;Negative Learning;Few-Shot Learning", "primary_area": "", "supplementary_material": "", "author": "Xiu-Shen Wei;He-Yang Xu;Faen Zhang;Yuxin Peng;Wei Zhou", "authorids": "~Xiu-Shen_Wei1;xuhy@njust.edu.cn;zhangfaen@ainnovation.com;~Yuxin_Peng1;zhouwei@ciccalpha.com", "gender": ";;;M;", "homepage": ";;;http://39.108.48.32/mipl/pengyuxin/;", "dblp": ";;;;", "google_scholar": ";;;mFsXPNYAAAAJ;", "orcid": ";;;0000-0001-7658-3845;", "linkedin": ";;;;", "or_profile": "~Xiu-Shen_Wei1;xuhy@njust.edu.cn;zhangfaen@ainnovation.com;~Yuxin_Peng1;zhouwei@ciccalpha.com", "aff": ";;;Peking University;", "aff_domain": ";;;pku.edu.cn;", "position": ";;;Full Professor;", "bibtex": "@inproceedings{\nwei2022an,\ntitle={An Embarrassingly Simple Approach to Semi-Supervised Few-Shot Learning},\nauthor={Xiu-Shen Wei and He-Yang Xu and Faen Zhang and Yuxin Peng and Wei Zhou},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-3Pg7QNIF1S}\n}", "github": "", "project": "", "reviewers": "nug7;46zy;bRfN;QttX", "pdf_size": 2794138, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;4;3;4", "novelty": "3;3;2;3", "presentation": "2;4;4;4", "contribution": "3;3;2;3", "wc_summary": "40;70;61;117", "wc_strengths_and_weaknesses": "146;55;131;229", "wc_questions": "96;108;37;87", "wc_limitations": "16;29;13;81", "wc_review": "298;262;242;514", "wc_reply_reviewers": "171;0;28;28", "wc_reply_authors": "831;348;419;277", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 28.16913204200655 ], "wc_strengths_and_weaknesses_avg": [ 140.25, 61.7712514038691 ], "wc_questions_avg": [ 82.0, 27.027763503479157 ], "wc_limitations_avg": [ 34.75, 27.371289702898547 ], "wc_review_avg": [ 329.0, 108.67842472174502 ], "wc_reply_reviewers_avg": [ 56.75, 66.94540686260709 ], "wc_reply_authors_avg": [ 468.75, 215.08646517156768 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9797299855157139252&as_sdt=4000005&sciodt=0,18&hl=en", "gs_version_total": 6, "email": ";;;pku.edu.cn;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Local Identifiability of Deep ReLU Neural Networks: the Theory", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53394", "id": "-3cHWtrbLYq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b0ae046e198a5e43141519868a959c74-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-3cHWtrbLYq", "openreview": "https://openreview.net/forum?id=-3cHWtrbLYq", 
"poster": "/media/PosterPDFs/NeurIPS%202022/53394.png?t=1669860636.3628309", "slides": "https://nips.cc/virtual/2022/poster/53394", "video": "https://nips.cc/virtual/2022/poster/53394", "author_site": "Joachim Bona-Pellissier, Fran\u00e7ois Malgouyres, Francois Bachoc", "tldr": "We characterize theoretically the question of local identifiability for deep ReLU neural networks and we provide numerically testable conditions.", "abstract": "Is a sample rich enough to determine, at least locally, the parameters of a neural network? To answer this question, we introduce a new local parameterization of a given deep ReLU neural network by fixing the values of some of its weights. This allows us to define local lifting operators whose inverses are charts of a smooth manifold of a high dimensional space. The function implemented by the deep ReLU neural network composes the local lifting with a linear operator which depends on the sample. We derive from this convenient representation a geometrical necessary and sufficient condition of local identifiability. Looking at tangent spaces, the geometrical condition provides: 1/ a sharp and testable necessary condition of identifiability and 2/ a sharp and testable sufficient condition of local identifiability. The validity of the conditions can be tested numerically using backpropagation and matrix rank computations.", "keywords": "Deep Learning;ReLU networks;Conditions of identifiability;Lifting operator", "primary_area": "", "supplementary_material": "/attachment/b787b0539866c742230b866b0373967c0b0e0175.pdf", "author": "Joachim Bona-Pellissier;Francois Malgouyres;Francois Bachoc", "authorids": "~Joachim_Bona-Pellissier1;~Francois_Malgouyres1;~Francois_Bachoc1", "gender": "M;M;M", "homepage": "https://www.math.univ-toulouse.fr/;https://www.math.univ-toulouse.fr/~fmalgouy/;https://www.math.univ-toulouse.fr/~fbachoc/", "dblp": ";97/5816;130/6786", "google_scholar": ";ECRBHzwAAAAJ;Fv36axgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Joachim_Bona-Pellissier1;~Francois_Malgouyres1;~Francois_Bachoc1", "aff": "Universit\u00e9 Paul Sabatier;Universit\u00e9 de Toulouse;Institut de Math\u00e9matiques de Toulouse", "aff_domain": "univ-tlse3.fr;univ-tlse3.fr;math.univ-toulouse.fr", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nbona-pellissier2022local,\ntitle={Local Identifiability of Deep Re{LU} Neural Networks: the Theory},\nauthor={Joachim Bona-Pellissier and Francois Malgouyres and Francois Bachoc},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-3cHWtrbLYq}\n}", "github": "", "project": "", "reviewers": "jUtV;Pq5e;JEdY", "pdf_size": 830520, "rating": "5;5;6", "confidence": "4;3;3", "soundness": "3;4;2", "novelty": "3;2;3", "presentation": "2;3;2", "contribution": "3;2;3", "wc_summary": "41;487;36", "wc_strengths_and_weaknesses": "583;191;139", "wc_questions": "10;100;208", "wc_limitations": "10;17;8", "wc_review": "644;795;391", "wc_reply_reviewers": "170;237;0", "wc_reply_authors": "3176;1293;638", "reply_reviewers": "1;2;0", "reply_authors": "5;4;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 188.0, 211.43478111859142 ], "wc_strengths_and_weaknesses_avg": [ 304.3333333333333, 198.18734122597797 ], "wc_questions_avg": [ 106.0, 80.94442537939224 ], "wc_limitations_avg": [ 11.666666666666666, 3.858612300930075 ], "wc_review_avg": [ 610.0, 166.67533310801173 ], "wc_reply_reviewers_avg": [ 135.66666666666666, 99.75414221419022 ], "wc_reply_authors_avg": [ 1702.3333333333333, 1075.8024395254404 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7521119745045034442&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "univ-tlse3.fr;univ-tlse3.fr;math.univ-toulouse.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 Paul Sabatier;Universit\u00e9 de Toulouse;Institut de Math\u00e9matiques de Toulouse", "aff_unique_dep": ";;Math\u00e9matiques", "aff_unique_url": "https://www.unipaulsabatier.fr;https://www.univ-toulouse.fr;https://www.imtoulouse.fr", "aff_unique_abbr": "UPS;UT;IMT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Object Representations as Fixed Points: Training Iterative Refinement Algorithms with Implicit Differentiation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53093", "id": "-5rFUTO2NWe", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d301e2878a7ebadf1a95029e904fc7d0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-5rFUTO2NWe", "openreview": "https://openreview.net/forum?id=-5rFUTO2NWe", "poster": "/media/PosterPDFs/NeurIPS%202022/53093.png?t=1669080041.5980375", "slides": "https://nips.cc/virtual/2022/poster/53093", "video": "https://nips.cc/virtual/2022/poster/53093", "author_site": "Michael Chang, Tom Griffiths, Sergey Levine", "tldr": "We improve the training of object-centric learning methods by applying implicit differentiation to slot attention.", "abstract": "Current work in object-centric learning has been motivated by developing learning algorithms that infer independent and symmetric entities from the perceptual input. 
This often requires the use of iterative refinement procedures that break symmetries among equally plausible explanations for the data, but most prior works differentiate through the unrolled refinement process, which can make optimization exceptionally challenging. In this work, we observe that such iterative refinement methods can be made differentiable by means of the implicit function theorem, and develop an implicit differentiation approach that improves the stability and tractability of training such models by decoupling the forward and backward passes. This connection enables us to apply recent advances in optimizing implicit layers to not only improve the stability and optimization of the slot attention module in SLATE, a state-of-the-art method for learning entity representations, but do so with constant space and time complexity in backpropagation and only one additional line of code.", "keywords": "objects;implicit differentiation;slot attention", "primary_area": "", "supplementary_material": "/attachment/90abd51aa55a9e318551a604fbd6a1ca21d30f47.zip", "author": "Michael Chang;Thomas L. Griffiths;Sergey Levine", "authorids": "~Michael_Chang1;~Thomas_L._Griffiths1;~Sergey_Levine1", "gender": "M;;M", "homepage": "http://mbchang.github.io/;http://cocosci.princeton.edu/tom/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "192/1567;34/4472;80/7594", "google_scholar": "vgfGtykAAAAJ;https://scholar.google.com/citations?hl=en;8R35rCwAAAAJ", "orcid": ";;", "linkedin": "mbchang;;", "or_profile": "~Michael_Chang1;~Thomas_L._Griffiths1;~Sergey_Levine1", "aff": "University of California, Berkeley;Princeton University;Google", "aff_domain": "berkeley.edu;princeton.edu;google.com", "position": "PhD student;Professor;Research Scientist", "bibtex": "@inproceedings{\nchang2022object,\ntitle={Object Representations as Fixed Points: Training Iterative Refinement Algorithms with Implicit Differentiation},\nauthor={Michael Chang and Thomas L. Griffiths and Sergey Levine},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-5rFUTO2NWe}\n}", "github": "", "project": "", "reviewers": "STEd;KLwZ;pdiR;qYDF", "pdf_size": 13701464, "rating": "5;6;7;8", "confidence": "3;3;4;4", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "contribution": "2;2;3;4", "wc_summary": "56;63;23;63", "wc_strengths_and_weaknesses": "227;305;281;109", "wc_questions": "69;17;39;60", "wc_limitations": "19;1;20;6", "wc_review": "371;386;363;238", "wc_reply_reviewers": "621;358;0;27", "wc_reply_authors": "950;1014;462;316", "reply_reviewers": "2;2;0;1", "reply_authors": "4;3;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 51.25, 16.55860803328589 ], "wc_strengths_and_weaknesses_avg": [ 230.5, 75.62241731127087 ], "wc_questions_avg": [ 46.25, 20.09197601033806 ], "wc_limitations_avg": [ 11.5, 8.200609733428363 ], "wc_review_avg": [ 339.5, 59.179810746571334 ], "wc_reply_reviewers_avg": [ 251.5, 255.6975752720389 ], "wc_reply_authors_avg": [ 685.5, 301.80912842390967 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1470073374829428613&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "berkeley.edu;princeton.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Princeton University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.princeton.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Princeton;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Instance-Dependent Near-Optimal Policy Identification in Linear MDPs via Online Experiment Design", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53774", "id": "-76EsjcHnbj", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/27bf08fe91a31495099a0b9febcc9592-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-76EsjcHnbj", "openreview": "https://openreview.net/forum?id=-76EsjcHnbj", "poster": "/media/PosterPDFs/NeurIPS%202022/604616e4d592b744e14ec3ff33204dec.png?t=1667667316.96443", "slides": "https://nips.cc/virtual/2022/poster/53774", "video": "https://nips.cc/virtual/2022/poster/53774", "author_site": "Andrew Wagenmaker, Kevin Jamieson", "tldr": "In this work we show instance-dependent bounds on PAC policy learning in linear MDPs.", "abstract": "While much progress has been made in understanding the minimax sample complexity of reinforcement learning (RL)---the complexity of learning on the ``worst-case'' instance---such measures of complexity often do not capture the true difficulty of learning. In practice, on an ``easy'' instance, we might hope to achieve a complexity far better than that achievable on the worst-case instance. In this work we seek to understand this ``instance-dependent'' complexity of learning in the setting of RL with linear function approximation. 
We propose an algorithm, PEDEL, which achieves a fine-grained instance-dependent measure of complexity, the first of its kind in the RL with function approximation setting, thereby capturing the difficulty of learning on each particular problem instance. Through an explicit example, we show that PEDEL yields provable gains over low-regret, minimax-optimal algorithms and that such algorithms are unable to hit the instance-optimal rate. Our approach relies on a novel online experiment design-based procedure which focuses the exploration budget on the ``directions'' most relevant to learning a near-optimal policy, and may be of independent interest.", "keywords": "reinforcement learning;reinforcement learning theory;sequential decision making;function approximation;PAC;instance-dependence", "primary_area": "", "supplementary_material": "/attachment/e3dda01aa713bb4f7491931df1fa3fe334f490cd.pdf", "author": "Andrew Wagenmaker;Kevin Jamieson", "authorids": "~Andrew_Wagenmaker1;~Kevin_Jamieson1", "gender": "M;M", "homepage": "https://wagenmaker.github.io;", "dblp": "195/1036;85/10260", "google_scholar": "ym8AZSIAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Andrew_Wagenmaker1;~Kevin_Jamieson1", "aff": "Microsoft Research;University of Washington", "aff_domain": "microsoft.com;washington.edu", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nwagenmaker2022instancedependent,\ntitle={Instance-Dependent Policy Learning for Linear {MDP}s via Online Experiment Design},\nauthor={Andrew Wagenmaker and Kevin Jamieson},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-76EsjcHnbj}\n}", "github": "", "project": "", "reviewers": "zzUK;sWno;b4C6", "pdf_size": 838180, "rating": "6;8;8", "confidence": "4;3;4", "soundness": "2;4;4", "novelty": "3;4;4", "presentation": "3;4;4", "contribution": "3;4;4", "wc_summary": "94;51;203", "wc_strengths_and_weaknesses": "316;133;416", "wc_questions": "15;41;36", "wc_limitations": "10;21;41", "wc_review": "435;246;696", "wc_reply_reviewers": "88;30;43", "wc_reply_authors": "666;359;360", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 116.0, 63.97395303298575 ], "wc_strengths_and_weaknesses_avg": [ 288.3333333333333, 117.17887560857072 ], "wc_questions_avg": [ 30.666666666666668, 11.264496832477201 ], "wc_limitations_avg": [ 24.0, 12.832251036613439 ], "wc_review_avg": [ 459.0, 184.49390233826156 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 24.850665092821068 ], "wc_reply_authors_avg": [ 461.6666666666667, 144.4860623805017 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14081076203281580787&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "microsoft.com;washington.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": 
"Microsoft;University of Washington", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.washington.edu", "aff_unique_abbr": "MSR;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On the Robustness of Graph Neural Diffusion to Topology Perturbations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53396", "id": "-8tU21J6BcB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/29a0ea49a103a233b17c0705cdeccb66-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-8tU21J6BcB", "openreview": "https://openreview.net/forum?id=-8tU21J6BcB", "poster": "/media/PosterPDFs/NeurIPS%202022/53396.png?t=1669082646.7951474", "slides": "https://nips.cc/virtual/2022/poster/53396", "video": "https://nips.cc/virtual/2022/poster/53396", "author_site": "Yang Song, Qiyu Kang, Sijie Wang, Kai Zhao, Wee Peng Tay", "tldr": "", "abstract": "Neural diffusion on graphs is a novel class of graph neural networks that has attracted increasing attention recently. The capability of graph neural partial differential equations (PDEs) in addressing common hurdles of graph neural networks (GNNs), such as the problems of over-smoothing and bottlenecks, has been investigated but not their robustness to adversarial attacks. In this work, we explore the robustness properties of graph neural PDEs. We empirically demonstrate that graph neural PDEs are intrinsically more robust against topology perturbation as compared to other GNNs. We provide insights into this phenomenon by exploiting the stability of the heat semigroup under graph topology perturbations. We discuss various graph diffusion operators and relate them to existing graph neural PDEs. Furthermore, we propose a general graph neural PDE framework based on which a new class of robust GNNs can be defined. We verify that the new model achieves comparable state-of-the-art performance on several benchmark datasets.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/da167775bfaa981fac0278d01375ebf200ec880f.pdf", "author": "Yang Song;QIYU KANG;Sijie Wang;Zhao Kai;Wee Peng Tay", "authorids": "~Yang_Song7;~QIYU_KANG1;~Sijie_Wang1;~Zhao_Kai2;~Wee_Peng_Tay1", "gender": "M;M;M;M;", "homepage": "https://c3-yang-song.github.io;https://kangqiyu.github.io./publications.html;https://github.com/sijieaaa;;https://personal.ntu.edu.sg/wptay/", "dblp": "24/4470-12;204/3718;21/4330.html;;45/3753", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=cS45eEcAAAAJ;IUfQMOYAAAAJ;;BkCI7rEAAAAJ", "orcid": ";;0000-0002-0925-2365;;0000-0002-1543-195X", "linkedin": ";;;zhao-kai-29010b169/;", "or_profile": "~Yang_Song7;~QIYU_KANG1;~Sijie_Wang1;~Zhao_Kai2;~Wee_Peng_Tay1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "Postdoc;Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsong2022on,\ntitle={On the Robustness of Graph Neural Diffusion to Topology Perturbations},\nauthor={Yang Song and QIYU KANG and Sijie Wang and Zhao Kai and Wee Peng Tay},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-8tU21J6BcB}\n}", "github": "", "project": "", "reviewers": "BHjT;UKSj;w639", "pdf_size": 864713, "rating": "6;6;7", "confidence": "4;3;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "contribution": "3;3;3", "wc_summary": "76;73;49", "wc_strengths_and_weaknesses": "179;440;325", "wc_questions": "141;66;56", "wc_limitations": "1;84;9", "wc_review": "397;663;439", "wc_reply_reviewers": "219;0;339", "wc_reply_authors": "2354;2278;1816", "reply_reviewers": "1;0;3", "reply_authors": "7;6;7", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.0, 12.083045973594572 ], "wc_strengths_and_weaknesses_avg": [ 314.6666666666667, 106.80303782612594 ], "wc_questions_avg": [ 87.66666666666667, 37.93268892247014 ], "wc_limitations_avg": [ 31.333333333333332, 37.38389433373088 ], "wc_review_avg": [ 499.6666666666667, 116.7599626965035 ], "wc_reply_reviewers_avg": [ 186.0, 140.34956359034396 ], "wc_reply_authors_avg": [ 2149.3333333333335, 237.73561412254205 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 6.666666666666667, 0.4714045207910317 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12358515421385829046&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Composite Feature Selection Using Deep Ensembles", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54069", "id": "-9PV7GKwYpM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/eab69250e98b1f9fc54e473cc7a69439-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-9PV7GKwYpM", "openreview": "https://openreview.net/forum?id=-9PV7GKwYpM", "poster": "/media/PosterPDFs/NeurIPS%202022/54069.png?t=1669210912.9632244", "slides": "https://nips.cc/virtual/2022/poster/54069", "video": "https://nips.cc/virtual/2022/poster/54069", "author_site": "Fergus Imrie, Alexander Norcliffe, Pietro Li\u00f3, Mihaela van der Schaar", "tldr": "We introduce the problem of finding groups of predictive features without predefined partitions and propose a novel deep learning architecture to solve this problem.", "abstract": "In many real world problems, features do not act alone but in combination with each other. For example, in genomics, diseases might not be caused by any single mutation but require the presence of multiple mutations. Prior work on feature selection either seeks to identify individual features or can only determine relevant groups from a predefined set. We investigate the problem of discovering groups of predictive features without predefined grouping. To do so, we define predictive groups in terms of linear and non-linear interactions between features. 
We introduce a novel deep learning architecture that uses an ensemble of feature selection models to find predictive groups, without requiring candidate groups to be provided. The selected groups are sparse and exhibit minimum overlap. Furthermore, we propose a new metric to measure similarity between discovered groups and the ground truth. We demonstrate the utility of our model on multiple synthetic tasks and semi-synthetic chemistry datasets, where the ground truth structure is known, as well as an image dataset and a real-world cancer dataset.", "keywords": "Feature Selection;Group Feature Selection", "primary_area": "", "supplementary_material": "/attachment/9d4a0d6599c7bdd61ebcc3a64ca9ea43db8b81b6.zip", "author": "Fergus Imrie;Alexander Luke Ian Norcliffe;Pietro Lio;Mihaela van der Schaar", "authorids": "~Fergus_Imrie1;~Alexander_Luke_Ian_Norcliffe2;~Pietro_Lio1;~Mihaela_van_der_Schaar2", "gender": ";M;M;F", "homepage": ";;https://www.cst.cam.ac.uk/people/pl219;https://www.vanderschaar-lab.com", "dblp": "281/4466;;l/PietroLio.html;", "google_scholar": "4qCGgpsAAAAJ;BbeDr6EAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;DZ3S--MAAAAJ", "orcid": "0000-0002-6241-0123;0000-0002-9983-8027;0000-0002-0540-5053;", "linkedin": ";alex-norcliffe-5901a1171/?originalSubdomain=uk;;", "or_profile": "~Fergus_Imrie1;~Alexander_Luke_Ian_Norcliffe2;~Pietro_Lio1;~Mihaela_van_der_Schaar2", "aff": "University of California, Los Angeles;University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "ucla.edu;cam.ac.uk;cam.ac.uk;ucla.edu", "position": "Postdoc;PhD Student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nimrie2022composite,\ntitle={Composite Feature Selection Using Deep Ensembles},\nauthor={Fergus Imrie and Alexander Luke Ian Norcliffe and Pietro Lio and Mihaela van der Schaar},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-9PV7GKwYpM}\n}", "github": "", "project": "", "reviewers": "LHmD;iDHM;WTbm;jAMg", "pdf_size": 465124, "rating": "5;6;6;6", "confidence": "4;3;5;5", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "contribution": "2;2;2;3", "wc_summary": "42;58;60;104", "wc_strengths_and_weaknesses": "186;276;320;134", "wc_questions": "468;54;125;237", "wc_limitations": "1;15;40;25", "wc_review": "697;403;545;500", "wc_reply_reviewers": "0;22;7;100", "wc_reply_authors": "1796;1893;2021;1653", "reply_reviewers": "0;1;1;1", "reply_authors": "5;5;5;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 23.021728866442675 ], "wc_strengths_and_weaknesses_avg": [ 229.0, 73.08214556237385 ], "wc_questions_avg": [ 221.0, 156.8199604642215 ], "wc_limitations_avg": [ 20.25, 14.236836024903848 ], "wc_review_avg": [ 536.25, 106.05040075360394 ], "wc_reply_reviewers_avg": [ 32.25, 39.91475291167415 ], "wc_reply_authors_avg": [ 1840.75, 134.6038168106685 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.0, 0.0 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17009281324081005911&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 9, "email": "ucla.edu;cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of California, Los Angeles;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.cam.ac.uk", "aff_unique_abbr": "UCLA;Cambridge", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Los Angeles;Cambridge", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Look Around and Refer: 2D Synthetic Semantics Knowledge Distillation for 3D Visual Grounding", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54582", "id": "-AxpnEv1f1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f0b42291ddab77dcb2ef8a3488301b62-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-AxpnEv1f1", "openreview": "https://openreview.net/forum?id=-AxpnEv1f1", "poster": "/media/PosterPDFs/NeurIPS%202022/54582.png?t=1669607476.251841", "slides": "https://nips.cc/virtual/2022/poster/54582", "video": "https://nips.cc/virtual/2022/poster/54582", "author_site": "Eslam Bakr, Yasmeen Alsaedy, Mohamed Elhoseiny", "tldr": "We leverage 2D clues, synthetically generated from 3D point clouds, that empirically show their aptitude to boost the quality of the 3D learned visual representations.", "abstract": "3D visual grounding task has been explored with visual and language streams to comprehend referential language for identifying targeted objects in 3D scenes.\nHowever, most existing methods devote the visual stream to capture the 3D visual clues using off-the-shelf point cloud encoders. The main question we address is \u201ccan we consolidate the 3D visual stream by 2D clues and efficiently utilize them in both training and testing phases?\u201d. 
The main idea is to assist the 3D encoder by incorporating rich 2D object representations without requiring extra 2D inputs. \nTo this end, we leverage 2D clues, synthetically generated from 3D point clouds, that empirically show their aptitude to boost the quality of the learned visual representations. We validate our approach through comprehensive experiments on Nr3D, Sr3D, and ScanRefer datasets. Our experiments show consistent performance gains against counterparts, where our proposed module, dubbed LAR, significantly outperforms state-of-the-art 3D visual grounding techniques on three benchmarks.\nOur code will be made publicly available.", "keywords": "3D Visual Grounding;Multi-Modal;3D;3D Detection;Synthetic 2D generation.", "primary_area": "", "supplementary_material": "/attachment/3aef3388597ab55520f46f888029f55ad169006e.pdf", "author": "Eslam Mohamed BAKR;Yasmeen Youssef Alsaedy;Mohamed Elhoseiny", "authorids": "~Eslam_Mohamed_BAKR1;~Yasmeen_Youssef_Alsaedy1;~Mohamed_Elhoseiny1", "gender": "M;F;M", "homepage": ";;http://www.mohamed-elhoseiny.com", "dblp": "330/8100;;125/2894", "google_scholar": "https://scholar.google.com/citations?hl=en;qyJ_Bg4AAAAJ;iRBUTOAAAAAJ", "orcid": ";0000-0003-3592-7298;0000-0001-9659-1551", "linkedin": "eslam-bakr-a693a0124/;yasmeenalsaedy;mohamed-elhoseiny-8a836215/", "or_profile": "~Eslam_Mohamed_BAKR1;~Yasmeen_Youssef_Alsaedy1;~Mohamed_Elhoseiny1", "aff": "Valeo;Jeddah University ;KAUST", "aff_domain": "valeo.com;uj.edu.sa;kaust.edu.sa", "position": "Researcher;MS student;Associate Professor", "bibtex": "@inproceedings{\nbakr2022look,\ntitle={Look Around and Refer: 2D Synthetic Semantics Knowledge Distillation for 3D Visual Grounding},\nauthor={Eslam Mohamed BAKR and Yasmeen Youssef Alsaedy and Mohamed Elhoseiny},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-AxpnEv1f1}\n}", "github": "", "project": "", "reviewers": "8Se9;G6JP;J6Di;yWd9", "pdf_size": 9135103, "rating": "5;5;5;7", "confidence": "4;3;4;4", "soundness": "3;2;3;3", "novelty": "3;3;2;3", "presentation": "2;3;2;3", "contribution": "3;3;2;3", "wc_summary": "103;54;136;81", "wc_strengths_and_weaknesses": "159;179;276;582", "wc_questions": "154;11;15;264", "wc_limitations": "15;66;16;32", "wc_review": "431;310;443;959", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1727;1061;1719;2417", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;3;4", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 30.054117854297438 ], "wc_strengths_and_weaknesses_avg": [ 299.0, 169.276401190479 ], "wc_questions_avg": [ 111.0, 105.4442980914568 ], "wc_limitations_avg": [ 32.25, 20.620075169601105 ], "wc_review_avg": [ 535.75, 249.8393233660386 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1731.0, 479.4934827502872 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4825555452150751793&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 11, "email": "valeo.com;uj.edu.sa;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Valeo;Jeddah University;King Abdullah University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.valeo.com;https://www.ju.edu.sa;https://www.kaust.edu.sa", "aff_unique_abbr": ";JU;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "France;Saudi Arabia" }, { "title": "Data-Efficient Augmentation for Training Neural Networks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53906", "id": "-BxFk0t7wN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2130b8a44e2e28e25dc7d0ee4eb6d9cf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-BxFk0t7wN", "openreview": "https://openreview.net/forum?id=-BxFk0t7wN", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53906", "video": "https://nips.cc/virtual/2022/poster/53906", "author_site": "Tian Yu Liu, Baharan Mirzasoleiman", "tldr": "", "abstract": "Data augmentation is essential to achieve state-of-the-art performance in many deep learning applications. However, the most effective augmentation techniques become computationally prohibitive for even medium-sized datasets. To address this, we propose a rigorous technique to select subsets of data points that, when augmented, closely capture the training dynamics of full data augmentation. We first show that data augmentation, modeled as additive perturbations, improves learning and generalization by relatively enlarging and perturbing the smaller singular values of the network Jacobian, while preserving its prominent directions. This prevents overfitting and enhances learning the harder-to-learn information. 
Then, we propose a framework to iteratively extract small subsets of training data that, when augmented, closely capture the alignment of the fully augmented Jacobian with labels/residuals. We prove that stochastic gradient descent applied to the augmented subsets found by our approach has similar training dynamics to that of fully augmented data. Our experiments demonstrate that our method achieves 6.3x speedup on CIFAR10 and 2.2x speedup on SVHN, and outperforms the baselines by up to 10\\% across various subset sizes. Similarly, on TinyImageNet and ImageNet, our method beats the baselines by up to 8%, while achieving up to 3.3x speedup across various subset sizes. Finally, training on and augmenting 50% subsets using our method on a version of CIFAR10 corrupted with label noise even outperforms using the full dataset.", "keywords": "Data Augmentation;Deep Learning;Coresets", "primary_area": "", "supplementary_material": "/attachment/68a884cb0b8da5d417eb2407fd8300b94d637fac.pdf", "author": "Tian Yu Liu;Baharan Mirzasoleiman", "authorids": "~Tian_Yu_Liu2;~Baharan_Mirzasoleiman4", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Tian_Yu_Liu2;~Baharan_Mirzasoleiman4", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nliu2022dataefficient,\ntitle={Data-Efficient Augmentation for Training Neural Networks},\nauthor={Tian Yu Liu and Baharan Mirzasoleiman},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-BxFk0t7wN}\n}", "github": "", "project": "", "reviewers": "xtob;y9hQ;rqcs", "pdf_size": 503246, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "contribution": "2;3;3", "wc_summary": "76;67;117", "wc_strengths_and_weaknesses": "250;172;1047", "wc_questions": "3;140;218", "wc_limitations": "8;13;84", "wc_review": "337;392;1466", "wc_reply_reviewers": "93;0;362", "wc_reply_authors": "574;301;1555", "reply_reviewers": "1;0;1", "reply_authors": "2;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.66666666666667, 21.761331658599286 ], "wc_strengths_and_weaknesses_avg": [ 489.6666666666667, 395.3785808844761 ], "wc_questions_avg": [ 120.33333333333333, 88.86819203492077 ], "wc_limitations_avg": [ 35.0, 34.708308323320324 ], "wc_review_avg": [ 731.6666666666666, 519.7373268189316 ], "wc_reply_reviewers_avg": [ 151.66666666666666, 153.49773794931167 ], "wc_reply_authors_avg": [ 810.0, 538.4551977648651 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16120463592327015292&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";", "author_num": 2 }, { "title": "Discrete-Convex-Analysis-Based Framework for Warm-Starting Algorithms with Predictions", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55327", "id": "-GgDBzwZ-e7", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/844e61124d9e1f58632bf0c8968ad728-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-GgDBzwZ-e7", "openreview": "https://openreview.net/forum?id=-GgDBzwZ-e7", "poster": "/media/PosterPDFs/NeurIPS%202022/ccb0989662211f61edae2e26d58ea92f.png?t=1667367696.4698086", "slides": "https://nips.cc/virtual/2022/poster/55327", "video": "https://nips.cc/virtual/2022/poster/55327", "author_site": "Shinsaku Sakaue, Taihei Oki", "tldr": "We present a principled discrete-convex-analysis-based framework for warm-starting algorithms with predictions to improve time complexity bounds.", "abstract": "Augmenting algorithms with learned predictions is a promising approach for going beyond worst-case bounds. Dinitz, Im, Lavastida, Moseley, and Vassilvitskii~(2021) have demonstrated that warm-starts with learned dual solutions can improve the time complexity of the Hungarian method for weighted perfect bipartite matching. We extend and improve their framework in a principled manner via \\textit{discrete convex analysis} (DCA), a discrete analog of convex analysis. We show the usefulness of our DCA-based framework by applying it to weighted perfect bipartite matching, weighted matroid intersection, and discrete energy minimization for computer vision. Our DCA-based framework yields time complexity bounds that depend on the $\\ell_\\infty$-distance from a predicted solution to an optimal solution, which has two advantages relative to the previous $\\ell_1$-distance-dependent bounds: time complexity bounds are smaller, and learning of predictions is more sample efficient. We also discuss whether to learn primal or dual solutions from the DCA perspective.", "keywords": "combinatorial optimization;discrete convex analysis;algorithms with predictions;time complexity", "primary_area": "", "supplementary_material": "/attachment/b8dab548cedd837f8c8127b30bc760f243eebdfe.pdf", "author": "Shinsaku Sakaue;Taihei Oki", "authorids": "~Shinsaku_Sakaue1;oki@mist.i.u-tokyo.ac.jp", "gender": "M;", "homepage": "https://ssakaue.github.io/;", "dblp": "183/6350;", "google_scholar": "https://scholar.google.co.jp/citations?user=9oTbrmEAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Shinsaku_Sakaue1;oki@mist.i.u-tokyo.ac.jp", "aff": "NTT;", "aff_domain": "ntt.co.jp;", "position": "Researcher;", "bibtex": "@inproceedings{\nsakaue2022discreteconvexanalysisbased,\ntitle={Discrete-Convex-Analysis-Based Framework for Warm-Starting Algorithms with Predictions},\nauthor={Shinsaku Sakaue and Taihei Oki},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-GgDBzwZ-e7}\n}", "github": "", "project": "", "reviewers": "KW47;TRn1;zvz6", "pdf_size": 837193, "rating": "6;6;7", "confidence": "1;4;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;3", "contribution": "3;3;3", "wc_summary": "81;141;154", "wc_strengths_and_weaknesses": "125;236;48", "wc_questions": "113;37;17", "wc_limitations": "23;30;1", "wc_review": "342;444;220", "wc_reply_reviewers": "0;31;0", "wc_reply_authors": "726;647;90", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 125.33333333333333, 31.794478905761125 ], "wc_strengths_and_weaknesses_avg": [ 136.33333333333334, 77.16792655558973 ], "wc_questions_avg": [ 55.666666666666664, 41.354833118055524 ], "wc_limitations_avg": [ 18.0, 12.355835328567093 ], "wc_review_avg": [ 335.3333333333333, 91.56903892085407 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 14.613540144521982 ], "wc_reply_authors_avg": [ 487.6666666666667, 283.0363149059773 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.1889822365046136, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1961214603881962807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ntt.co.jp;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "NTT Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt.co.jp", "aff_unique_abbr": "NTT", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Picking on the Same Person: Does Algorithmic Monoculture lead to Outcome Homogenization?", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54301", "id": "-H6kKm4DVo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/17a234c91f746d9625a75cf8a8731ee2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-H6kKm4DVo", "openreview": "https://openreview.net/forum?id=-H6kKm4DVo", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54301", "video": "https://nips.cc/virtual/2022/poster/54301", "author_site": "Rishi Bommasani, Kathleen A. Creel, Ananya Kumar, Dan Jurafsky, Percy Liang", "tldr": "ML is built on strong traditions of sharing: we investigate if there are harms endemic to sharing (aka algorithmic monoculture) by introducing, formalizing, and measuring outcome homogenization.", "abstract": "As the scope of machine learning broadens, we observe a recurring theme of *algorithmic monoculture*: the same systems, or systems that share components (e.g. datasets, models), are deployed by multiple decision-makers. While sharing offers advantages like amortizing effort, it also has risks. We introduce and formalize one such risk, *outcome homogenization*: the extent to which particular individuals or groups experience the same outcomes across different deployments. If the same individuals or groups exclusively experience undesirable outcomes, this may institutionalize systemic exclusion and reinscribe social hierarchy. 
We relate algorithmic monoculture and outcome homogenization by proposing the *component sharing hypothesis*: if algorithmic systems are increasingly built on the same data or models, then they will increasingly homogenize outcomes. We test this hypothesis on algorithmic fairness benchmarks, demonstrating that increased data-sharing reliably exacerbates homogenization and individual-level effects generally exceed group-level effects. Further, given the current regime in AI of foundation models, i.e. pretrained models that can be adapted to myriad downstream tasks, we test whether model-sharing homogenizes outcomes across tasks. We observe mixed results: we find that for both vision and language settings, the specific methods for adapting a foundation model significantly influence the degree of outcome homogenization. We also identify societal challenges that inhibit the measurement, diagnosis, and rectification of outcome homogenization in deployed machine learning systems.", "keywords": "systemic harms of ML;sharing;fairness;algorithmic monoculture;foundation models;AI Ethics", "primary_area": "", "supplementary_material": "/attachment/e7c94c10e1897668dcca5cd3cdd1b596249ad6da.zip", "author": "Rishi Bommasani;Kathleen Creel;Ananya Kumar;Dan Jurafsky;Percy Liang", "authorids": "~Rishi_Bommasani1;~Kathleen_Creel1;~Ananya_Kumar1;~Dan_Jurafsky1;~Percy_Liang1", "gender": "M;F;M;M;", "homepage": "https://rishibommasani.github.io/;https://kathleenacreel.com;https://ananyakumar.wordpress.com/;http://web.stanford.edu/~jurafsky/;https://cs.stanford.edu/~pliang/", "dblp": "245/8673;249/6794;192/0474;31/985;04/1701", "google_scholar": "WMBXw1EAAAAJ;qMIT0dcAAAAJ;tP5IBFkAAAAJ;uZg9l58AAAAJ;pouyVyUAAAAJ", "orcid": ";0000-0001-7371-2680;;;", "linkedin": ";;;;", "or_profile": "~Rishi_Bommasani1;~Kathleen_Creel1;~Ananya_Kumar1;~Dan_Jurafsky1;~Percy_Liang1", "aff": "Stanford University;Stanford University;Microsoft;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;microsoft.com;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;Intern;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbommasani2022picking,\ntitle={Picking on the Same Person: Does Algorithmic Monoculture lead to Outcome Homogenization?},\nauthor={Rishi Bommasani and Kathleen Creel and Ananya Kumar and Dan Jurafsky and Percy Liang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-H6kKm4DVo}\n}", "github": "", "project": "", "reviewers": "zUzT;h4Gz;o96f", "pdf_size": 565057, "rating": "4;6;6", "confidence": "4;3;4", "soundness": "2;3;2", "novelty": "3;3;2", "presentation": "3;3;4", "contribution": "3;3;2", "wc_summary": "53;62;365", "wc_strengths_and_weaknesses": "528;68;328", "wc_questions": "153;98;81", "wc_limitations": "30;2;18", "wc_review": "764;230;792", "wc_reply_reviewers": "0;0;261", "wc_reply_authors": "2004;468;2538", "reply_reviewers": "0;0;2", "reply_authors": "4;2;5", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 160.0, 145.00344823486094 ], "wc_strengths_and_weaknesses_avg": [ 308.0, 188.32595855767377 ], "wc_questions_avg": [ 110.66666666666667, 30.728199137310703 ], "wc_limitations_avg": [ 16.666666666666668, 11.469767022723502 ], "wc_review_avg": [ 595.3333333333334, 258.5824605206024 ], "wc_reply_reviewers_avg": [ 87.0, 123.03657992645927 ], "wc_reply_authors_avg": [ 1670.0, 877.4554119725971 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11875984953178672256&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;microsoft.com;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com", "aff_unique_abbr": "Stanford;Microsoft", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "RISE: Robust Individualized Decision Learning with Sensitive Variables", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53204", "id": "-IHPcl1ZhF5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/7b2f0758334389b8ad0665a9bd165463-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-IHPcl1ZhF5", "openreview": "https://openreview.net/forum?id=-IHPcl1ZhF5", "poster": "/media/PosterPDFs/NeurIPS%202022/53204.png?t=1669174857.9160004", "slides": "https://nips.cc/virtual/2022/poster/53204", "video": "https://nips.cc/virtual/2022/poster/53204", "author_site": "Xiaoqing Tan, Zhengling Qi, Christopher Seymour, Lu Tang", "tldr": "We introduce RISE, a robust individualized decision learning framework to improve the worst-case outcomes of individuals caused by sensitive variables that are unavailable at the time of decision.", "abstract": "This paper introduces RISE, a robust individualized decision learning framework with sensitive variables, where sensitive variables are collectible data and important to the intervention decision, but their inclusion in decision making is prohibited due to reasons such as delayed availability or fairness concerns. 
A naive baseline is to ignore these sensitive variables in learning decision rules, leading to significant uncertainty and bias. To address this, we propose a decision learning framework to incorporate sensitive variables during offline training but not include them in the input of the learned decision rule during model deployment. Specifically, from a causal perspective, the proposed framework intends to improve the worst-case outcomes of individuals caused by sensitive variables that are unavailable at the time of decision. Unlike most existing literature that uses mean-optimal objectives, we propose a robust learning framework by finding a newly defined quantile- or infimum-optimal decision rule. The reliable performance of the proposed method is demonstrated through synthetic experiments and three real-world applications. ", "keywords": "causal inference;individualized treatment rules;sensitive variables;robustness", "primary_area": "", "supplementary_material": "/attachment/b50dc34a7460db435e54545fe1eb95d531a79ad7.pdf", "author": "Xiaoqing Tan;Zhengling Qi;Christopher Warren Seymour;Lu Tang", "authorids": "~Xiaoqing_Tan1;~Zhengling_Qi1;~Christopher_Warren_Seymour1;~Lu_Tang1", "gender": "F;;;M", "homepage": "http://ellenxtan.github.io/;https://sites.google.com/view/statsqizl/home?authuser=0;;https://sites.pitt.edu/~lutang/", "dblp": ";173/0201;;", "google_scholar": "_zvwtKAAAAAJ;;;lD6kQl8AAAAJ", "orcid": ";;;0000-0001-6143-9314", "linkedin": "xiaoqing-tan/;;;", "or_profile": "~Xiaoqing_Tan1;~Zhengling_Qi1;~Christopher_Warren_Seymour1;~Lu_Tang1", "aff": "University of Pittsburgh;George Washington University;;University of Pittsburgh", "aff_domain": "pitt.edu;gwu.edu;;pitt.edu", "position": "PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\ntan2022rise,\ntitle={{RISE}: Robust Individualized Decision Learning with Sensitive Variables},\nauthor={Xiaoqing Tan and Zhengling Qi and Christopher Warren Seymour and Lu Tang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-IHPcl1ZhF5}\n}", "github": "", "project": "", "reviewers": "XrZR;Git7;6nvm;ouLu", "pdf_size": 477212, "rating": "5;6;6;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;4;2;3", "presentation": "3;3;3;3", "contribution": "2;4;2;3", "wc_summary": "93;63;119;40", "wc_strengths_and_weaknesses": "202;392;143;570", "wc_questions": "273;127;84;7", "wc_limitations": "6;2;30;7", "wc_review": "574;584;376;624", "wc_reply_reviewers": "55;0;0;40", "wc_reply_authors": "1477;1367;811;2073", "reply_reviewers": "1;0;0;1", "reply_authors": "3;3;2;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 78.75, 29.88624265443885 ], "wc_strengths_and_weaknesses_avg": [ 326.75, 167.89487038024717 ], "wc_questions_avg": [ 122.75, 96.81522349300238 ], "wc_limitations_avg": [ 11.25, 10.985786271359915 ], "wc_review_avg": [ 539.5, 96.23279066929318 ], "wc_reply_reviewers_avg": [ 23.75, 24.33490291741473 ], "wc_reply_authors_avg": [ 1432.0, 447.9877230460674 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14552433169165007620&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 8, "email": "pitt.edu;gwu.edu;;pitt.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Pittsburgh;George Washington University", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;https://www.gwu.edu", "aff_unique_abbr": "Pitt;GWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Differentiable Semantic Metric Approximation in Probabilistic Embedding for Cross-Modal Retrieval", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54353", "id": "-KPNRZ8i0ag", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/4e786a87e7ae249de2b1aeaf5d8fde82-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-KPNRZ8i0ag", "openreview": "https://openreview.net/forum?id=-KPNRZ8i0ag", "poster": "/media/PosterPDFs/NeurIPS%202022/54353.png?t=1669288640.3008287", "slides": "https://nips.cc/virtual/2022/poster/54353", "video": "https://nips.cc/virtual/2022/poster/54353", "author_site": "Hao Li, Jingkuan Song, Lianli Gao, Pengpeng Zeng, Haonan Zhang, Gongfu Li", "tldr": "This paper presents a method that can improve and evaluate the multiplicity of probabilistic embedding in noisy cross-modal datasets.", "abstract": "Cross-modal retrieval aims to build correspondence between multiple modalities by learning a common representation space. Typically, an image can match multiple texts semantically and vice versa, which significantly increases the difficulty of this task. To address this problem, probabilistic embedding is proposed to quantify these many-to-many relationships. However, existing datasets (e.g., MS-COCO) and metrics (e.g., Recall@K) cannot fully represent these diverse correspondences due to non-exhaustive annotations. 
Based on this observation, we utilize semantic correlation computed by CIDEr to find the potential correspondences. Then we present an effective metric, named Average Semantic Precision (ASP), which can measure the ranking precision of semantic correlation for retrieval sets. Additionally, we introduce a novel and concise objective, coined Differentiable ASP Approximation (DAA). Concretely, DAA can optimize ASP directly by making the ranking function of ASP differentiable through a sigmoid function. To verify the effectiveness of our approach, extensive experiments are conducted on MS-COCO, CUB Captions, and Flickr30K, which are commonly used in cross-modal retrieval. The results show that our approach obtains superior performance over the state-of-the-art approaches on all metrics. The code and trained models are released at https://github.com/leolee99/2022-NeurIPS-DAA.", "keywords": "cross-modal retrieval;probabilistic embedding;image-text matching;multiplicity;metric learning;robust", "primary_area": "", "supplementary_material": "/attachment/82eb86f294d951d4d76c0bd24fab61baead23115.pdf", "author": "Hao Li;Jingkuan Song;Lianli Gao;Pengpeng Zeng;Haonan Zhang;Gongfu Li", "authorids": "~Hao_Li21;~Jingkuan_Song3;~Lianli_Gao1;~Pengpeng_Zeng1;~Haonan_Zhang2;gongfuli@tencent.com", "gender": "M;M;F;M;M;", "homepage": "https://leolee99.github.io/;https://cfm.uestc.edu.cn/~songjingkuan/;https://lianligao.github.io/;https://ppengzeng.github.io/;https://zchoi.github.io/;", "dblp": "17/5705;70/10575;123/9849.html;222/7986;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;F5Zy9V4AAAAJ;https://scholar.google.com.au/citations?user=zsm2dpYAAAAJ;Zs0PWb4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0001-8205-6734;;;0000-0002-0672-3790;0000-0003-1015-7338;", "linkedin": "hao-li-b5b2b2208/;;;;;", "or_profile": "~Hao_Li21;~Jingkuan_Song3;~Lianli_Gao1;~Pengpeng_Zeng1;~Haonan_Zhang2;gongfuli@tencent.com", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China,;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;", "position": "MS student;Full Professor;Full Professor;PhD student;MS student;", "bibtex": "@inproceedings{\nli2022a,\ntitle={A Differentiable Semantic Metric Approximation in Probabilistic Embedding for Cross-Modal Retrieval},\nauthor={Hao Li and Jingkuan Song and Lianli Gao and Pengpeng Zeng and Haonan Zhang and Gongfu Li},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-KPNRZ8i0ag}\n}", "github": "", "project": "", "reviewers": "YuA1;vghc;CS9A;mhZH", "pdf_size": 772642, "rating": "5;5;7;8", "confidence": "5;5;5;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "contribution": "2;2;3;3", "wc_summary": "98;67;119;83", "wc_strengths_and_weaknesses": "168;400;140;317", "wc_questions": "5;109;221;3", "wc_limitations": "8;1;2;1", "wc_review": "279;577;482;404", "wc_reply_reviewers": "15;0;0;0", "wc_reply_authors": "849;1411;1488;513", "reply_reviewers": "1;0;0;0", "reply_authors": "2;3;3;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 91.75, 19.17517926904466 ], "wc_strengths_and_weaknesses_avg": [ 256.25, 106.8372009180323 ], "wc_questions_avg": [ 84.5, 89.71482597653522 ], "wc_limitations_avg": [ 3.0, 2.9154759474226504 ], "wc_review_avg": [ 435.5, 109.16615775962805 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 1065.25, 403.1143603247098 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1646203091438958458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Not too little, not too much: a theoretical analysis of graph (over)smoothing", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53409", "id": "-Lm0B9UYMy6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0f956ca6f667c62e0f71511773c86a59-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-Lm0B9UYMy6", "openreview": "https://openreview.net/forum?id=-Lm0B9UYMy6", "poster": "/media/PosterPDFs/NeurIPS%202022/53409.png?t=1669123284.273222", "slides": "https://nips.cc/virtual/2022/poster/53409", "video": "https://nips.cc/virtual/2022/poster/53409", "tldr": "We showcase two representative examples where mean aggregation provably improves learning, before it eventually collapses to oversmoothing.", "abstract": "We analyze graph smoothing with mean aggregation, where each node successively receives the average of the features of its neighbors. Indeed, it has quickly been observed that Graph Neural Networks (GNNs), which generally follow some variant of Message-Passing (MP) with repeated aggregation, may be subject to the oversmoothing phenomenon: by performing too many rounds of MP, the node features tend to converge to a non-informative limit. In the case of mean aggregation, for connected graphs, the node features become constant across the whole graph. 
At the other end of the spectrum, it is intuitively obvious that some MP rounds are necessary, but existing analyses do not exhibit both phenomena at once: beneficial ``finite'' smoothing and oversmoothing in the limit. In this paper, we consider simplified linear GNNs, and rigorously analyze two examples for which a finite number of mean aggregation steps provably improves the learning performance, before oversmoothing kicks in. We consider a latent space random graph model, where node features are partial observations of the latent variables and the graph contains pairwise relationships between them. We show that graph smoothing restores some of the lost information, up to a certain point, by two phenomena: graph smoothing shrinks non-principal directions in the data faster than principal ones, which is useful for regression, and shrinks nodes within communities faster than they collapse together, which improves classification.", "keywords": "graph neural network;theory;oversmoothing;aggregation", "primary_area": "", "supplementary_material": "/attachment/80bc1f86956a7a4cd3e7d48f789bd70ce0e63b8c.pdf", "author": "Nicolas Keriven", "authorids": "~Nicolas_Keriven1", "gender": "", "homepage": "https://nkeriven.github.io/", "dblp": "142/4193", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Nicolas_Keriven1", "aff": "Gipsa-lab", "aff_domain": "gipsa-lab.grenoble-inp.fr", "position": "Assistant Professor", "bibtex": "@inproceedings{\nkeriven2022not,\ntitle={Not too little, not too much: a theoretical analysis of graph (over)smoothing},\nauthor={Nicolas Keriven},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-Lm0B9UYMy6}\n}", "github": "", "project": "", "reviewers": "hUnE;seXB;Xj6j;k824", "pdf_size": 1244824, "rating": "4;6;8;8", "confidence": "4;3;2;4", "soundness": "4;3;4;4", "novelty": "2;3;4;4", "presentation": "4;3;4;4", "contribution": "2;3;4;4", "wc_summary": "78;42;102;93", "wc_strengths_and_weaknesses": "693;113;16;31", "wc_questions": "325;2;11;100", "wc_limitations": "17;1;42;7", "wc_review": "1113;158;171;231", "wc_reply_reviewers": "456;0;0;0", "wc_reply_authors": "895;240;93;267", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 78.75, 22.884219453588535 ], "wc_strengths_and_weaknesses_avg": [ 213.25, 279.4336907031792 ], "wc_questions_avg": [ 109.5, 130.18160392313501 ], "wc_limitations_avg": [ 16.75, 15.658464164789597 ], "wc_review_avg": [ 418.25, 402.0580648364114 ], "wc_reply_reviewers_avg": [ 114.0, 197.45379206285202 ], "wc_reply_authors_avg": [ 373.75, 308.1423169576032 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.4545454545454545, "gs_citation": 121, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2063487353980385484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "gipsa-lab.grenoble-inp.fr", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Gipsa-lab", "aff_unique_dep": "", "aff_unique_url": 
"https://gipsa-lab.grenoble-inp.fr", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Multiclass Learnability Beyond the PAC Framework: Universal Rates and Partial Concept Classes", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54459", "id": "-N-OYK2cY7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/82f0dae85424eb743017c90380e7ab9b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-N-OYK2cY7", "openreview": "https://openreview.net/forum?id=-N-OYK2cY7", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54459", "video": "https://nips.cc/virtual/2022/poster/54459", "author_site": "Alkis Kalavasis, Grigoris Velegkas, Amin Karbasi", "tldr": "We study multiclass classification in two settings that go beyond the PAC framework (the universal learning setting and the partial concept classes), and we characterize learnability in these settings.", "abstract": "In this paper we study the problem of multiclass classification with a bounded number of different labels $k$, in the realizable setting. We extend the traditional PAC model to a) distribution-dependent learning rates, and b) learning rates under data-dependent assumptions. First, we consider the universal learning setting (Bousquet, Hanneke, Moran, van Handel and Yehudayoff, STOC'21), \nfor which we provide a complete characterization of the achievable learning rates that holds for every fixed distribution. In particular, we show the following trichotomy: for any concept class, the optimal learning rate is either exponential, linear or arbitrarily slow. Additionally, we provide complexity measures of the underlying hypothesis class that characterize when these rates occur. Second, we consider the problem of multiclass classification with structured data (such as data lying on a low dimensional manifold or satisfying margin conditions), a setting which is captured by partial concept classes (Alon, Hanneke, Holzman and Moran, FOCS'21). Partial concepts are functions that can be undefined in certain parts of the input space. We extend the traditional PAC learnability of total concept classes to partial concept classes in the multiclass setting and investigate differences between partial and total concepts.", "keywords": "multiclass classification;universal learning rates;partial concept classes;learning theory", "primary_area": "", "supplementary_material": "/attachment/9aabfcb99bcd79aacf58ad63bbbba111e77a5589.pdf", "author": "Alkis Kalavasis;Grigoris Velegkas;Amin Karbasi", "authorids": "~Alkis_Kalavasis1;~Grigoris_Velegkas1;~Amin_Karbasi3", "gender": "M;M;M", "homepage": "https://alkisk.github.io/;;http://seas.yale.edu/faculty-research/faculty-directory/amin-karbasi", "dblp": "269/9425;254/1885;49/7411", "google_scholar": "NgVIFJwAAAAJ;Ty1kgP0AAAAJ;https://scholar.google.com.tw/citations?user=VusVB38AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alkis_Kalavasis1;~Grigoris_Velegkas1;~amin_karbasi1", "aff": "National Technical University of Athens;Yale University;Google", "aff_domain": "ntua.gr;yale.edu;google.com", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nkalavasis2022multiclass,\ntitle={Multiclass Learnability Beyond the {PAC} Framework: Universal Rates and Partial Concept Classes},\nauthor={Alkis Kalavasis and Grigoris Velegkas and Amin Karbasi},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-N-OYK2cY7}\n}", "github": "", "project": "", "reviewers": "FfZM;EhpP;XbtE", "pdf_size": 462275, "rating": "6;7;7", "confidence": "4;4;3", "soundness": "3;4;4", "novelty": "2;4;3", "presentation": "3;2;3", "contribution": "2;4;3", "wc_summary": "156;267;172", "wc_strengths_and_weaknesses": "194;330;142", "wc_questions": "277;60;132", "wc_limitations": "1;17;11", "wc_review": "628;674;457", "wc_reply_reviewers": "0;54;28", "wc_reply_authors": "1478;1053;1551", "reply_reviewers": "0;1;1", "reply_authors": "3;2;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 198.33333333333334, 48.99206284922306 ], "wc_strengths_and_weaknesses_avg": [ 222.0, 79.2632743877432 ], "wc_questions_avg": [ 156.33333333333334, 90.2453446013822 ], "wc_limitations_avg": [ 9.666666666666666, 6.599663291074443 ], "wc_review_avg": [ 586.3333333333334, 93.36071027055343 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 22.050447211388303 ], "wc_reply_authors_avg": [ 1360.6666666666667, 219.58496204329558 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12457176074315439399&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ntua.gr;yale.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "National Technical University of Athens;Yale University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.ntua.gr;https://www.yale.edu;https://www.google.com", "aff_unique_abbr": "NTUA;Yale;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Greece;United States" }, { "title": "Semantic Exploration from Language Abstractions and Pretrained Representations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53259", "id": "-NOQJw5z_KY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a28e024ccd623ed113fb19683fa0910d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-NOQJw5z_KY", "openreview": "https://openreview.net/forum?id=-NOQJw5z_KY", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53259", "video": "https://nips.cc/virtual/2022/poster/53259", "author_site": "Allison Tam, Neil Rabinowitz, Andrew Lampinen, Nicholas Roy, Stephanie Chan, DJ Strouse, Jane Wang, Andrea Banino, Felix Hill", "tldr": "", "abstract": "Effective exploration is a challenge in reinforcement learning (RL). Novelty-based exploration methods can suffer in high-dimensional state spaces, such as continuous partially-observable 3D environments. We address this challenge by defining novelty using semantically meaningful state abstractions, which can be found in learned representations shaped by natural language. In particular, we evaluate vision-language representations, pretrained on natural image captioning datasets. 
We show that these pretrained representations drive meaningful, task-relevant exploration and improve performance on 3D simulated environments. We also characterize why and how language provides useful abstractions for exploration by considering the impacts of using representations from a pretrained model, a language oracle, and several ablations. We demonstrate the benefits of our approach with on- and off-policy RL algorithms and in two very different task domains---one that stresses the identification and manipulation of everyday objects, and one that requires navigational exploration in an expansive world. Our results suggest that using language-shaped representations could improve exploration for various algorithms and agents in challenging environments.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/82475ad6446b48e2d7ad0dabaa8dcbc82412a3cc.pdf", "author": "Allison Tam;Neil Charles Rabinowitz;Andrew Kyle Lampinen;Nicholas Andrew Roy;Stephanie C.Y. Chan;DJ Strouse;Jane X Wang;Andrea Banino;Felix Hill", "authorids": "~Allison_Tam1;~Neil_Charles_Rabinowitz1;~Andrew_Kyle_Lampinen1;~Nicholas_Andrew_Roy1;~Stephanie_C.Y._Chan1;~DJ_Strouse1;~Jane_X_Wang1;~Andrea_Banino1;~Felix_Hill1", "gender": ";M;M;;F;;;;", "homepage": ";;https://github.com/google/BIG-bench;;https://scychan.github.io/;http://www.djstrouse.com;;;https://fh295.github.io/", "dblp": ";156/0289;https://dblp.uni-trier.de/pers/hd/l/Lampinen:Andrew_K=;;255/7866;181/2305;;;116/0509", "google_scholar": ";https://scholar.google.co.uk/citations?user=AgUYQMwAAAAJ;_N44XxAAAAAJ;;https://scholar.google.com/citations?hl=en;K8E0T7MAAAAJ;;;https://scholar.google.co.uk/citations?user=4HLUnhIAAAAJ", "orcid": ";;;;;;;;", "linkedin": "allison-tam/;;;;scychan;;;;", "or_profile": "~Allison_Tam1;~Neil_Charles_Rabinowitz1;~Andrew_Kyle_Lampinen1;~Nicholas_Andrew_Roy1;~Stephanie_C.Y._Chan1;~DJ_Strouse1;~Jane_X_Wang1;~Andrea_Banino1;~Felix_Hill1", "aff": ";Google DeepMind;Google DeepMind;;Google DeepMind;Google DeepMind;;;Google", "aff_domain": ";google;google.com;;deepmind.com;google.com;;;google.com", "position": ";Research Scientist;Research Scientist;;Research Scientist;Research Scientist;;;Researcher", "bibtex": "@inproceedings{\ntam2022semantic,\ntitle={Semantic Exploration from Language Abstractions and Pretrained Representations},\nauthor={Allison Tam and Neil Charles Rabinowitz and Andrew Kyle Lampinen and Nicholas Andrew Roy and Stephanie C.Y. Chan and DJ Strouse and Jane X Wang and Andrea Banino and Felix Hill},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-NOQJw5z_KY}\n}", "github": "", "project": "", "reviewers": "mRW6;jixR;p9aE;kb5q", "pdf_size": 8389986, "rating": "4;5;6;8", "confidence": "3;3;3;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;2;4", "contribution": "2;2;3;4", "wc_summary": "89;114;124;169", "wc_strengths_and_weaknesses": "128;184;84;366", "wc_questions": "1;197;119;46", "wc_limitations": "1;58;104;74", "wc_review": "219;553;431;655", "wc_reply_reviewers": "0;0;0;80", "wc_reply_authors": "553;815;784;587", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 124.0, 28.939592256975562 ], "wc_strengths_and_weaknesses_avg": [ 190.5, 107.34407296166845 ], "wc_questions_avg": [ 90.75, 74.40556094809044 ], "wc_limitations_avg": [ 59.25, 37.46581775432107 ], "wc_review_avg": [ 464.5, 162.4153625738649 ], "wc_reply_reviewers_avg": [ 20.0, 34.64101615137755 ], "wc_reply_authors_avg": [ 684.75, 115.89731446414106 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8783100656536799, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14623113315174355821&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";google;google.com;;deepmind.com;google.com;;;google.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Improving Neural Ordinary Differential Equations with Nesterov's Accelerated Gradient Method", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54309", "id": "-OfK_B9Q5hI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/32cc61322f1e2f56f989d29ccc7cfbb7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-OfK_B9Q5hI", "openreview": "https://openreview.net/forum?id=-OfK_B9Q5hI", "poster": "/media/PosterPDFs/NeurIPS%202022/54309.png?t=1669731923.4824762", "slides": "https://nips.cc/virtual/2022/poster/54309", "video": "https://nips.cc/virtual/2022/poster/54309", "author_site": "Ho Huu Nghia Nguyen, Tan Nguyen, Huyen Vo, Stanley Osher, Thieu Vo", "tldr": "We propose the Nesterov neural ordinary differential equations (NesterovNODEs) whose layers solve the second-order ordinary differential equations limit of Nesterov's accelerated gradient method for speeding up the training and inference of NODEs.", "abstract": "We propose the Nesterov neural ordinary differential equations (NesterovNODEs), whose layers solve the second-order ordinary differential equations (ODEs) limit of Nesterov's accelerated gradient (NAG) method, and a generalization called GNesterovNODEs. 
Taking advantage of the convergence rate $\mathcal{O}(1/k^{2})$ of the NAG scheme, GNesterovNODEs speed up training and inference by reducing the number of function evaluations (NFEs) needed to solve the ODEs. We also prove that the adjoint state of a GNesterovNODE satisfies a GNesterovNODE, thus accelerating both forward and backward ODE solvers and allowing the model to be scaled up for large-scale tasks. We empirically corroborate the advantage of GNesterovNODEs on a wide range of practical applications, including point cloud separation, image classification, and sequence modeling. Compared to NODEs, GNesterovNODEs require a significantly smaller number of NFEs while achieving better accuracy across our experiments.", "keywords": "neural ordinary differential equations;nesterov;momentum", "primary_area": "", "supplementary_material": "/attachment/d4faa335883b623a30063780542d8f773af862b7.zip", "author": "Nghia Nguyen;Tan Minh Nguyen;V\u00f5 Th\u1ee5c Kh\u00e1nh Huy\u1ec1n;Stanley Osher;Thieu Vo", "authorids": "~Nghia_Nguyen2;~Tan_Minh_Nguyen1;~V\u00f5_Th\u1ee5c_Kh\u00e1nh_Huy\u1ec1n1;~Stanley_Osher1;~Thieu_Vo1", "gender": "M;M;F;M;M", "homepage": "https://nghiahhnguyen.github.io/;https://tanmnguyen89.github.io/;;https://www.math.ucla.edu/~sjo/;https://sites.google.com/tdtu.edu.vn/vongocthieu", "dblp": "346/0893;255/4725;;;", "google_scholar": "YNEftmcAAAAJ;OizOh88AAAAJ;;;CM2qJSoAAAAJ", "orcid": ";;;;", "linkedin": ";;vo-thuc-khanh-huyen-b384161a0/;;", "or_profile": "~Nghia_Nguyen2;~Tan_Minh_Nguyen1;~V\u00f5_Th\u1ee5c_Kh\u00e1nh_Huy\u1ec1n1;~Stanley_Osher1;~Thieu_Vo1", "aff": "FPT Software;University of California, Los Angeles;Hanoi University of Science and Technology;University of California, Los Angeles;Ton Duc Thang University", "aff_domain": "fpt.com;ucla.edu;hust.edu.vn;ucla.edu;tdtu.edu.vn", "position": "Researcher;Postdoc;Undergrad student;Full Professor;Lecturer", "bibtex": "@inproceedings{\nnguyen2022improving,\ntitle={Improving Neural Ordinary Differential Equations with Nesterov's Accelerated Gradient Method},\nauthor={Nghia Nguyen and Tan Minh Nguyen and V{\\~o} Th\u1ee5c Kh{\\'a}nh Huy\u1ec1n and Stanley Osher and Thieu Vo},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-OfK_B9Q5hI}\n}", "github": "", "project": "", "reviewers": "tT37;hkAe;Uc87", "pdf_size": 4236140, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "2;4;3", "novelty": "2;2;2", "presentation": "3;4;3", "contribution": "2;2;2", "wc_summary": "109;165;28", "wc_strengths_and_weaknesses": "330;410;1017", "wc_questions": "235;233;91", "wc_limitations": "60;15;8", "wc_review": "734;823;1144", "wc_reply_reviewers": "44;295;91", "wc_reply_authors": "1955;2423;2517", "reply_reviewers": "1;4;1", "reply_authors": "5;7;7", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 100.66666666666667, 56.239566933689034 ], "wc_strengths_and_weaknesses_avg": [ 585.6666666666666, 306.74238195738275 ], "wc_questions_avg": [ 186.33333333333334, 67.41579109443788 ], "wc_limitations_avg": [ 27.666666666666668, 23.041026211713937 ], "wc_review_avg": [ 900.3333333333334, 176.08773065971656 ], "wc_reply_reviewers_avg": [ 143.33333333333334, 108.9474899002063 ], "wc_reply_authors_avg": [ 2298.3333333333335, 245.78762286892226 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 6.333333333333333, 0.9428090415820634 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8336523696502236826&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "fpt.com;ucla.edu;hust.edu.vn;ucla.edu;tdtu.edu.vn", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "FPT Corporation;University of California, Los Angeles;Hanoi University of Science and Technology;Ton Duc Thang University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.fpt-software.com;https://www.ucla.edu;https://www.hust.edu.vn;https://www.tdtu.edu.vn", "aff_unique_abbr": "FPT;UCLA;HUST;TDTU", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Los Angeles;Hanoi", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Vietnam;United States" }, { "title": "A Scalable Deterministic Global Optimization Algorithm for Training Optimal Decision Tree", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53587", "id": "-Oh_TKISy89", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/37771cc0be272368102a37f202bb88d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-Oh_TKISy89", "openreview": "https://openreview.net/forum?id=-Oh_TKISy89", "poster": "/media/PosterPDFs/NeurIPS%202022/53587.png?t=1668491023.972874", "slides": "https://nips.cc/virtual/2022/poster/53587", "video": "https://nips.cc/virtual/2022/poster/53587", "author_site": "Kaixun Hua, Jiayang Ren, Yankai Cao", "tldr": "A reduced-space branch and bound method is proposed to train optimal decision trees for large-scale datasets", "abstract": "The training of optimal decision trees via mixed-integer programming (MIP) has attracted much attention in the recent literature. However, for large datasets, state-of-the-art approaches struggle to solve the optimal decision tree training problem to provable global optimality within a reasonable time. 
In this paper, we reformulate the optimal decision tree training problem as a two-stage optimization problem and propose a tailored reduced-space branch and bound algorithm to train optimal decision trees for classification tasks with continuous features. We present several structure-exploiting lower and upper bounding methods. The computation of bounds can be decomposed into the solution of many small-scale subproblems and can be naturally parallelized. With these bounding methods, we prove that our algorithm can converge by branching only on variables representing the optimal decision tree structure, which is invariant to the size of datasets. Moreover, we propose a novel sample reduction method that can predetermine the cost of a subset of the samples at each BB node. By combining the sample reduction method with the parallelized bounding strategies, our algorithm becomes extremely scalable. Our algorithm can find global optimal solutions on a dataset with over 245,000 samples (1000 cores, less than 1% optimality gap, within 2 hours). We test 21 real-world datasets from the UCI Repository. The results reveal that for datasets with over 7,000 samples, our algorithm can, on average, improve the training accuracy by 3.6% and testing accuracy by 2.8%, compared to the current state-of-the-art.", "keywords": "optimal decision tree;branch and bound;mixed integer programs;grouping decomposition;sample reduction", "primary_area": "", "supplementary_material": "/attachment/bdd870b540dd9d0f8e70d3d1f5d31932cac8e3b2.pdf", "author": "Kaixun Hua;Jiayang Ren;Yankai Cao", "authorids": "~Kaixun_Hua1;~Jiayang_Ren1;~Yankai_Cao1", "gender": ";M;M", "homepage": "https://kingsley1989.github.io;https://jiayang.site;https://optimal.chbe.ubc.ca", "dblp": "162/3319;310/1496;155/9335", "google_scholar": ";V3QBv3cAAAAJ;M-s3mjAAAAAJ", "orcid": ";;0000-0001-9014-2552", "linkedin": ";;", "or_profile": "~Kaixun_Hua1;~Jiayang_Ren1;~Yankai_Cao1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia", "aff_domain": "ubc.ca;ubc.ca;ubc.ca", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhua2022a,\ntitle={A Scalable Deterministic Global Optimization Algorithm for Training Optimal Decision Tree},\nauthor={Kaixun Hua and Jiayang Ren and Yankai Cao},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-Oh_TKISy89}\n}", "github": "", "project": "", "reviewers": "v4RW;25Mp;fTBj;gshW", "pdf_size": 511408, "rating": "4;5;6;7", "confidence": "3;2;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "contribution": "3;3;3;3", "wc_summary": "108;49;162;106", "wc_strengths_and_weaknesses": "206;222;341;243", "wc_questions": "124;40;9;143", "wc_limitations": "22;11;14;49", "wc_review": "460;322;526;541", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1302;683;2814;668", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;4;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.25, 39.96482828688246 ], "wc_strengths_and_weaknesses_avg": [ 253.0, 52.47380298777667 ], "wc_questions_avg": [ 79.0, 55.995535536326464 ], "wc_limitations_avg": [ 24.0, 14.983324063771697 ], "wc_review_avg": [ 462.25, 86.51697810256667 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1366.75, 873.8550723661218 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12365879294221271416&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ubc.ca;ubc.ca;ubc.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "-QHUWgkh1OY", "title": "DOGE-Train: Discrete Optimization on GPU with End-to-end Training", "track": "main", "status": "Reject", "tldr": "Learn to solve LP relaxations of ILPs using a graph neural network trained by backpropagation with a self-supervised loss. ", "abstract": "We present a fast, scalable, data-driven approach for solving linear relaxations of 0-1 integer linear programs using a graph neural network.\nOur solver is based on the Lagrange-decomposition-based algorithm of Abbas et al. (2022).\nWe make the algorithm differentiable and perform backpropagation through the dual update scheme for end-to-end training of its algorithmic parameters.\nThis allows us to preserve the algorithm's theoretical properties, including feasibility and a guaranteed non-decrease in the lower bound.\nSince the method of Abbas et al. (2022) can get stuck in suboptimal fixed points, we provide additional freedom to our graph neural network to predict non-parametric update steps for escaping such points while maintaining dual feasibility.\nFor training the graph neural network, we use an unsupervised loss and perform experiments on large-scale real-world datasets.\nWe train on smaller problems and test on larger ones, showing strong generalization performance with a graph neural network comprising only around $10k$ parameters.\nOur solver achieves significantly faster performance and better dual objectives than the non-learned algorithm of Abbas et al. 
(2022).\nIn comparison to commercial solvers our learned solver achieves close to optimal objective values of LP relaxations and is faster by up to an order of magnitude on very large problems from structured prediction and on selected combinatorial optimization problems.\nOur code will be made available upon acceptance.", "keywords": "Discrete optimization;Integer linear programming;Combinatorial optimization;Graph neural networks;End-to-end;Self-supervised;Backpropagation;Message passing", "primary_area": "", "supplementary_material": "/attachment/cbac3b01350c5dc0653d59b00810201f40c9cfc4.pdf", "author": "Ahmed Abbas;Paul Swoboda", "authorids": "~Ahmed_Abbas1;~Paul_Swoboda1", "gender": "M;M", "homepage": "https://people.mpi-inf.mpg.de/~ahabbas/;http://paulswoboda.net", "dblp": "28/8106;17/3730", "google_scholar": "8NzZ7XgAAAAJ;https://scholar.google.de/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Ahmed_Abbas1;~Paul_Swoboda1", "aff": "Saarland Informatics Campus, Max-Planck Institute;Saarland Informatics Campus, Max-Planck Institute", "aff_domain": "mpi-inf.mpg.de;mpi-inf.mpg.de", "position": "PhD student;Postdoc", "bibtex": "@misc{\nabbas2022dogetrain,\ntitle={{DOGE}-Train: Discrete Optimization on {GPU} with End-to-end Training},\nauthor={Ahmed Abbas and Paul Swoboda},\nyear={2022},\nurl={https://openreview.net/forum?id=-QHUWgkh1OY}\n}", "github": "", "project": "", "reviewers": "ehgi;TgLG;jrD7", "site": "https://openreview.net/forum?id=-QHUWgkh1OY", "pdf_size": 475225, "rating": "3;4;7", "confidence": "3;2;4", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "2;2;3", "contribution": "2;2;3", "wc_summary": "31;60;118", "wc_strengths_and_weaknesses": "91;95;111", "wc_questions": "104;13;368", "wc_limitations": "1;11;21", "wc_review": "227;179;618", "wc_reply_reviewers": "622;40;166", "wc_reply_authors": "2008;327;1046", "reply_reviewers": "2;1;2", "reply_authors": "3;2;4", "rating_avg": [ 4.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 69.66666666666667, 36.16935473881477 ], "wc_strengths_and_weaknesses_avg": [ 99.0, 8.640987597877148 ], "wc_questions_avg": [ 161.66666666666666, 150.55526855241197 ], "wc_limitations_avg": [ 11.0, 8.16496580927726 ], "wc_review_avg": [ 341.3333333333333, 196.61185676917407 ], "wc_reply_reviewers_avg": [ 276.0, 250.0079998720041 ], "wc_reply_authors_avg": [ 1127.0, 688.6513389710839 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7205766921228921, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11882354638998803845&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Max-Planck Institute", "aff_unique_dep": "Informatics", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Saarland", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Gradient Descent: The Ultimate Optimizer", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54591", "id": "-Qp-3L-5ZdI", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/36ce475705c1dc6c50a5956cedff3d01-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-Qp-3L-5ZdI", "openreview": "https://openreview.net/forum?id=-Qp-3L-5ZdI", "poster": "/media/PosterPDFs/NeurIPS%202022/77cdfc1e11e36a23bb030892ee00b8cf.png?t=1667186355.3739302", "slides": "https://nips.cc/virtual/2022/poster/54591", "video": "https://nips.cc/virtual/2022/poster/54591", "author_site": "Kartik Chandra, Audrey Xie, Jonathan Ragan-Kelley, ERIK MEIJER", "tldr": "We use gradient descent to tune not only hyperparameters, but also hyper-hyperparameters, and so on\u2026", "abstract": "Working with any gradient-based machine learning algorithm involves the tedious task of tuning the optimizer's hyperparameters, such as its step size. Recent work has shown how the step size can itself be optimized alongside the model parameters by manually deriving expressions for \"hypergradients\" ahead of time.\n\nWe show how to *automatically* compute hypergradients with a simple and elegant modification to backpropagation. This allows us to easily apply the method to other optimizers and hyperparameters (e.g. momentum coefficients). We can even recursively apply the method to its own *hyper*-hyperparameters, and so on ad infinitum. As these towers of optimizers grow taller, they become less sensitive to the initial choice of hyperparameters. We present experiments validating this for MLPs, CNNs, and RNNs. Finally, we provide a simple PyTorch implementation of this algorithm (see http://people.csail.mit.edu/kach/gradient-descent-the-ultimate-optimizer).", "keywords": "automatic differentiation;differentiable programming;hyperparameter optimization", "primary_area": "", "supplementary_material": "", "author": "Kartik Chandra;Audrey Xie;Jonathan Ragan-Kelley;Erik Meijer", "authorids": "~Kartik_Chandra2;~Audrey_Xie1;~Jonathan_Ragan-Kelley1;erikm@fb.com", "gender": ";;M;", "homepage": "https://cs.stanford.edu/~kach/;https://0a.ax;https://people.csail.mit.edu/jrk;", "dblp": "07/5865.html;;;", "google_scholar": "oVcz4nIAAAAJ;;https://scholar.google.com.tw/citations?user=nBcay4oAAAAJ;", "orcid": "0000-0002-1835-3707;;;", "linkedin": ";;;", "or_profile": "~Kartik_Chandra2;~Audrey_Xie1;~Jonathan_Ragan-Kelley1;erikm@fb.com", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of California, Berkeley;", "aff_domain": "mit.edu;mit.edu;berkeley.edu;", "position": "PhD student;Undergrad student;Assistant Professor;", "bibtex": "@inproceedings{\nchandra2022gradient,\ntitle={Gradient Descent: The Ultimate Optimizer},\nauthor={Kartik Chandra and Audrey Xie and Jonathan Ragan-Kelley and Erik Meijer},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-Qp-3L-5ZdI}\n}", "github": "", "project": "", "reviewers": "kxHt;oELp;zqnY", "pdf_size": 1911521, "rating": "6;7;7", "confidence": "4;4;5", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "4;2;4", "contribution": "3;3;4", "wc_summary": "90;205;71", "wc_strengths_and_weaknesses": "448;317;163", "wc_questions": "129;180;25", "wc_limitations": "1;28;8", "wc_review": "668;730;267", "wc_reply_reviewers": "8;34;0", "wc_reply_authors": "480;299;39", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 122.0, 59.200225224796796 ], "wc_strengths_and_weaknesses_avg": [ 309.3333333333333, 116.47698866681301 ], "wc_questions_avg": [ 111.33333333333333, 64.49978466802987 ], "wc_limitations_avg": [ 12.333333333333334, 11.440668201153676 ], "wc_review_avg": [ 555.0, 205.21370974344444 ], "wc_reply_reviewers_avg": [ 14.0, 14.514360704718161 ], "wc_reply_authors_avg": [ 272.6666666666667, 180.99785142985047 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2848747075524374106&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;berkeley.edu;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu", "aff_unique_abbr": "MIT;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Natural Color Fool: Towards Boosting Black-box Unrestricted Attacks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55381", "id": "-T5seeOMnM5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/31d0d59fe946684bb228e9c8e887e176-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-T5seeOMnM5", "openreview": "https://openreview.net/forum?id=-T5seeOMnM5", "poster": "/media/PosterPDFs/NeurIPS%202022/55381.png?t=1669538609.8896146", "slides": "https://nips.cc/virtual/2022/poster/55381", "video": "https://nips.cc/virtual/2022/poster/55381", "author_site": "Shengming Yuan, Qilong Zhang, Lianli Gao, Yaya Cheng, Jingkuan Song", "tldr": "we propose a Natural Color Fool (NCF), which fully exploits color distributions of semantic classes in an image to craft human-imperceptible, flexible, and highly transferable adversarial examples.", "abstract": "Unrestricted color attacks, which manipulate semantically meaningful color of an image, have shown their stealthiness and success in fooling both human eyes and deep neural networks. However, current works usually sacrifice the flexibility of the uncontrolled setting to ensure the naturalness of adversarial examples. 
As a result, the black-box attack performance of these methods is limited. To boost transferability of adversarial examples without damaging image quality, we propose a novel Natural Color Fool (NCF) which is guided by realistic color distributions sampled from a publicly available dataset and optimized by our neighborhood search and initialization reset. By conducting extensive experiments and visualizations, we convincingly demonstrate the effectiveness of our proposed method. Notably, on average, results show that our NCF can outperform state-of-the-art approaches by 15.0%$\\sim$32.9% for fooling normally trained models and 10.0%$\\sim$25.3% for evading defense methods. Our code is available at https://github.com/VL-Group/Natural-Color-Fool.", "keywords": "unrestricted color attack;transferability;flexible;natural;semantic-based", "primary_area": "", "supplementary_material": "/attachment/63d64c57fcb3329f2f565448ae4ca8922c5e98cf.zip", "author": "Shengming Yuan;Qilong Zhang;Lianli Gao;Yaya Cheng;Jingkuan Song", "authorids": "~Shengming_Yuan1;~Qilong_Zhang2;~Lianli_Gao1;~Yaya_Cheng1;~Jingkuan_Song3", "gender": "M;M;F;F;M", "homepage": "https://github.com/ylhz;;https://lianligao.github.io/;https://github.com/yaya-cheng;https://cfm.uestc.edu.cn/~songjingkuan/", "dblp": "330/4282;22/3730;123/9849.html;;70/10575", "google_scholar": "QjyQOJ8AAAAJ;IgPyQWYAAAAJ;https://scholar.google.com.au/citations?user=zsm2dpYAAAAJ;;F5Zy9V4AAAAJ", "orcid": ";0009-0005-2591-5762;;;", "linkedin": ";;;;", "or_profile": "~Shengming_Yuan1;~Qilong_Zhang2;~Lianli_Gao1;~Yaya_Cheng1;~Jingkuan_Song3", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China, Tsinghua University;University of Electronic Science and Technology of China,", "aff_domain": "uestc.edu.cn;uestc.edu;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "MS student;MS student;Full Professor;MS student;Full Professor", "bibtex": "@inproceedings{\nyuan2022natural,\ntitle={Natural Color Fool: Towards Boosting Black-box Unrestricted Attacks},\nauthor={Shengming Yuan and Qilong Zhang and Lianli Gao and Yaya Cheng and Jingkuan Song},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-T5seeOMnM5}\n}", "github": "", "project": "", "reviewers": "Hmyg;C6Jv;UgYm", "pdf_size": 4008587, "rating": "6;7;7", "confidence": "4;4;5", "soundness": "3;3;2", "novelty": "3;3;2", "presentation": "4;2;3", "contribution": "3;3;2", "wc_summary": "45;155;94", "wc_strengths_and_weaknesses": "188;205;74", "wc_questions": "3;126;84", "wc_limitations": "1;15;24", "wc_review": "237;501;276", "wc_reply_reviewers": "71;14;0", "wc_reply_authors": "1154;970;964", "reply_reviewers": "2;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 98.0, 44.99629614386796 ], "wc_strengths_and_weaknesses_avg": [ 155.66666666666666, 58.16260730820867 ], "wc_questions_avg": [ 71.0, 51.048996072400875 ], "wc_limitations_avg": [ 13.333333333333334, 9.463379711052259 ], "wc_review_avg": [ 338.0, 116.35291143757426 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 30.706495874470747 ], "wc_reply_authors_avg": [ 1029.3333333333333, 88.18667069851821 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1908653488262515792&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uestc.edu.cn;uestc.edu;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Uncalibrated Models Can Improve Human-AI Collaboration", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53892", "id": "-TJpOACwpl5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1968ea7d985aa377e3a610b05fc79be0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-TJpOACwpl5", "openreview": "https://openreview.net/forum?id=-TJpOACwpl5", "poster": "/media/PosterPDFs/NeurIPS%202022/53892.png?t=1669683567.1414075", "slides": "https://nips.cc/virtual/2022/poster/53892", "video": "https://nips.cc/virtual/2022/poster/53892", "author_site": "Kailas Vodrahalli, Tobias Gerstenberg, James Zou", "tldr": "We propose and empirically validate a simple framework for optimizing an AI algorithm with respect to a human end user, resulting in human-calibrated AI that calibrates AI models for human use and outperforms the baseline calibrated AI.", "abstract": "In many practical applications of AI, an AI model is used as a decision aid for human users. The AI provides advice that a human (sometimes) incorporates into their decision-making process. The AI advice is often presented with some measure of \"confidence\" that the human can use to calibrate how much they depend on or trust the advice. 
In this paper, we present an initial exploration that suggests that showing AI models as more confident than they actually are, even when the original AI is well-calibrated, can improve human-AI performance (measured as the accuracy and confidence of the human's final prediction after seeing the AI advice). We first train a model to predict human incorporation of AI advice using data from thousands of human-AI interactions. This enables us to explicitly estimate how to transform the AI's prediction confidence, making the AI uncalibrated, in order to improve the final human prediction. We empirically validate our results across four different tasks---dealing with images, text and tabular data---involving hundreds of human participants. We further support our findings with simulation analysis. Our findings suggest the importance of jointly optimizing the human-AI system as opposed to the standard paradigm of optimizing the AI model alone.", "keywords": "Human-calibrated AI;Human-in-the-loop AI", "primary_area": "", "supplementary_material": "/attachment/96478edaff44a6f21c13e6d335ec372a457f60f5.zip", "author": "Kailas Vodrahalli;Tobias Gerstenberg;James Zou", "authorids": "~Kailas_Vodrahalli1;~Tobias_Gerstenberg1;~James_Zou1", "gender": ";M;", "homepage": ";http://cicl.stanford.edu/member/tobias_gerstenberg;", "dblp": ";;", "google_scholar": "0DeyGMcAAAAJ;d0TfP8EAAAAJ;23ZXZvEAAAAJ", "orcid": ";0000-0002-9162-0779;", "linkedin": ";;", "or_profile": "~Kailas_Vodrahalli1;~Tobias_Gerstenberg1;~James_Zou1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nvodrahalli2022uncalibrated,\ntitle={Uncalibrated Models Can Improve Human-{AI} Collaboration},\nauthor={Kailas Vodrahalli and Tobias Gerstenberg and James Zou},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-TJpOACwpl5}\n}", "github": "", "project": "", "reviewers": "k53k;LVB2;jiKw;4Md1", "pdf_size": 845946, "rating": "5;5;7;7", "confidence": "4;5;4;5", "soundness": "2;3;3;3", "novelty": "3;2;4;3", "presentation": "3;3;3;4", "contribution": "3;2;4;3", "wc_summary": "179;96;87;37", "wc_strengths_and_weaknesses": "356;410;216;36", "wc_questions": "408;76;329;69", "wc_limitations": "175;37;31;30", "wc_review": "1118;619;663;172", "wc_reply_reviewers": "106;0;74;0", "wc_reply_authors": "349;813;729;345", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.75, 50.97732338991525 ], "wc_strengths_and_weaknesses_avg": [ 254.5, 144.66081017331544 ], "wc_questions_avg": [ 220.5, 150.63283174660165 ], "wc_limitations_avg": [ 68.25, 61.69025449777298 ], "wc_review_avg": [ 643.0, 334.82906086539145 ], "wc_reply_reviewers_avg": [ 45.0, 46.400431032480725 ], "wc_reply_authors_avg": [ 559.0, 214.07475329893526 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12469546917170199830&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Active Learning for Multiple Target Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55003", "id": "-V1ITIKPH6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/faacb7a4827b4d51e201666b93ab5fa7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-V1ITIKPH6", "openreview": "https://openreview.net/forum?id=-V1ITIKPH6", "poster": "/media/PosterPDFs/NeurIPS%202022/e58aea67b01fa747687f038dfde066f6.png?t=1666234551.3685231", "slides": "https://nips.cc/virtual/2022/poster/55003", "video": "https://nips.cc/virtual/2022/poster/55003", "author_site": "Ying-Peng Tang, Sheng-Jun Huang", "tldr": "In this paper, we propose to study active learning in a novel setting, where the task is to select and label the most useful examples that are beneficial to multiple target models.", "abstract": "We describe and explore a novel setting of active learning (AL), where there are multiple target models to be learned simultaneously. In many real applications, the machine learning system is required to be deployed on diverse devices with varying computational resources (e.g., workstation, mobile phone, edge devices, etc.), which leads to the demand of training multiple target models on the same labeled dataset. However, it is generally believed that AL is model-dependent and untransferable, i.e., the data queried by one model may be less effective for training another model. 
This phenomenon naturally raises the question \"Does there exist an AL method that is effective for multiple target models?\" In this paper, we answer this question by theoretically analyzing the label complexity of active and passive learning under the setting with multiple target models, and conclude that AL does have the potential to achieve better label complexity under this novel setting. Based on this insight, we further propose an agnostic AL sampling strategy to select the examples located in the joint disagreement regions of different target models. The experimental results on the OCR benchmarks show that the proposed method can significantly surpass the traditional active and passive learning methods under this challenging setting.", "keywords": "active learning;machine learning", "primary_area": "", "supplementary_material": "/attachment/accdd08026be2d57c34f7684fa267f0429d22978.pdf", "author": "Ying-Peng Tang;Sheng-Jun Huang", "authorids": "~Ying-Peng_Tang1;~Sheng-Jun_Huang1", "gender": "M;", "homepage": "http://tangyp.cn/;http://parnec.nuaa.edu.cn/huangsj", "dblp": "234/7906;01/3367.html", "google_scholar": "https://scholar.google.com.hk/citations?user=ohuW7YcAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-1529-9714;0000-0002-7673-5367", "linkedin": ";", "or_profile": "~Ying-Peng_Tang1;~Sheng-Jun_Huang1", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ntang2022active,\ntitle={Active Learning for Multiple Target Models},\nauthor={Ying-Peng Tang and Sheng-Jun Huang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-V1ITIKPH6}\n}", "github": "", "project": "", "reviewers": "mjax;FvK3;rdsY;xpsR", "pdf_size": 372216, "rating": "5;6;6;8", "confidence": "3;1;3;4", "soundness": "4;2;3;3", "novelty": "3;2;3;4", "presentation": "2;2;3;3", "contribution": "3;2;3;4", "wc_summary": "151;89;78;101", "wc_strengths_and_weaknesses": "888;111;117;211", "wc_questions": "71;68;14;57", "wc_limitations": "28;17;1;1", "wc_review": "1138;285;210;370", "wc_reply_reviewers": "76;12;0;0", "wc_reply_authors": "819;344;61;279", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 104.75, 27.913930214142187 ], "wc_strengths_and_weaknesses_avg": [ 331.75, 323.59030810578986 ], "wc_questions_avg": [ 52.5, 22.830900113661748 ], "wc_limitations_avg": [ 11.75, 11.431863365173676 ], "wc_review_avg": [ 500.75, 372.2454667286091 ], "wc_reply_reviewers_avg": [ 22.0, 31.559467676119 ], "wc_reply_authors_avg": [ 375.75, 276.5441872468123 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4736842105263159, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10955208640370240766&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nuaa.edu.cn;nuaa.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": "", "aff_unique_url": "http://www.nuaa.edu.cn", "aff_unique_abbr": "NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Understanding Aesthetics with Language: A Photo Critique Dataset for Aesthetic Assessment", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55632", "id": "-VyJim9UBxQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/dcd18e50ebca0af89187c6e35dabb584-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=-VyJim9UBxQ", "openreview": "https://openreview.net/forum?id=-VyJim9UBxQ", "poster": "/media/PosterPDFs/NeurIPS%202022/4f4adcbf8c6f66dcfc8a3282ac2bf10a.png?t=1666523573.0155501", "slides": "https://nips.cc/virtual/2022/poster/55632", "video": "https://nips.cc/virtual/2022/poster/55632", "author_site": "Daniel Vera Nieto, Luigi Celona, Clara Fernandez Labrador", "tldr": "We propose the Reddit Photo Critique Dataset (RPCD), which contains tuples of image and photo critiques.", "abstract": "Computational inference of aesthetics is an ill-defined task due to its subjective nature. Many datasets have been proposed to tackle the problem by providing pairs of images and aesthetic scores based on human ratings. However, humans are better at expressing their opinion, taste, and emotions by means of language rather than summarizing them in a single number. In fact, photo critiques provide much richer information as they reveal how and why users rate the aesthetics of visual stimuli. In this regard, we propose the Reddit Photo Critique Dataset (RPCD), which contains tuples of image and photo critiques. 
RPCD consists of 74K images and 220K comments and is collected from a Reddit community used by hobbyists and professional photographers to improve their photography skills by leveraging constructive community feedback. The proposed dataset differs from previous aesthetics datasets mainly in three aspects, namely (i) the large scale of the dataset and the extension of the comments criticizing different aspects of the image, (ii) it contains mostly UltraHD images, and (iii) it can easily be extended to new data as it is collected through an automatic pipeline. To the best of our knowledge, in this work, we propose the first attempt to estimate the aesthetic quality of visual stimuli from the critiques. To this end, we exploit the polarity of the sentiment of criticism as an indicator of aesthetic judgment. We demonstrate how sentiment polarity correlates positively with the aesthetic judgment available for two aesthetic assessment benchmarks. Finally, we experiment with several models by using the sentiment scores as a target for ranking images. Dataset and baselines are available https://github.com/mediatechnologycenter/aestheval.", "keywords": "Image aesthetic assessment;Dataset;Photo critiques;Aesthetic image captioning", "primary_area": "", "supplementary_material": "/attachment/092f4eae1e488f31c7d36b0daf3e20accacae2b0.pdf", "author": "Daniel Vera Nieto;Luigi Celona;Clara Fernandez Labrador", "authorids": "~Daniel_Vera_Nieto1;~Luigi_Celona1;~Clara_Fernandez_Labrador1", "gender": "M;M;", "homepage": "https://dveni.github.io/;http://luigicelona.it;", "dblp": ";176/5396;", "google_scholar": ";F9vDCKAAAAAJ;", "orcid": ";0000-0002-5925-2646;", "linkedin": ";;", "or_profile": "~Daniel_Vera_Nieto1;~Luigi_Celona1;~Clara_Fernandez_Labrador1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;University of Milan - Bicocca;", "aff_domain": "inf.ethz.ch;unimib.it;", "position": "Researcher;Postdoc;", "bibtex": "@inproceedings{\nnieto2022understanding,\ntitle={Understanding Aesthetics with Language: A Photo Critique Dataset for Aesthetic Assessment},\nauthor={Daniel Vera Nieto and Luigi Celona and Clara Fernandez Labrador},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=-VyJim9UBxQ}\n}", "github": "", "project": "", "reviewers": "KEe2;uNZv;8ptL;2Q2e", "pdf_size": 2342193, "rating": "5;6;7;7", "confidence": "4;3;3;2", "wc_summary_and_contributions": "105;76;40;84", "wc_strengths": "38;26;104;46", "wc_weaknesses": "52;35;91;117", "wc_correctness": "2;7;1;65", "wc_clarity": "95;1;1;8", "wc_relation_to_prior_work": "24;16;30;21", "wc_documentation": "112;1;1;4", "wc_additional_feedback": "6;1;1;3", "wc_review": "434;163;269;348", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "474;418;300;503", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 76.25, 23.45607597190971 ], "wc_strengths_avg": [ 53.5, 30.012497396917837 ], "wc_weaknesses_avg": [ 73.75, 32.18209906143476 ], "wc_correctness_avg": [ 18.75, 26.799020504488592 ], "wc_clarity_avg": [ 26.25, 39.79557136164777 ], "wc_relation_to_prior_work_avg": [ 22.75, 5.0682837331783235 ], "wc_documentation_avg": [ 29.5, 47.64714052280577 ], "wc_additional_feedback_avg": [ 2.75, 2.0463381929681126 ], "wc_review_avg": [ 303.5, 99.92622278461245 ], "wc_reply_reviewers_avg": [ 0, 0 ], 
"wc_reply_authors_avg": [ 423.75, 77.7057752036488 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4473761993861538929&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "inf.ethz.ch;unimib.it;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;University of Milan", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.unimib.it", "aff_unique_abbr": "ETHZ;UNIMIB", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Zurich;Bicocca", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Italy" }, { "title": "Perturbation Learning Based Anomaly Detection", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54593", "id": "-Xdts90bWZ3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5c261ccdc44fbd32fbb344fa578a1844-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-Xdts90bWZ3", "openreview": "https://openreview.net/forum?id=-Xdts90bWZ3", "poster": "/media/PosterPDFs/NeurIPS%202022/0ab922ba3e948387b4b2a85fcb83d194.png?t=1666003398.9787507", "slides": "https://nips.cc/virtual/2022/poster/54593", "video": "https://nips.cc/virtual/2022/poster/54593", "author_site": "Jinyu Cai, Jicong Fan", "tldr": "", "abstract": "This paper presents a simple yet effective method for anomaly detection. The main idea is to learn small perturbations to perturb normal data and learn a classifier to classify the normal data and the perturbed data into two different classes. The perturbator and classifier are jointly learned using deep neural networks. Importantly, the perturbations should be as small as possible but the classifier is still able to recognize the perturbed data from unperturbed data. Therefore, the perturbed data are regarded as abnormal data and the classifier provides a decision boundary between the normal data and abnormal data, although the training data do not include any abnormal data.\nCompared with the state-of-the-art of anomaly detection, our method does not require any assumption about the shape (e.g. hypersphere) of the decision boundary and has fewer hyper-parameters to determine. Empirical studies on benchmark datasets verify the effectiveness and superiority of our method.", "keywords": "Anomaly detection;Machie learning;Deep learning", "primary_area": "", "supplementary_material": "/attachment/be59510bb5c8129f88f453b03c33c5f97a8f4699.zip", "author": "Jinyu Cai;Jicong Fan", "authorids": "~Jinyu_Cai2;~Jicong_Fan2", "gender": "M;M", "homepage": "https://jinyucai95.github.io/;https://jicongfan.github.io/", "dblp": "223/9427;139/1570", "google_scholar": "g9TVoA0AAAAJ;vdJsnhIAAAAJ", "orcid": "0000-0003-2241-2754;0000-0001-9665-0355", "linkedin": ";", "or_profile": "~Jinyu_Cai2;~Jicong_Fan2", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn", "position": "Intern;Research Assistant Professor", "bibtex": "@inproceedings{\ncai2022perturbation,\ntitle={Perturbation Learning Based Anomaly Detection},\nauthor={Jinyu Cai and Jicong Fan},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-Xdts90bWZ3}\n}", "github": "", "project": "", "reviewers": "dTj9;hfVr;ZTAh", "pdf_size": 1302663, "rating": "4;6;7", "confidence": "4;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "contribution": "2;3;3", "wc_summary": "53;44;40", "wc_strengths_and_weaknesses": "40;100;72", "wc_questions": "204;53;30", "wc_limitations": "1;13;43", "wc_review": "298;210;185", "wc_reply_reviewers": "0;26;0", "wc_reply_authors": "1503;345;1013", "reply_reviewers": "0;1;0", "reply_authors": "3;1;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 45.666666666666664, 5.436502143433364 ], "wc_strengths_and_weaknesses_avg": [ 70.66666666666667, 24.513035081133644 ], "wc_questions_avg": [ 95.66666666666667, 77.17656523985906 ], "wc_limitations_avg": [ 19.0, 17.663521732655695 ], "wc_review_avg": [ 231.0, 48.46304433964778 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 12.256517540566824 ], "wc_reply_authors_avg": [ 953.6666666666666, 474.609547125026 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8243547583806483103&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 9, "email": "cuhk.edu.cn;cuhk.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Why neural networks find simple solutions: The many regularizers of geometric complexity", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54458", "id": "-ZPeUAJlkEu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0ff3502bb29570b219967278db150a50-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-ZPeUAJlkEu", "openreview": "https://openreview.net/forum?id=-ZPeUAJlkEu", "poster": "/media/PosterPDFs/NeurIPS%202022/54458.png?t=1669219266.0058463", "slides": "https://nips.cc/virtual/2022/poster/54458", "video": "https://nips.cc/virtual/2022/poster/54458", "author_site": "Benoit Dherin, Michael Munn, Mihaela Rosca, David Barrett", "tldr": "", "abstract": "In many contexts, simpler models are preferable to more complex models and the control of this model complexity is the goal for many methods in machine learning such as regularization, hyperparameter tuning and architecture design. In deep learning, it has been difficult to understand the underlying mechanisms of complexity control, since many traditional measures are not naturally suitable for deep neural networks. Here we develop the notion of geometric complexity, which is a measure of the variability of the model function, computed using a discrete Dirichlet energy. 
Using a combination of theoretical arguments and empirical results, we show that many common training heuristics such as parameter norm regularization, spectral norm regularization, flatness regularization, implicit gradient regularization, noise regularization and the choice of parameter initialization all act to control geometric complexity, providing a unifying framework in which to characterize the behavior of deep learning models.", "keywords": "Deep Learning;Deep Learning Theory;Theory;Neural Networks;Regularization;Implicit Regularization;Smoothness;Complexity;Double-Descent", "primary_area": "", "supplementary_material": "/attachment/cf0cced73ef82abee6ce8ac2984530bd818704a8.pdf", "author": "Benoit Dherin;Michael Munn;Mihaela Rosca;David GT Barrett", "authorids": "~Benoit_Dherin1;~Michael_Munn1;~Mihaela_Rosca1;~David_GT_Barrett1", "gender": ";M;F;", "homepage": ";;http://elarosca.net/;", "dblp": ";;https://dblp.org/pers/r/Rosca:Mihaela;", "google_scholar": ";agHn7jkAAAAJ;https://scholar.google.co.uk/citations?user=MxkDwD0AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Benoit_Dherin1;~Michael_Munn1;~Mihaela_Rosca1;~David_GT_Barrett1", "aff": ";Google;Google DeepMind;", "aff_domain": ";google.com;google.com;", "position": ";Researcher;Research Engineer;", "bibtex": "@inproceedings{\ndherin2022why,\ntitle={Why neural networks find simple solutions: The many regularizers of geometric complexity},\nauthor={Benoit Dherin and Michael Munn and Mihaela Rosca and David GT Barrett},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-ZPeUAJlkEu}\n}", "github": "", "project": "", "reviewers": "UHLw;jmMn;wsiC", "pdf_size": 2072407, "rating": "6;7;7", "confidence": "3;4;2", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "3;3;3", "contribution": "3;4;3", "wc_summary": "160;142;57", "wc_strengths_and_weaknesses": "214;406;170", "wc_questions": "246;138;7", "wc_limitations": "47;11;6", "wc_review": "667;697;240", "wc_reply_reviewers": "545;109;22", "wc_reply_authors": "1680;812;395", "reply_reviewers": "2;1;1", "reply_authors": "3;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 119.66666666666667, 44.917207788948275 ], "wc_strengths_and_weaknesses_avg": [ 263.3333333333333, 102.46733896981787 ], "wc_questions_avg": [ 130.33333333333334, 97.72182742640231 ], "wc_limitations_avg": [ 21.333333333333332, 18.263503375736967 ], "wc_review_avg": [ 534.6666666666666, 208.72044035556803 ], "wc_reply_reviewers_avg": [ 225.33333333333334, 228.81190722706913 ], "wc_reply_authors_avg": [ 962.3333333333334, 535.2608917361908 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15168563859991847708&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";google.com;google.com;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", 
"aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Causally motivated multi-shortcut identification and removal", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55227", "id": "-ZQOx6yaVa-", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/536d643875321d6c3282ee8c7ea5eb6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-ZQOx6yaVa-", "openreview": "https://openreview.net/forum?id=-ZQOx6yaVa-", "poster": "/media/PosterPDFs/NeurIPS%202022/5caf41d62364d5b41a893adc1a9dd5d4.png?t=1667150283.947206", "slides": "https://nips.cc/virtual/2022/poster/55227", "video": "https://nips.cc/virtual/2022/poster/55227", "author_site": "Jiayun Zheng, Maggie Makar", "tldr": "We develop a method to identify and remove multiple shortcuts leading to accurate models that are robust to distribution shifts", "abstract": "For predictive models to provide reliable guidance in decision making processes, they are often required to be accurate and robust to distribution shifts. Shortcut learning--where a model relies on spurious correlations or shortcuts to predict the target label--undermines the robustness property, leading to models with poor out-of-distribution accuracy despite good in-distribution performance. Existing work on shortcut learning either assumes that the set of possible shortcuts is known a priori or is discoverable using interpretability methods such as saliency maps, which might not always be true. Instead, we propose a two step approach to (1) efficiently identify relevant shortcuts, and (2) leverage the identified shortcuts to build models that are robust to distribution shifts. Our approach relies on having access to a (possibly) high dimensional set of auxiliary labels at training time, some of which correspond to possible shortcuts. We show both theoretically and empirically that our approach is able to identify a sufficient set of shortcuts leading to more efficient predictors in finite samples.", "keywords": "shortcut learning;spurious correlations;causality", "primary_area": "", "supplementary_material": "/attachment/456d910fbf1c0ce20a001cf8ab44c320f0cf06f3.pdf", "author": "Jiayun Zheng;Maggie Makar", "authorids": "~Jiayun_Zheng1;~Maggie_Makar1", "gender": "F;F", "homepage": "https://www.linkedin.com/in/gretchen-zheng-baa4b8150/;https://mymakar.github.io/", "dblp": ";211/6995", "google_scholar": "B5odBKEAAAAJ;bmlgkM4AAAAJ", "orcid": ";", "linkedin": "jiayun-zheng-baa4b8150/;", "or_profile": "~Jiayun_Zheng1;~Maggie_Makar1", "aff": "Electrical Engineering and Computer Science, University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "eecs.umich.edu;umich.edu", "position": "MS student;Postdoc", "bibtex": "@inproceedings{\nzheng2022causally,\ntitle={Causally motivated multi-shortcut identification and removal},\nauthor={Jiayun Zheng and Maggie Makar},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-ZQOx6yaVa-}\n}", "github": "", "project": "", "reviewers": "1iAR;3Y9R;4zx1", "pdf_size": 729589, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;4;4", "contribution": "2;3;3", "wc_summary": "78;64;49", "wc_strengths_and_weaknesses": "183;376;335", "wc_questions": "59;214;143", "wc_limitations": "1;2;4", "wc_review": "321;656;531", "wc_reply_reviewers": "0;76;5", "wc_reply_authors": "707;829;663", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 11.841546445554407 ], "wc_strengths_and_weaknesses_avg": [ 298.0, 83.02208541506691 ], "wc_questions_avg": [ 138.66666666666666, 63.352628639666996 ], "wc_limitations_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_review_avg": [ 502.6666666666667, 138.22284744410223 ], "wc_reply_reviewers_avg": [ 27.0, 34.708308323320324 ], "wc_reply_authors_avg": [ 733.0, 70.21870595978444 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3811225096568034970&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 8, "email": "eecs.umich.edu;umich.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "Department of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Long-Form Video-Language Pre-Training with Multimodal Temporal Contrastive Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53633", "id": "-Zzi_ZmlDiy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f8290ccc2905538be1a7f7914ccef629-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-Zzi_ZmlDiy", "openreview": "https://openreview.net/forum?id=-Zzi_ZmlDiy", "poster": "/media/PosterPDFs/NeurIPS%202022/53633.png?t=1669627627.6571796", "slides": "https://nips.cc/virtual/2022/poster/53633", "video": "https://nips.cc/virtual/2022/poster/53633", "author_site": "Yuchong Sun, Hongwei Xue, Ruihua Song, Bei Liu, Huan Yang, Jianlong Fu", "tldr": "", "abstract": "Large-scale video-language pre-training has shown significant improvement in video-language understanding tasks. Previous studies of video-language pretraining mainly focus on short-form videos (i.e., within 30 seconds) and sentences, leaving long-form video-language pre-training rarely explored. Directly learning representation from long-form videos and language may benefit many long-form\nvideo-language understanding tasks. However, it is challenging due to the difficulty of modeling long-range relationships and the heavy computational burden caused by more frames. 
In this paper, we introduce a Long-Form VIdeo-LAnguage pre-training model (LF-VILA) and train it on a large-scale long-form video and paragraph dataset constructed from an existing public dataset. To effectively capture\nthe rich temporal dynamics and to better align video and language in an efficient end-to-end manner, we introduce two novel designs in our LF-VILA model. We first propose a Multimodal Temporal Contrastive (MTC) loss to learn the temporal relation across different modalities by encouraging fine-grained alignment between long-form videos and paragraphs. Second, we propose a Hierarchical Temporal Window Attention (HTWA) mechanism to effectively capture long-range dependency while reducing computational cost in Transformer. We fine-tune the pre-trained LF-VILA model on seven downstream long-form video-language understanding tasks of paragraph-to-video retrieval and long-form video question-answering, and achieve new state-of-the-art performances. Specifically, our model achieves 16.1% relative improvement on ActivityNet paragraph-to-video retrieval task and 2.4% on How2QA task, respectively. We release our code, dataset, and pre-trained models at https://github.com/microsoft/XPretrain.\n", "keywords": "video-language pre-training", "primary_area": "", "supplementary_material": "/attachment/f2c21feb613807429b0e47868a22a8cd699dd96d.pdf", "author": "Yuchong Sun;Hongwei Xue;Ruihua Song;Bei Liu;Huan Yang;Jianlong Fu", "authorids": "~Yuchong_Sun1;~Hongwei_Xue1;~Ruihua_Song1;~Bei_Liu2;~Huan_Yang4;~Jianlong_Fu1", "gender": "M;;F;F;M;M", "homepage": ";https://hellwayxue.github.io/;;https://www.microsoft.com/en-us/research/people/libei/;https://hyang0511.github.io/;", "dblp": "206/8045;272/6488;s/RuihuaSong;39/3711-1;86/4843-5;83/8692", "google_scholar": "DuSxNqgAAAAJ;k5CJa5YAAAAJ;v5LctN8AAAAJ;7IZyaZsAAAAJ;https://scholar.google.com/citations?hl=en;-WqSwu8AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yuchong_Sun1;~Hongwei_Xue1;~Ruihua_Song1;~Bei_Liu2;~Huan_Yang4;~Jianlong_Fu1", "aff": "Renmin University of China;University of Science and Technology of China;Renmin University of China;Microsoft Research Asia;Microsoft;Microsoft", "aff_domain": "ruc.edu.cn;ustc.edu.cn;ruc.edu.cn;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;PhD student;Associate Professor;Researcher;Senior Researcher;Senior Researcher", "bibtex": "@inproceedings{\nsun2022longform,\ntitle={Long-Form Video-Language Pre-Training with Multimodal Temporal Contrastive Learning},\nauthor={Yuchong Sun and Hongwei Xue and Ruihua Song and Bei Liu and Huan Yang and Jianlong Fu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-Zzi_ZmlDiy}\n}", "github": "", "project": "", "reviewers": "ZX7j;rUgm;1qEi;oRe6", "pdf_size": 790471, "rating": "4;4;6;7", "confidence": "5;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "contribution": "2;2;3;3", "wc_summary": "44;43;48;72", "wc_strengths_and_weaknesses": "298;210;202;341", "wc_questions": "22;6;3;3", "wc_limitations": "10;1;11;1", "wc_review": "374;260;264;417", "wc_reply_reviewers": "0;147;18;138", "wc_reply_authors": "871;980;210;958", "reply_reviewers": "0;2;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 51.75, 11.840080236214618 ], "wc_strengths_and_weaknesses_avg": [ 262.75, 58.81910828973864 ], "wc_questions_avg": [ 8.5, 7.88986691902975 ], "wc_limitations_avg": [ 5.75, 4.763139720814412 ], "wc_review_avg": [ 328.75, 68.47399141279848 ], "wc_reply_reviewers_avg": [ 75.75, 67.12814238454689 ], "wc_reply_authors_avg": [ 754.75, 317.14143138353904 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14516544053429726965&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ruc.edu.cn;ustc.edu.cn;ruc.edu.cn;microsoft.com;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;0;2;2;2", "aff_unique_norm": "Renmin University of China;University of Science and Technology of China;Microsoft", "aff_unique_dep": ";;Research", "aff_unique_url": "http://www.ruc.edu.cn;http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "RUC;USTC;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "On the difficulty of learning chaotic dynamics with RNNs", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53371", "id": "-_AMpmyV0Ll", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/495e55f361708bedbab5d81f92048dcd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-_AMpmyV0Ll", "openreview": "https://openreview.net/forum?id=-_AMpmyV0Ll", "poster": "/media/PosterPDFs/NeurIPS%202022/2686b822a1b95a0940e608accafd292a.png?t=1667844293.1104252", "slides": "https://nips.cc/virtual/2022/poster/53371", "video": "https://nips.cc/virtual/2022/poster/53371", "author_site": "Jonas Mikhaeil, Zahra Monfared, Daniel Durstewitz", "tldr": "", "abstract": "Recurrent neural networks (RNNs) are wide-spread machine learning tools for modeling sequential and time series data. They are notoriously hard to train because their loss gradients backpropagated in time tend to saturate or diverge during training. This is known as the exploding and vanishing gradient problem. Previous solutions to this issue either built on rather complicated, purpose-engineered architectures with gated memory buffers, or - more recently - imposed constraints that ensure convergence to a fixed point or restrict (the eigenspectrum of) the recurrence matrix. 
Such constraints, however, impose severe limitations on the expressivity of the RNN. Essential intrinsic dynamics such as multistability or chaos are disabled. This is inherently at odds with the chaotic nature of many, if not most, time series encountered in nature and society. It is particularly problematic in scientific applications where one aims to reconstruct the underlying dynamical system. \nHere we offer a comprehensive theoretical treatment of this problem by relating the loss gradients during RNN training to the Lyapunov spectrum of RNN-generated orbits. We mathematically prove that RNNs producing stable equilibrium or cyclic behavior have bounded gradients, whereas the gradients of RNNs with chaotic dynamics always diverge. \nBased on these analyses and insights, we suggest ways to optimize the training process on chaotic data according to the system's Lyapunov spectrum, regardless of the employed RNN architecture. ", "keywords": "Recurrent neural networks;Dynamical systems;Attractors;Time series analysis;Chaos;Exploding and vanishing gradient problem;Teacher forcing", "primary_area": "", "supplementary_material": "/attachment/e951ec1c2fea0ffa147c99b2ee2fdeccb3e8d20c.pdf", "author": "Jonas Magdy Mikhaeil;Zahra Monfared;Daniel Durstewitz", "authorids": "~Jonas_Magdy_Mikhaeil1;~Zahra_Monfared1;~Daniel_Durstewitz1", "gender": "M;F;", "homepage": ";;https://durstewitzlab.github.io", "dblp": ";;98/2120", "google_scholar": ";https://scholar.google.pl/citations?user=OPUIwIoAAAAJ;https://scholar.google.de/citations?user=2bcbKU0AAAAJ", "orcid": "0000-0001-6745-7505;;0000-0002-9340-3786", "linkedin": ";;", "or_profile": "~Jonas_Magdy_Mikhaeil1;~Zahra_Monfared1;~Daniel_Durstewitz1", "aff": "Heidelberg University;Heidelberg University(STRUCTURES);Heidelberg University", "aff_domain": "uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de", "position": "MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nmikhaeil2022on,\ntitle={On the difficulty of learning chaotic dynamics with {RNN}s},\nauthor={Jonas Magdy Mikhaeil and Zahra Monfared and Daniel Durstewitz},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-_AMpmyV0Ll}\n}", "github": "", "project": "", "reviewers": "Dedw;94xf;okuc;3EGL", "pdf_size": 5162693, "rating": "7;7;7;9", "confidence": "4;3;2;4", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "4;3;3;4", "contribution": "3;4;3;4", "wc_summary": "280;168;81;54", "wc_strengths_and_weaknesses": "283;293;114;219", "wc_questions": "384;58;37;217", "wc_limitations": "26;1;1;6", "wc_review": "973;520;233;496", "wc_reply_reviewers": "113;4;0;0", "wc_reply_authors": "887;466;492;461", "reply_reviewers": "1;1;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 145.75, 88.21670760122484 ], "wc_strengths_and_weaknesses_avg": [ 227.25, 71.28244875142829 ], "wc_questions_avg": [ 174.0, 139.7980686561871 ], "wc_limitations_avg": [ 8.5, 10.307764064044152 ], "wc_review_avg": [ 555.5, 266.0418200208381 ], "wc_reply_reviewers_avg": [ 29.25, 48.380652124583854 ], "wc_reply_authors_avg": [ 576.5, 179.65313801879444 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1853395383421685801&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Heidelberg University", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-heidelberg.de", "aff_unique_abbr": "Uni Heidelberg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Look where you look! Saliency-guided Q-networks for generalization in visual Reinforcement Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53263", "id": "-_I3i2orAV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c5ee2a08fbe743b171b0b4b2bdfd6f86-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-_I3i2orAV", "openreview": "https://openreview.net/forum?id=-_I3i2orAV", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53263", "video": "https://nips.cc/virtual/2022/poster/53263", "author_site": "David Bertoin, Adil Zouitine, Mehdi Zouitine, Emmanuel Rachelson", "tldr": "We present a generic method improving generalization for visual reinforcement learning based on attribution maps.", "abstract": "Deep reinforcement learning policies, despite their outstanding efficiency in simulated visual control tasks, have shown disappointing ability to generalize across disturbances in the input training images. \nChanges in image statistics or distracting background elements are pitfalls that prevent generalization and real-world applicability of such control policies.\nWe elaborate on the intuition that a good visual policy should be able to identify which pixels are important for its decision, and preserve this identification of important sources of information across images. \nThis implies that training of a policy with small generalization gap should focus on such important pixels and ignore the others. 
\nThis leads to the introduction of saliency-guided Q-networks (SGQN), a generic method for visual reinforcement learning, that is compatible with any value function learning method. \nSGQN vastly improves the generalization capability of Soft Actor-Critic agents and outperforms existing state-of-the-art methods on the Deepmind Control Generalization benchmark, setting a new reference in terms of training efficiency, generalization gap, and policy interpretability.", "keywords": "Reinforcement learning;Generalization", "primary_area": "", "supplementary_material": "/attachment/4e380c66d215291285ca7001300bca2b9ff17218.zip", "author": "David Bertoin;Adil Zouitine;Mehdi Zouitine;Emmanuel Rachelson", "authorids": "~David_Bertoin1;~Adil_Zouitine1;~Mehdi_Zouitine3;~Emmanuel_Rachelson1", "gender": "M;;M;M", "homepage": "https://davidbert.github.io/;;;https://personnel.isae-supaero.fr/emmanuel-rachelson", "dblp": ";281/6912;;52/6241", "google_scholar": "oAZZ-o4AAAAJ;https://scholar.google.fr/citations?user=jw4_zowAAAAJ;lXodf6YAAAAJ;https://scholar.google.fr/citations?user=KtG9BSgAAAAJ", "orcid": ";;;0000-0002-8559-1617", "linkedin": ";;mehdizouitine/;emmanuelrachelson/", "or_profile": "~David_Bertoin1;~Adil_Zouitine1;~Mehdi_Zouitine3;~Emmanuel_Rachelson1", "aff": "Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;Universit\u00e9 Paul Sabatier (Toulouse III);Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace", "aff_domain": "isae-supaero.fr;isae-supaero.fr;ups-tlse.fr;isae-supaero.fr", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbertoin2022look,\ntitle={Look where you look! Saliency-guided Q-networks for generalization in visual Reinforcement Learning},\nauthor={David Bertoin and Adil Zouitine and Mehdi Zouitine and Emmanuel Rachelson},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-_I3i2orAV}\n}", "github": "", "project": "", "reviewers": "S2Er;1cpS;zoMV", "pdf_size": 2387716, "rating": "5;5;7", "confidence": "5;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "4;2;3", "contribution": "3;3;3", "wc_summary": "63;64;131", "wc_strengths_and_weaknesses": "257;482;183", "wc_questions": "48;106;234", "wc_limitations": "26;21;53", "wc_review": "394;673;601", "wc_reply_reviewers": "9;0;11", "wc_reply_authors": "995;1942;672", "reply_reviewers": "1;0;1", "reply_authors": "3;4;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.0, 31.822423959633664 ], "wc_strengths_and_weaknesses_avg": [ 307.3333333333333, 127.14908135290986 ], "wc_questions_avg": [ 129.33333333333334, 77.70599862444483 ], "wc_limitations_avg": [ 33.333333333333336, 14.055445761538676 ], "wc_review_avg": [ 556.0, 118.26242006656214 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 4.784233364802441 ], "wc_reply_authors_avg": [ 1203.0, 538.9328962558017 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17499474331599164597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 25, "email": "isae-supaero.fr;isae-supaero.fr;ups-tlse.fr;isae-supaero.fr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;Universit\u00e9 Paul Sabatier", "aff_unique_dep": ";", "aff_unique_url": "https://www.isae-supaero.fr;https://www.unipaulsabatier.fr", "aff_unique_abbr": "ISAE-SUPAERO;UPS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toulouse", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Structural Kernel Search via Bayesian Optimization and Symbolical Optimal Transport", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55074", "id": "-bLLVk-WRPy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ff7373914a96956f2a7cacbdf3b0b8d8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-bLLVk-WRPy", "openreview": "https://openreview.net/forum?id=-bLLVk-WRPy", "poster": "/media/PosterPDFs/NeurIPS%202022/a00e5eb0973d24649a4a920fc53d9564.png?t=1667825174.8978808", "slides": "https://nips.cc/virtual/2022/poster/55074", "video": "https://nips.cc/virtual/2022/poster/55074", "author_site": "Matthias Bitzer, Mona Meister, Christoph Zimmer", "tldr": "We propose a new method for kernel selection for Gaussian processes, where the distance between two GPs is measured using their associated symbolic description of the statistical hypothesis.", "abstract": "Despite recent advances in automated machine learning, model selection is still a complex and computationally intensive process. For Gaussian processes (GPs), selecting the kernel is a crucial task, often done manually by the expert. 
Additionally, evaluating the model selection criteria for Gaussian processes typically scales cubically in the sample size, rendering kernel search particularly computationally expensive. We propose a novel, efficient search method through a general, structured kernel space. Previous methods solved this task via Bayesian optimization and relied on measuring the distance between GPs directly in function space to construct a kernel-kernel. We present an alternative approach by defining a kernel-kernel over the symbolic representation of the statistical hypothesis that is associated with a kernel. We empirically show that this leads to a computationally more efficient way of searching through a discrete kernel space.", "keywords": "Bayesian Optimization;Gaussian Process;Kernel Search;Kernel", "primary_area": "", "supplementary_material": "/attachment/6a94c5208745d4954ae1b91b51ddc34a6705f640.zip", "author": "Matthias Bitzer;Mona Meister;Christoph Zimmer", "authorids": "~Matthias_Bitzer1;~Mona_Meister1;~Christoph_Zimmer1", "gender": "M;F;", "homepage": ";;", "dblp": ";192/1464;", "google_scholar": ";;", "orcid": ";;", "linkedin": "matthias-bitzer;;", "or_profile": "~Matthias_Bitzer1;~Mona_Meister1;~Christoph_Zimmer1", "aff": "Robert Bosch GmbH, Bosch;Robert Bosch GmbH;", "aff_domain": "de.bosch.com;bosch.com;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nbitzer2022structural,\ntitle={Structural Kernel Search via Bayesian Optimization and Symbolical Optimal Transport},\nauthor={Matthias Bitzer and Mona Meister and Christoph Zimmer},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-bLLVk-WRPy}\n}", "github": "", "project": "", "reviewers": "iFD5;PF7c;Cka7;i6Db", "pdf_size": 1104022, "rating": "7;7;7;7", "confidence": "3;3;4;3", "soundness": "4;3;4;3", "novelty": "3;3;3;3", "presentation": "3;2;4;3", "contribution": "3;3;3;3", "wc_summary": "158;58;48;194", "wc_strengths_and_weaknesses": "136;154;91;260", "wc_questions": "92;188;79;98", "wc_limitations": "6;20;9;14", "wc_review": "392;420;227;566", "wc_reply_reviewers": "14;126;0;48", "wc_reply_authors": "452;749;628;680", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 114.5, 62.90270264464 ], "wc_strengths_and_weaknesses_avg": [ 160.25, 61.99344723436502 ], "wc_questions_avg": [ 114.25, 43.129891026989625 ], "wc_limitations_avg": [ 12.25, 5.3091901453988255 ], "wc_review_avg": [ 401.25, 120.35650169392595 ], "wc_reply_reviewers_avg": [ 47.0, 48.83646178829912 ], "wc_reply_authors_avg": [ 627.25, 109.90763167314634 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5846774864854783055&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "de.bosch.com;bosch.com;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Robert Bosch GmbH", "aff_unique_dep": "", "aff_unique_url": "https://www.bosch.com", "aff_unique_abbr": "Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0", "aff_country_unique": "Germany" }, { "title": "Tree ensemble kernels for Bayesian optimization with known constraints over mixed-feature spaces", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53330", "id": "-cBZMMTImxT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f3398b76d17792893ce6d4f660546353-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-cBZMMTImxT", "openreview": "https://openreview.net/forum?id=-cBZMMTImxT", "poster": "/media/PosterPDFs/NeurIPS%202022/53330.png?t=1670740100.3043733", "slides": "https://nips.cc/virtual/2022/poster/53330", "video": "https://nips.cc/virtual/2022/poster/53330", "author_site": "Alexander Thebelt, Calvin Tsay, Robert Lee, Nathan Sudermann-Merx, David Walz, Behrang Shafei, Ruth Misener", "tldr": "We use tree kernel Gaussian processes for Bayesian optimization to simultaneously incorporate: a reliable uncertainty metric in mixed features and known constraints.", "abstract": "Tree ensembles can be well-suited for black-box optimization tasks such as algorithm tuning and neural architecture search, as they achieve good predictive performance with little or no manual tuning, naturally handle discrete feature spaces, and are relatively insensitive to outliers in the training data. Two well-known challenges in using tree ensembles for black-box optimization are (i) effectively quantifying model uncertainty for exploration and (ii) optimizing over the piece-wise constant acquisition function. To address both points simultaneously, we propose using the kernel interpretation of tree ensembles as a Gaussian Process prior to obtain model variance estimates, and we develop a compatible optimization formulation for the acquisition function. The latter further allows us to seamlessly integrate known constraints to improve sampling efficiency by considering domain-knowledge in engineering settings and modeling search space symmetries, e.g., hierarchical relationships in neural architecture search. 
Our framework performs as well as state-of-the-art methods for unconstrained black-box optimization over continuous/discrete features and outperforms competing methods for problems combining mixed-variable feature spaces and known input constraints.", "keywords": "Bayesian Optimization;Tree Ensembles;Global Optimization;Known Constraints;Black-box Optimization;Mixed-Variable Spaces;Hybrid Spaces", "primary_area": "", "supplementary_material": "/attachment/b1285cf924f7b3e5e1d8040bf34e827a50869aff.pdf", "author": "Alexander Thebelt;Calvin Tsay;Robert Matthew Lee;Nathan Sudermann-Merx;David Walz;Behrang Shafei;Ruth Misener", "authorids": "~Alexander_Thebelt1;~Calvin_Tsay1;~Robert_Matthew_Lee1;~Nathan_Sudermann-Merx1;~David_Walz1;~Behrang_Shafei1;~Ruth_Misener1", "gender": ";;M;M;M;M;F", "homepage": ";https://www.imperial.ac.uk/people/c.tsay;http://basf.net;https://sites.google.com/site/nathansudermannmerx/home?authuser=0;;;https://wp.doc.ic.ac.uk/rmisener/", "dblp": ";204/0777;;;305/7982;;04/8800", "google_scholar": "4hb39y4AAAAJ;i59BQe0AAAAJ;;https://scholar.google.de/citations?user=DkPcFNwAAAAJ;SAzirToAAAAJ;;AQxtWHoAAAAJ", "orcid": ";;;;0000-0001-8126-5315;;0000-0001-5612-5417", "linkedin": ";;;;walzds;behrang-shafei/;ruth-misener/", "or_profile": "~Alexander_Thebelt1;~Calvin_Tsay1;~Robert_Matthew_Lee1;~Nathan_Sudermann-Merx1;~David_Walz1;~Behrang_Shafei1;~Ruth_Misener1", "aff": "Imperial College London;Imperial College London;BASF SE;Duale Hochschule Baden-Wuerttemberg Mannheim;BASF;BASF;Imperial College London", "aff_domain": "ic.ac.uk;imperial.ac.uk;basf.com;dhbw-mannheim.de;basf.com;basf.com;imperial.ac.uk", "position": "PhD student;Researcher;Researcher;Professor;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nthebelt2022tree,\ntitle={Tree ensemble kernels for Bayesian optimization with known constraints over mixed-feature spaces},\nauthor={Alexander Thebelt and Calvin Tsay and Robert Matthew Lee and Nathan Sudermann-Merx and David Walz and Behrang Shafei and Ruth Misener},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-cBZMMTImxT}\n}", "github": "", "project": "", "reviewers": "NYXt;94qs;PeTE", "pdf_size": 2035382, "rating": "5;6;8", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "4;2;4", "contribution": "2;3;4", "wc_summary": "63;60;151", "wc_strengths_and_weaknesses": "274;58;901", "wc_questions": "83;339;264", "wc_limitations": "22;6;14", "wc_review": "442;463;1330", "wc_reply_reviewers": "28;0;0", "wc_reply_authors": "325;727;674", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 91.33333333333333, 42.20847729491737 ], "wc_strengths_and_weaknesses_avg": [ 411.0, 357.5276213105779 ], "wc_questions_avg": [ 228.66666666666666, 107.45645112737014 ], "wc_limitations_avg": [ 14.0, 6.531972647421808 ], "wc_review_avg": [ 745.0, 413.7462990771035 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 13.199326582148887 ], "wc_reply_authors_avg": [ 575.3333333333334, 178.32990650913143 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3169264550141799964&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "ic.ac.uk;imperial.ac.uk;basf.com;dhbw-mannheim.de;basf.com;basf.com;imperial.ac.uk", "author_num": 7, "aff_unique_index": "0;0;1;2;1;1;0", "aff_unique_norm": "Imperial College London;BASF SE;Duale Hochschule Baden-W\u00fcrttemberg Mannheim", "aff_unique_dep": ";;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.basf.com;https://www.dhbw-mannheim.de", "aff_unique_abbr": "ICL;BASF;DHBW Mannheim", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mannheim", "aff_country_unique_index": "0;0;1;1;1;1;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "RankFeat: Rank-1 Feature Removal for Out-of-distribution Detection", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55298", "id": "-deKNiSOXLG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/71c9eb0913e6c7fda3afd69c914b1a0c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-deKNiSOXLG", "openreview": "https://openreview.net/forum?id=-deKNiSOXLG", "poster": "/media/PosterPDFs/NeurIPS%202022/024d7f84fff11dd7e8d9c510137a2381.png?t=1666097698.371804", "slides": "https://nips.cc/virtual/2022/poster/55298", "video": "https://nips.cc/virtual/2022/poster/55298", "author_site": "Yue Song, Nicu Sebe, Wei Wang", "tldr": "We propose a simple yet effective post hoc OOD detection method by removing the rank-1 matrix from the high-level feature.", "abstract": "The task of out-of-distribution (OOD) detection is crucial for deploying machine learning models in real-world settings. 
In this paper, we observe that the singular value distributions of the in-distribution (ID) and OOD features are quite different: the OOD feature matrix tends to have a larger dominant singular value than that of the ID feature, and the class predictions of OOD samples are largely determined by it. This observation motivates us to propose RankFeat, a simple yet effective post hoc approach for OOD detection by removing the rank-1 matrix composed of the largest singular value and the associated singular vectors from the high-level feature. RankFeat achieves state-of-the-art performance and reduces the average false positive rate (FPR95) by 17.90% compared with the previous best method. Extensive ablation studies and comprehensive theoretical analyses are presented to support the empirical results.", "keywords": "out-of-distribution detection;distribution shifts", "primary_area": "", "supplementary_material": "/attachment/a95b0bfce887b6ae2f67dcc3eab6061c0e6000e9.pdf", "author": "Yue Song;Nicu Sebe;Wei Wang", "authorids": "~Yue_Song1;~Nicu_Sebe1;~Wei_Wang43", "gender": "M;M;M", "homepage": "https://kingjamessong.github.io/;http://disi.unitn.it/~sebe/;https://weiwangtrento.github.io/", "dblp": "11/1346;20/3519;35/7092-108", "google_scholar": "Uza2i10AAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;https://scholar.google.com/citations?hl=en-US", "orcid": ";0000-0002-6597-7248;0000-0002-5477-1017", "linkedin": ";;", "or_profile": "~Yue_Song1;~Nicu_Sebe1;~Wei_Wang43", "aff": "University of Trento, Italy;University of Trento;University of Trento", "aff_domain": "unitn.it;unitn.it;unitn.it", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsong2022rankfeat,\ntitle={RankFeat: Rank-1 Feature Removal for Out-of-distribution Detection},\nauthor={Yue Song and Nicu Sebe and Wei Wang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-deKNiSOXLG}\n}", "github": "", "project": "", "reviewers": "dq6u;SMeo;Ft9S;kEL5", "pdf_size": 8201049, "rating": "3;5;6;6", "confidence": "5;4;5;4", "soundness": "1;1;3;3", "novelty": "2;1;3;3", "presentation": "3;3;2;3", "contribution": "2;1;3;3", "wc_summary": "34;104;45;77", "wc_strengths_and_weaknesses": "351;216;294;94", "wc_questions": "7;174;245;110", "wc_limitations": "1;76;76;11", "wc_review": "393;570;660;292", "wc_reply_reviewers": "977;711;831;0", "wc_reply_authors": "3642;2200;1734;881", "reply_reviewers": "4;10;3;0", "reply_authors": "8;9;5;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.0, 1.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 65.0, 27.504545078950134 ], "wc_strengths_and_weaknesses_avg": [ 238.75, 96.33632492471362 ], "wc_questions_avg": [ 134.0, 87.50142855976695 ], "wc_limitations_avg": [ 41.0, 35.17811819867572 ], "wc_review_avg": [ 478.75, 144.40113399831733 ], "wc_reply_reviewers_avg": [ 629.75, 375.5897862029797 ], "wc_reply_authors_avg": [ 2114.25, 1000.8582254745174 ], "reply_reviewers_avg": [ 4.25, 3.6314597615834874 ], "reply_authors_avg": [ 6.0, 2.7386127875258306 ], "replies_avg": [ 48, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15686388667832765832&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "unitn.it;unitn.it;unitn.it", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Trento", "aff_unique_dep": "", "aff_unique_url": "https://www.unitn.it", "aff_unique_abbr": "UniTN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "Adaptively Exploiting d-Separators with Causal Bandits", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55050", "id": "-e2SBzFDE8x", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/801ec05b0aae9fcd2ef35c168bd538e0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-e2SBzFDE8x", "openreview": "https://openreview.net/forum?id=-e2SBzFDE8x", "poster": "/media/PosterPDFs/NeurIPS%202022/55050.png?t=1669134332.8585856", "slides": "https://nips.cc/virtual/2022/poster/55050", "video": "https://nips.cc/virtual/2022/poster/55050", "author_site": "Blair Bilodeau, Linbo Wang, Dan Roy", "tldr": "We provide a novel algorithm that exploits causal structure when it exists while simultaneously achieving sub-linear regret in the worst case.", "abstract": "Multi-armed bandit problems provide a framework to identify the optimal intervention over a sequence of repeated experiments. Without additional assumptions, minimax optimal performance (measured by cumulative regret) is well-understood. With access to additional observed variables that d-separate the intervention from the outcome (i.e., they are a d-separator), recent \"causal bandit\" algorithms provably incur less regret. However, in practice it is desirable to be agnostic to whether observed variables are a d-separator. Ideally, an algorithm should be adaptive; that is, perform nearly as well as an algorithm with oracle knowledge of the presence or absence of a d-separator. 
In this work, we formalize and study this notion of adaptivity, and provide a novel algorithm that simultaneously achieves (a) optimal regret when a d-separator is observed, improving on classical minimax algorithms, and (b) significantly smaller regret than recent causal bandit algorithms when the observed variables are not a d-separator. Crucially, our algorithm does not require any oracle knowledge of whether a d-separator is observed. We also generalize this adaptivity to other conditions, such as the front-door criterion.", "keywords": "bandit;causal bandit;adaptive;d-separation;online", "primary_area": "", "supplementary_material": "/attachment/6df0795e520d87cf43bfefa893555c04a441a4d8.pdf", "author": "Blair Bilodeau;Linbo Wang;Daniel M. Roy", "authorids": "~Blair_Bilodeau1;~Linbo_Wang2;~Daniel_M._Roy1", "gender": "M;M;M", "homepage": "http://www.blairbilodeau.ca;https://sites.google.com/site/linbowangpku/;http://danroy.org", "dblp": ";73/10697.html;04/2068", "google_scholar": ";3Svu_OEAAAAJ;https://scholar.google.ca/citations?user=vA6ZQ_AAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Blair_Bilodeau1;~Linbo_Wang2;~Daniel_M_Roy1", "aff": "University of Toronto;University of Toronto;University of Toronto", "aff_domain": "toronto.edu;utoronto.ca;utoronto.ca", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbilodeau2022adaptively,\ntitle={Adaptively Exploiting d-Separators with Causal Bandits},\nauthor={Blair Bilodeau and Linbo Wang and Daniel M. Roy},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-e2SBzFDE8x}\n}", "github": "", "project": "", "reviewers": "GygT;tTqa;XZzq;KRsJ", "pdf_size": 604538, "rating": "7;7;7;8", "confidence": "3;4;3;4", "soundness": "3;4;3;4", "novelty": "3;4;3;4", "presentation": "3;3;3;4", "contribution": "3;4;3;4", "wc_summary": "131;125;224;136", "wc_strengths_and_weaknesses": "326;257;52;187", "wc_questions": "58;37;11;6", "wc_limitations": "25;1;10;33", "wc_review": "540;420;297;362", "wc_reply_reviewers": "110;0;0;17", "wc_reply_authors": "303;528;55;540", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 154.0, 40.601724101323576 ], "wc_strengths_and_weaknesses_avg": [ 205.5, 101.33730803608314 ], "wc_questions_avg": [ 28.0, 20.940391591371924 ], "wc_limitations_avg": [ 17.25, 12.497499749949988 ], "wc_review_avg": [ 404.75, 89.39064548374175 ], "wc_reply_reviewers_avg": [ 31.75, 45.70763065397286 ], "wc_reply_authors_avg": [ 356.5, 198.0208322374189 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10113006239041370847&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "toronto.edu;utoronto.ca;utoronto.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "Canada" }, { "title": "Causal Inference with Non-IID Data using Linear Graphical Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54378", "id": "-eHlU74N9E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5573c63e8a89e32086e5c71cf0cc8fe4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-eHlU74N9E", "openreview": "https://openreview.net/forum?id=-eHlU74N9E", "poster": "/media/PosterPDFs/NeurIPS%202022/54378.png?t=1669845315.2119298", "slides": "https://nips.cc/virtual/2022/poster/54378", "video": "https://nips.cc/virtual/2022/poster/54378", "author_site": "Chi Zhang, Karthika Mohan, Judea Pearl", "tldr": "", "abstract": "Traditional causal inference techniques assume data are independent and identically distributed (IID) and thus ignores interactions among units. However, a unit\u2019s treatment may affect another unit's outcome (interference), a unit\u2019s treatment may be correlated with another unit\u2019s outcome, or a unit\u2019s treatment and outcome may be spuriously correlated through another unit. To capture such nuances, we model the data generating process using causal graphs and conduct a systematic analysis of the bias caused by different types of interactions when computing causal effects. We derive theorems to detect and quantify the interaction bias, and derive conditions under which it is safe to ignore interactions. Put differently, we present conditions under which causal effects can be computed with negligible bias by assuming that samples are IID. Furthermore, we develop a method to eliminate bias in cases where blindly assuming IID is expected to yield a significantly biased estimate. Finally, we test the coverage and performance of our methods through simulations.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/d06221e2a6db0dc1a5390c66f4e3d23acd582a14.zip", "author": "Chi Zhang;Karthika Mohan;Judea Pearl", "authorids": "~Chi_Zhang23;~Karthika_Mohan1;~Judea_Pearl1", "gender": ";;", "homepage": "https://www.linkedin.com/in/zccc/;http://karthikamohan.com;", "dblp": "91/195-16;;p/JudeaPearl", "google_scholar": "f5z0A_0AAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chi_Zhang23;~Karthika_Mohan1;~Judea_Pearl1", "aff": "University of California, Los Angeles;Oregon State University;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;oregonstate.edu;ucla.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2022causal,\ntitle={Causal Inference with Non-{IID} Data using Linear Graphical Models},\nauthor={Chi Zhang and Karthika Mohan and Judea Pearl},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-eHlU74N9E}\n}", "github": "", "project": "", "reviewers": "xxrS;wxC8;bAan", "pdf_size": 525132, "rating": "3;6;7", "confidence": "1;3;2", "soundness": "2;3;3", "novelty": "2;4;3", "presentation": "1;2;2", "contribution": "2;4;3", "wc_summary": "57;60;60", "wc_strengths_and_weaknesses": "76;93;174", "wc_questions": "25;33;113", "wc_limitations": "46;15;21", "wc_review": "204;201;368", "wc_reply_reviewers": "20;19;84", "wc_reply_authors": "776;190;626", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 2.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 59.0, 1.4142135623730951 ], "wc_strengths_and_weaknesses_avg": [ 114.33333333333333, 42.7577153531643 ], "wc_questions_avg": [ 57.0, 39.73243846867024 ], "wc_limitations_avg": [ 27.333333333333332, 13.424687043734847 ], "wc_review_avg": [ 257.6666666666667, 78.02706083290734 ], "wc_reply_reviewers_avg": [ 41.0, 30.40833219146796 ], "wc_reply_authors_avg": [ 530.6666666666666, 248.54957028506183 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7205766921228921, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14413779145119174364&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.ucla.edu;oregonstate.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Los Angeles;Oregon State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://oregonstate.edu", "aff_unique_abbr": "UCLA;OSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Locating and Editing Factual Associations in GPT", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53864", "id": "-h6WAS6eE4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6f1d43d5a82a37e89b0665b33bf3a182-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-h6WAS6eE4", "openreview": "https://openreview.net/forum?id=-h6WAS6eE4", "poster": "/media/PosterPDFs/NeurIPS%202022/53864.png?t=1669613096.3705087", "slides": "https://nips.cc/virtual/2022/poster/53864", "video": "https://nips.cc/virtual/2022/poster/53864", "author_site": "Kevin Meng, David Bau, Alex Andonian, Yonatan Belinkov", "tldr": "We locate and edit the mechanisms underlying factual association within the activations and weights of large pretrained GPT models.", "abstract": "We analyze the storage and recall of factual associations in autoregressive transformer language models, finding evidence that these associations correspond to localized, directly-editable computations. We first develop a causal intervention for identifying neuron activations that are decisive in a model's factual predictions. This reveals a distinct set of steps in middle-layer feed-forward modules that mediate factual predictions while processing subject tokens. 
To test our hypothesis that these computations correspond to factual association recall, we modify feed-forward weights to update specific factual associations using Rank-One Model Editing (ROME). We find that ROME is effective on a standard zero-shot relation extraction (zsRE) model-editing task, comparable to existing methods. To perform a more sensitive evaluation, we also evaluate ROME on a new dataset of counterfactual assertions, on which it simultaneously maintains both specificity and generalization, whereas other methods sacrifice one or the other. Our results confirm an important role for mid-layer feed-forward modules in storing factual associations and suggest that direct manipulation of computational mechanisms may be a feasible approach for model editing. The code, dataset, visualizations, and an interactive demo notebook are available in the supplemental materials.", "keywords": "interpretability;NLP;transformers;GPT", "primary_area": "", "supplementary_material": "/attachment/afce9ce0d17429564dda0c4d2dd20f0cbc171ac4.pdf", "author": "Kevin Meng;David Bau;Alex J Andonian;Yonatan Belinkov", "authorids": "~Kevin_Meng1;~David_Bau1;~Alex_J_Andonian1;~Yonatan_Belinkov1", "gender": "M;M;M;M", "homepage": "https://mengk.me/;https://baulab.info/;;https://www.belinkov.com", "dblp": "06/8478;47/3614;;136/8705", "google_scholar": "UcZbFroAAAAJ;CYI6cKgAAAAJ;;https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ", "orcid": ";0000-0003-1744-6765;;", "linkedin": "kmeng01/;david-bau-4b8130/;;", "or_profile": "~Kevin_Meng1;~David_Bau1;~Alex_J_Andonian1;~Yonatan_Belinkov1", "aff": "Northeastern University;Harvard University;Massachusetts Institute of Technology;Technion, Technion", "aff_domain": "neu.edu;harvard.edu;mit.edu;technion.ac.il", "position": "Researcher;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmeng2022locating,\ntitle={Locating and Editing Factual Associations in {GPT}},\nauthor={Kevin Meng and David Bau and Alex J Andonian and Yonatan Belinkov},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-h6WAS6eE4}\n}", "github": "", "project": "", "reviewers": "gUgP;avsv;1iyP", "pdf_size": 2014385, "rating": "4;7;7", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;4;4", "contribution": "2;3;3", "wc_summary": "49;66;271", "wc_strengths_and_weaknesses": "219;56;158", "wc_questions": "10;96;58", "wc_limitations": "21;13;35", "wc_review": "299;231;522", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "497;260;465", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 128.66666666666666, 100.8838716324644 ], "wc_strengths_and_weaknesses_avg": [ 144.33333333333334, 67.24251300743865 ], "wc_questions_avg": [ 54.666666666666664, 35.188381921057726 ], "wc_limitations_avg": [ 23.0, 9.092121131323903 ], "wc_review_avg": [ 350.6666666666667, 124.29087746983777 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 407.3333333333333, 104.99629623097293 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1226, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6676170860106418721&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "neu.edu;harvard.edu;mit.edu;technion.ac.il", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Northeastern University;Harvard University;Massachusetts Institute of Technology;Technion - Israel Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.northeastern.edu;https://www.harvard.edu;https://web.mit.edu;https://www.technion.ac.il/en/", "aff_unique_abbr": "NEU;Harvard;MIT;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Flowification: Everything is a normalizing flow", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53090", "id": "-jnE7sxuMm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/e6c5195dac675f03d0fcf3955bcdd3c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-jnE7sxuMm", "openreview": "https://openreview.net/forum?id=-jnE7sxuMm", "poster": "/media/PosterPDFs/NeurIPS%202022/a0d448ac4426dc3bdd609ed804e7af1a.png?t=1667475978.9410498", "slides": "https://nips.cc/virtual/2022/poster/53090", "video": "https://nips.cc/virtual/2022/poster/53090", "author_site": "B\u00e1lint M\u00e1t\u00e9, Samuel Klein, Tobias Golling, Fran\u00e7ois Fleuret", "tldr": "We show that multi-layer perceptrons and convolutional networks can be trained as normalizing flows to maximise the likelihood of data directly.", "abstract": "The two key characteristics of a normalizing flow are that it is invertible (in particular, dimension preserving) and that it monitors the amount by which it changes the likelihood of data points as samples are propagated along the network. 
Recently, multiple generalizations of normalizing flows have been introduced that relax these two conditions \citep{nielsen2020survae,huang2020augmented}. On the other hand, neural networks only perform a forward pass on the input; there is neither a notion of an inverse of a neural network nor is there one of its likelihood contribution. In this paper we argue that certain neural network architectures can be enriched with a stochastic inverse pass and that their likelihood contribution can be monitored in a way that they fall under the generalized notion of a normalizing flow mentioned above. We term this enrichment \emph{flowification}. We prove that neural networks only containing linear and convolutional layers and invertible activations such as LeakyReLU can be flowified and evaluate them in the generative setting on image datasets.", "keywords": "Normalizing flows", "primary_area": "", "supplementary_material": "/attachment/8b47a57cbe76fe4902b8b7b1ed04c5cfcee37990.zip", "author": "B\u00e1lint M\u00e1t\u00e9;Samuel Klein;Tobias Golling;Fran\u00e7ois Fleuret", "authorids": "~B\u00e1lint_M\u00e1t\u00e91;~Samuel_Klein1;tobias.golling@unige.ch;~Fran\u00e7ois_Fleuret2", "gender": ";M;;", "homepage": "https://balintmate.github.io;https://www.unige.ch/dpnc/en/members/samuel-klein/;;", "dblp": "301/7700;;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~B\u00e1lint_M\u00e1t\u00e91;~Samuel_Klein1;tobias.golling@unige.ch;~Fran\u00e7ois_Fleuret2", "aff": "University of Geneva;University of Geneva, Switzerland;;", "aff_domain": "unige.ch;unige.ch;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nm{\'a}t{\'e}2022flowification,\ntitle={Flowification: Everything is a normalizing flow},\nauthor={B{\'a}lint M{\'a}t{\'e} and Samuel Klein and Tobias Golling and Fran{\c{c}}ois Fleuret},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-jnE7sxuMm}\n}", "github": "", "project": "", "reviewers": "rJok;LKEV;PUhx;Kcoo", "pdf_size": 4455147, "rating": "4;5;6;7", "confidence": "3;4;3;2", "soundness": "1;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "contribution": "2;3;3;3", "wc_summary": "55;69;68;49", "wc_strengths_and_weaknesses": "388;584;100;135", "wc_questions": "62;3;39;104", "wc_limitations": "8;3;31;73", "wc_review": "513;659;238;361", "wc_reply_reviewers": "220;238;0;93", "wc_reply_authors": "734;947;266;273", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.25, 8.525696452489967 ], "wc_strengths_and_weaknesses_avg": [ 301.75, 197.23890970090054 ], "wc_questions_avg": [ 52.0, 36.65378561622251 ], "wc_limitations_avg": [ 28.75, 27.643941470058138 ], "wc_review_avg": [ 442.75, 158.35462576129564 ], "wc_reply_reviewers_avg": [ 137.75, 97.2017875350037 ], "wc_reply_authors_avg": [ 555.0, 295.2752952754429 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10643002561590578659&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "unige.ch;unige.ch;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Geneva", "aff_unique_dep": "", "aff_unique_url": "https://www.unige.ch", "aff_unique_abbr": "UNIGE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "-kS21GWVJU", "title": "Meta-sketch: A Neural Data Structure for Estimating Item Frequencies of Data Streams", "track": "main", "status": "Reject", "tldr": "", "abstract": "To estimate item frequencies of data streams with limited space, sketches are widely used in real applications, including real-time web analytics, network monitoring, and self-driving. Sketches can be viewed as a model which maps the identifier of a stream item to the corresponding frequency domain. Starting from this premise, we envision a neural data structure, which we term the meta-sketch, to go beyond the basic structure of conventional sketches. The meta-sketch learns basic sketching abilities from meta-tasks constructed from synthetic datasets following Zipf distributions in the pre-training phase and can be quickly adapted to real (skewed) distributions in the adaptation phase. 
Extensive experiments demonstrate the performance gains of the meta-sketch and offer insights into our proposals.\n", "keywords": "Data streams;Sketches;Meta-learning;Memory-augmented neural network", "primary_area": "", "supplementary_material": "/attachment/524f9954fd7288aaa351eb70f0a0bc791b53b1b2.zip", "author": "Yukun Cao;Yuan Feng;Xike Xie", "authorids": "~Yukun_Cao1;~Yuan_Feng4;~Xike_Xie1", "gender": "M;;M", "homepage": "https://caoyukunustc.github.io/;;http://staff.ustc.edu.cn/~xkxie", "dblp": "96/5464;;64/1308", "google_scholar": ";;", "orcid": ";;", "linkedin": ";https://www.linkedin.cn/incareer/in/%E6%BA%90-%E5%86%AF-a281021b4;", "or_profile": "~Yukun_Cao1;~Yuan_Feng4;~Xike_Xie1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;MS student;Research Professor", "bibtex": "@misc{\ncao2022metasketch,\ntitle={Meta-sketch: A Neural Data Structure for Estimating Item Frequencies of Data Streams},\nauthor={Yukun Cao and Yuan Feng and Xike Xie},\nyear={2022},\nurl={https://openreview.net/forum?id=-kS21GWVJU}\n}", "github": "", "project": "", "reviewers": "uz3r;4vDr;XQdP", "site": "https://openreview.net/forum?id=-kS21GWVJU", "pdf_size": 536343, "rating": "4;7;7", "confidence": "5;3;4", "soundness": "2;4;4", "novelty": "2;3;4", "presentation": "3;3;4", "contribution": "2;3;4", "wc_summary": "50;382;117", "wc_strengths_and_weaknesses": "138;58;129", "wc_questions": "42;97;127", "wc_limitations": "1;24;24", "wc_review": "231;561;397", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "774;600;701", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 183.0, 143.34806125883486 ], "wc_strengths_and_weaknesses_avg": [ 108.33333333333333, 35.78019315518325 ], "wc_questions_avg": [ 88.66666666666667, 35.19785346990479 ], "wc_limitations_avg": [ 16.333333333333332, 10.842303978193728 ], "wc_review_avg": [ 396.3333333333333, 134.72276059457147 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 691.6666666666666, 71.34112107021838 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11362209411586496261&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Explaining Preferences with Shapley Values", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55152", "id": "-me36V0os8P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b1656d20067ca7c84a33785c4083a75e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-me36V0os8P", "openreview": "https://openreview.net/forum?id=-me36V0os8P", "poster": 
"/media/PosterPDFs/NeurIPS%202022/55152.png?t=1668368368.4247785", "slides": "https://nips.cc/virtual/2022/poster/55152", "video": "https://nips.cc/virtual/2022/poster/55152", "author_site": "Robert Hu, Siu Lun Chau, Jaime Ferrando Huertas, Dino Sejdinovic", "tldr": "We propose Pref-SHAP to explain Preference Learning, even when data is not rankable", "abstract": "While preference modelling is becoming one of the pillars of machine learning, the problem of preference explanation remains challenging and underexplored. In this paper, we propose \\textsc{Pref-SHAP}, a Shapley value-based model explanation framework for pairwise comparison data. We derive the appropriate value functions for preference models and further extend the framework to model and explain \\emph{context specific} information, such as the surface type in a tennis game. To demonstrate the utility of \\textsc{Pref-SHAP}, we apply our method to a variety of synthetic and real-world datasets and show that richer and more insightful explanations can be obtained over the baseline.", "keywords": "Interpretability;Preference Learning;Kernel;Shapley Values;RKHS", "primary_area": "", "supplementary_material": "/attachment/ff65dd1dcd25fc091dfb05a19b0cc3a954f4abb7.pdf", "author": "Robert Hu;Siu Lun Chau;Jaime Ferrando Huertas;Dino Sejdinovic", "authorids": "~Robert_Hu1;~Siu_Lun_Chau1;~Jaime_Ferrando_Huertas1;~Dino_Sejdinovic1", "gender": "M;M;M;M", "homepage": "http://mlcs.stats.ox.ac.uk/people/hu_r/;https://chau999.github.io/;https://imjai.me;https://sejdino.github.io/", "dblp": ";264/9823;;31/1783", "google_scholar": "SaxR4ugAAAAJ;e7ZBlIsAAAAJ;;v8Dg1lIAAAAJ", "orcid": ";;;0000-0001-5547-9213", "linkedin": ";;;https://linkedin.com/in/dinosejdinovic", "or_profile": "~Robert_Hu1;~Siu_Lun_Chau1;~Jaime_Ferrando_Huertas1;~Dino_Sejdinovic1", "aff": "University of Oxford;University of Oxford;;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;;oxford.ac.uk", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nhu2022explaining,\ntitle={Explaining Preferences with Shapley Values},\nauthor={Robert Hu and Siu Lun Chau and Jaime Ferrando Huertas and Dino Sejdinovic},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-me36V0os8P}\n}", "github": "", "project": "", "reviewers": "wsLb;eoAJ;jJ4D;TiH3", "pdf_size": 1882811, "rating": "5;6;6;8", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;2;4", "presentation": "3;4;3;4", "contribution": "2;2;2;4", "wc_summary": "56;115;59;138", "wc_strengths_and_weaknesses": "300;972;288;128", "wc_questions": "82;319;86;12", "wc_limitations": "10;200;2;58", "wc_review": "448;1606;435;336", "wc_reply_reviewers": "0;120;83;0", "wc_reply_authors": "1172;566;741;165", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 92.0, 35.46124645299429 ], "wc_strengths_and_weaknesses_avg": [ 422.0, 324.7214190656354 ], "wc_questions_avg": [ 124.75, 115.94691673347765 ], "wc_limitations_avg": [ 67.5, 79.44022910339572 ], "wc_review_avg": [ 706.25, 521.2736205679316 ], "wc_reply_reviewers_avg": [ 50.75, 52.408849443581566 ], "wc_reply_authors_avg": [ 661.0, 361.44224988232907 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13809288685377851579&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;ox.ac.uk;;oxford.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Parallel Tempering With a Variational Reference", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54860", "id": "-o0kPsyzErW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/03cd3cf3f74d4f9ce5958de269960884-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-o0kPsyzErW", "openreview": "https://openreview.net/forum?id=-o0kPsyzErW", "poster": "/media/PosterPDFs/NeurIPS%202022/9edcc1391c208ba0b503fe9a22574251.png?t=1667066324.3901713", "slides": "https://nips.cc/virtual/2022/poster/54860", "video": "https://nips.cc/virtual/2022/poster/54860", "author_site": "Nikola Surjanovic, Saifuddin Syed, Alexandre Bouchard-C\u00f4t\u00e9, Trevor Campbell", "tldr": "To effectively sample from complex target distributions, we introduce parallel tempering with an annealing path starting from a variational reference. The reference is tuned to lie close to the target distribution using a gradient-free procedure.", "abstract": "Sampling from complex target distributions is a challenging task fundamental to Bayesian inference. Parallel tempering (PT) addresses this problem by constructing a Markov chain on the expanded state space of a sequence of distributions interpolating between the posterior distribution and a fixed reference distribution, which is typically chosen to be the prior. However, in the typical case where the prior and posterior are nearly mutually singular, PT methods are computationally prohibitive. 
In this work we address this challenge by constructing a generalized annealing path connecting the posterior to an adaptively tuned variational reference. The reference distribution is tuned to minimize the forward (inclusive) KL divergence to the posterior distribution using a simple, gradient-free moment-matching procedure. We show that our adaptive procedure converges to the forward KL minimizer, and that the forward KL divergence serves as a good proxy to a previously developed measure of PT performance. We also show that in the large-data limit in typical Bayesian models, the proposed method improves in performance, while traditional PT deteriorates arbitrarily. Finally, we introduce PT with two references---one fixed, one variational---with a novel split annealing path that ensures stable variational reference adaptation. The paper concludes with experiments that demonstrate the large empirical gains achieved by our method in a wide range of realistic Bayesian inference scenarios.", "keywords": "Bayesian inference;parallel tempering;variational inference;Markov chain Monte Carlo", "primary_area": "", "supplementary_material": "/attachment/956fa7772f7ac7cf529690ba6a5afcc016088453.pdf", "author": "Nikola Surjanovic;Saifuddin Syed;Alexandre Bouchard-Cote;Trevor Campbell", "authorids": "~Nikola_Surjanovic1;~Saifuddin_Syed1;~Alexandre_Bouchard-Cote1;~Trevor_Campbell1", "gender": "M;M;M;M", "homepage": "https://nikola-sur.netlify.app/;;https://www.stat.ubc.ca/~bouchard/papers.html;https://trevorcampbell.me", "dblp": "346/0912;;52/3912;130/3822", "google_scholar": "wjkTE9MAAAAJ;;;", "orcid": "; 0000-0002-8499-8255;;", "linkedin": ";;;", "or_profile": "~Nikola_Surjanovic1;~Saifuddin_Syed1;~Alexandre_Bouchard-Cote1;~Trevor_Campbell1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia", "aff_domain": "ubc.ca;ubc.ca;ubc.ca;ubc.ca", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsurjanovic2022parallel,\ntitle={Parallel Tempering With a Variational Reference},\nauthor={Nikola Surjanovic and Saifuddin Syed and Alexandre Bouchard-Cote and Trevor Campbell},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-o0kPsyzErW}\n}", "github": "", "project": "", "reviewers": "BxuQ;hxQQ;LMG3", "pdf_size": 3642167, "rating": "4;5;7", "confidence": "5;2;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;4;3", "contribution": "2;2;3", "wc_summary": "80;86;54", "wc_strengths_and_weaknesses": "226;184;122", "wc_questions": "24;154;196", "wc_limitations": "1;19;27", "wc_review": "331;443;399", "wc_reply_reviewers": "262;0;0", "wc_reply_authors": "1290;1026;864", "reply_reviewers": "3;0;0", "reply_authors": "4;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 73.33333333333333, 13.888444437333106 ], "wc_strengths_and_weaknesses_avg": [ 177.33333333333334, 42.71871824960211 ], "wc_questions_avg": [ 124.66666666666667, 73.21809126772487 ], "wc_limitations_avg": [ 15.666666666666666, 10.873004286866726 ], "wc_review_avg": [ 391.0, 46.07240678178932 ], "wc_reply_reviewers_avg": [ 87.33333333333333, 123.50798444725031 ], "wc_reply_authors_avg": [ 1060.0, 175.56765077883796 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.49999999999999994, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7643430986545795668&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "ubc.ca;ubc.ca;ubc.ca;ubc.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Fine-Tuning Pre-Trained Language Models Effectively by Optimizing Subnetworks Adaptively", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54017", "id": "-r6-WNKfyhW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/869bfd807a513755bef25e3896a19a21-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-r6-WNKfyhW", "openreview": "https://openreview.net/forum?id=-r6-WNKfyhW", "poster": "/media/PosterPDFs/NeurIPS%202022/195f15384c2a79cedf293e4a847ce85c.png?t=1666433451.509833", "slides": "https://nips.cc/virtual/2022/poster/54017", "video": "https://nips.cc/virtual/2022/poster/54017", "author_site": "Haojie Zhang, Ge Li, Jia Li, Zhongjin Zhang, YUQI ZHU, Zhi Jin", "tldr": "", "abstract": "Large-scale pre-trained language models have achieved impressive results on a wide range of downstream tasks recently. However, fine-tuning an extremely large-scale pre-trained language model on limited target datasets is often plagued by overfitting and representation degradation. In this paper, we propose a Dynamic Parameter Selection (DPS) algorithm for the large-scale pre-trained models during fine-tuning, which adaptively selects a more promising subnetwork to perform staging updates based on gradients of back-propagation. 
\nExperiments on the GLUE benchmark show that DPS outperforms previous fine-tuning methods in terms of overall performance and stability, and consistently achieves better results with various pre-trained language models. In addition, DPS brings a large magnitude of improvement in out-of-domain transfer experiments and low-resource scenarios, which shows that it can maintain stable general contextual features and reduce representation collapse. We release our code at \url{https://github.com/ZhangHaojie077/DPS}.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5171898cd61f8cfc36fa119ac597862551a8bd30.pdf", "author": "Zhang Haojie;Ge Li;Jia Li;Zhongjin Zhang;YUQI ZHU;Zhi Jin", "authorids": "~Zhang_Haojie1;~Ge_Li4;~Jia_Li14;~Zhongjin_Zhang1;~YUQI_ZHU1;~Zhi_Jin1", "gender": "M;M;M;M;;F", "homepage": "http://jttdjs.com;https://ligechina.github.io;https://lj2lijia.github.io/;https://github.com/ZJZ1223;;http://faculty.pku.edu.cn/zhijin/en/index.htm", "dblp": ";24/712-1;23/6950-11.html;;;22/3510", "google_scholar": ";PPqcVRwAAAAJ;https://scholar.google.com.sg/citations?user=Us0ZgUcAAAAJ;;;https://scholar.google.com.tw/citations?user=ZC7SObAAAAAJ", "orcid": ";;0000-0002-5579-8852;;;0000-0003-1087-226X", "linkedin": ";;;;;", "or_profile": "~Zhang_Haojie1;~Ge_Li4;~Jia_Li14;~Zhongjin_Zhang1;~YUQI_ZHU1;~Zhi_Jin1", "aff": "Peking University;Peking University;Peking University;Peking University;;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;;pku.edu.cn", "position": "MS student;Full Professor;PhD student;MS student;;Full Professor", "bibtex": "@inproceedings{\nhaojie2022finetuning,\ntitle={Fine-Tuning Pre-Trained Language Models Effectively by Optimizing Subnetworks Adaptively},\nauthor={Zhang Haojie and Ge Li and Jia Li and Zhongjin Zhang and YUQI ZHU and Zhi Jin},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-r6-WNKfyhW}\n}", "github": "", "project": "", "reviewers": "SWtE;BfEh;kBeJ;sChX", "pdf_size": 367533, "rating": "6;7;7;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "contribution": "2;2;3;3", "wc_summary": "199;51;98;95", "wc_strengths_and_weaknesses": "260;166;86;145", "wc_questions": "170;114;208;51", "wc_limitations": "183;20;76;1", "wc_review": "812;351;468;292", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "860;463;522;344", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.75, 54.24193488436783 ], "wc_strengths_and_weaknesses_avg": [ 164.25, 62.57944950221278 ], "wc_questions_avg": [ 135.75, 59.26371149362821 ], "wc_limitations_avg": [ 70.0, 70.82725464113373 ], "wc_review_avg": [ 480.75, 201.46386152359932 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 547.25, 191.61077083504466 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=204679375623303358&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MOVE: Unsupervised Movable Object Segmentation and Detection", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54177", "id": "-t9FUWW5f3u", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d7eb232f196124894f2e65b9010a5c57-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-t9FUWW5f3u", "openreview": "https://openreview.net/forum?id=-t9FUWW5f3u", "poster": "/media/PosterPDFs/NeurIPS%202022/54177.png?t=1669204557.8995426", "slides": "https://nips.cc/virtual/2022/poster/54177", "video": "https://nips.cc/virtual/2022/poster/54177", "author_site": "Adam Bielski, Paolo Favaro", "tldr": "SotA on unsupervised: saliency segmentation, object discovery and class-agnostic object detection", "abstract": "We introduce MOVE, a novel method to segment objects without any form of supervision. MOVE exploits the fact that foreground objects can be shifted locally relative to their initial position and still result in realistic (undistorted) new images. This property allows us to train a segmentation model on a dataset of images without annotation and to achieve state of the art (SotA) performance on several evaluation datasets for unsupervised salient object detection and segmentation. In unsupervised single object discovery, MOVE gives an average CorLoc improvement of 7.2% over the SotA, and in unsupervised class-agnostic object detection it gives a relative AP improvement of 53% on average. Our approach is built on top of self-supervised features (e.g. 
from DINO or MAE), an inpainting network (based on the Masked AutoEncoder) and adversarial training.", "keywords": "Object Discovery;Saliency Detection;Object Segmentation;Object Detection;Self-Supervised Learning;Unsupervised Learning", "primary_area": "", "supplementary_material": "/attachment/76f9b85abeb8b431ef0ae4f91b23c600d169afb0.pdf", "author": "Adam Bielski;Paolo Favaro", "authorids": "~Adam_Bielski1;~Paolo_Favaro1", "gender": ";M", "homepage": ";http://cvg.unibe.ch", "dblp": "215/3579;02/4162", "google_scholar": ";w_XDRRsAAAAJ", "orcid": ";0000-0003-3546-8247", "linkedin": ";paolo-favaro-25765b4", "or_profile": "~Adam_Bielski1;~Paolo_Favaro1", "aff": "Institute f\u00fcr Informatics, Universit\u00e4t Bern;Institute f\u00fcr Informatik, University of Bern", "aff_domain": "unibe.ch;unibe.ch", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nbielski2022move,\ntitle={{MOVE}: Unsupervised Movable Object Segmentation and Detection},\nauthor={Adam Bielski and Paolo Favaro},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-t9FUWW5f3u}\n}", "github": "", "project": "", "reviewers": "tM6J;tSAq;9LXw", "pdf_size": 4851935, "rating": "3;8;8", "confidence": "5;4;5", "soundness": "2;4;4", "novelty": "1;4;4", "presentation": "1;4;4", "contribution": "1;4;4", "wc_summary": "103;267;120", "wc_strengths_and_weaknesses": "107;222;124", "wc_questions": "161;49;115", "wc_limitations": "14;10;1", "wc_review": "385;548;360", "wc_reply_reviewers": "901;0;0", "wc_reply_authors": "2190;440;611", "reply_reviewers": "4;0;0", "reply_authors": "5;1;1", "rating_avg": [ 6.333333333333333, 2.357022603955158 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 1.4142135623730951 ], "presentation_avg": [ 3.0, 1.4142135623730951 ], "contribution_avg": [ 3.0, 1.4142135623730951 ], "wc_summary_avg": [ 163.33333333333334, 73.631213188128 ], "wc_strengths_and_weaknesses_avg": [ 151.0, 50.68201521907615 ], "wc_questions_avg": [ 108.33333333333333, 45.966171135835204 ], "wc_limitations_avg": [ 8.333333333333334, 5.436502143433364 ], "wc_review_avg": [ 431.0, 83.35866281717016 ], "wc_reply_reviewers_avg": [ 300.3333333333333, 424.7354732327196 ], "wc_reply_authors_avg": [ 1080.3333333333333, 787.752217106087 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.8856180831641267 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8173455362624893467&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "unibe.ch;unibe.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e4t Bern;University of Bern", "aff_unique_dep": "Institute f\u00fcr Informatics;Institute for Computer Science", "aff_unique_url": "https://www.inf.unibe.ch;https://www.unibe.ch", "aff_unique_abbr": ";UniBE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Active Learning Polynomial Threshold Functions", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53868", "id": "-uezmSLXVoE", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2022/hash/99015a2974664cb9db56844d0f27b5a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-uezmSLXVoE", "openreview": "https://openreview.net/forum?id=-uezmSLXVoE", "poster": "/media/PosterPDFs/NeurIPS%202022/53868.png?t=1669671652.2190666", "slides": "https://nips.cc/virtual/2022/poster/53868", "video": "https://nips.cc/virtual/2022/poster/53868", "author_site": "Omri Ben-Eliezer, Max Hopkins, Chutong Yang, Hantao Yu", "tldr": "We study active learning polynomial threshold functions where the learner may query the sign of underlying derivatives.", "abstract": "We initiate the study of active learning polynomial threshold functions (PTFs). While traditional lower bounds imply that even univariate quadratics cannot be non-trivially actively learned, we show that allowing the learner basic access to the derivatives of the underlying classifier circumvents this issue and leads to a computationally efficient algorithm for active learning degree-$d$ univariate PTFs in $\\tilde{O}(d^3\\log(1/\\varepsilon\\delta))$ queries. We extend this result to the batch active setting, providing a smooth transition between query complexity and rounds of adaptivity, and also provide near-optimal algorithms for active learning PTFs in several average case settings. Finally, we prove that access to derivatives is insufficient for active learning multivariate PTFs, even those of just two variables.", "keywords": "Statistical Learning Theory;Active Learning;Polynomial Threshold Functions;Enriched Queries", "primary_area": "", "supplementary_material": "/attachment/8715aeb4163e1f9dd2d6e4e9a395d8fe2492234d.pdf", "author": "Omri Ben-Eliezer;Max Hopkins;Chutong Yang;Hantao Yu", "authorids": "~Omri_Ben-Eliezer1;~Max_Hopkins1;~Chutong_Yang1;~Hantao_Yu1", "gender": ";M;M;M", "homepage": ";http://cseweb.ucsd.edu/~nmhopkin/;https://chutongyang98.github.io/;https://www.hantaoyu.org/", "dblp": ";206/6755;241/1151;312/6165", "google_scholar": ";https://scholar.google.com/citations?hl=en;9BBSgO4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Omri_Ben-Eliezer1;~Max_Hopkins1;~Chutong_Yang1;~Hantao_Yu1", "aff": ";University of California, San Diego;Stanford University;University of California, San Diego", "aff_domain": ";ucsd.edu;stanford.edu;ucsd.edu", "position": ";PhD student;MS student;Undergrad student", "bibtex": "@inproceedings{\nben-eliezer2022active,\ntitle={Active Learning Polynomial Threshold Functions},\nauthor={Omri Ben-Eliezer and Max Hopkins and Chutong Yang and Hantao Yu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-uezmSLXVoE}\n}", "github": "", "project": "", "reviewers": "LNSA;up2u;SBJT;RjhJ", "pdf_size": 265261, "rating": "6;7;8;8", "confidence": "4;3;4;3", "soundness": "4;3;4;4", "novelty": "3;3;4;3", "presentation": "3;4;4;4", "contribution": "3;3;4;3", "wc_summary": "175;36;174;259", "wc_strengths_and_weaknesses": "160;137;136;215", "wc_questions": "12;160;71;27", "wc_limitations": "2;14;71;5", "wc_review": "349;347;452;506", "wc_reply_reviewers": "0;8;6;6", "wc_reply_authors": "701;156;207;256", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 161.0, 79.99062445061921 ], "wc_strengths_and_weaknesses_avg": [ 162.0, 32.07023542164915 ], "wc_questions_avg": [ 67.5, 57.638962516686576 ], "wc_limitations_avg": [ 23.0, 28.062430400804562 ], "wc_review_avg": [ 413.5, 68.22939249326495 ], "wc_reply_reviewers_avg": [ 5.0, 3.0 ], "wc_reply_authors_avg": [ 330.0, 217.0956010609151 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9948179586457087544&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";ucsd.edu;stanford.edu;ucsd.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, San Diego;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.stanford.edu", "aff_unique_abbr": "UCSD;Stanford", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "San Diego;Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Provable General Function Class Representation Learning in Multitask Bandits and MDP", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54246", "id": "-uxUxmlr3qT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/4b121e627d3c5683f312ad168988f3f0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-uxUxmlr3qT", "openreview": "https://openreview.net/forum?id=-uxUxmlr3qT", "poster": "/media/PosterPDFs/NeurIPS%202022/6b493230205f780e1bc26945df7481e5.png?t=1667543814.6672235", "slides": "https://nips.cc/virtual/2022/poster/54246", "video": "https://nips.cc/virtual/2022/poster/54246", "author_site": "Rui Lu, Andrew Zhao, Simon Du, Gao Huang", "tldr": "Extend the theoretical analysis from linear to general non-linear function classes for the benefit of multitask representation learning in bandits and MDPs.", "abstract": " While multitask representation learning has become a popular approach in reinforcement learning (RL) to boost the sample efficiency, the theoretical understanding of why and how it works is still limited. Most previous analytical works could only assume that the representation function is already known to the agent or from linear function class, since analyzing general function class representation encounters non-trivial technical obstacles such as generalization guarantee, formulation of confidence bound in abstract function space, etc. 
However, linear-case analysis heavily relies on the particularity of the linear function class, while real-world practice usually adopts general non-linear representation functions like neural networks. This significantly reduces its applicability. In this work, we extend the analysis to general function class representations. Specifically, we consider an agent playing $M$ contextual bandits (or MDPs) concurrently and extracting a shared representation function $\\phi$ from a specific function class $\\Phi$ using our proposed Generalized Functional Upper Confidence Bound algorithm (GFUCB). We theoretically validate the benefit of multitask representation learning within a general function class for bandits and linear MDPs for the first time. Lastly, we conduct experiments to demonstrate the effectiveness of our algorithm with neural network representations.", "keywords": "reinforcement learning;multi-task;representation learning;theory", "primary_area": "", "supplementary_material": "/attachment/f332133262a23fc84bf74849639c89463d52db5c.pdf", "author": "Rui Lu;Andrew Zhao;Simon Shaolei Du;Gao Huang", "authorids": "~Rui_Lu2;~Andrew_Zhao1;~Simon_Shaolei_Du1;~Gao_Huang1", "gender": "M;M;M;M", "homepage": ";https://andrewzh112.github.io;http://simonshaoleidu.com;http://www.gaohuang.net", "dblp": ";170/0026;176/5602;", "google_scholar": "upMvIv4AAAAJ;Tlt5xsYAAAAJ;OttawxUAAAAJ;-P9LwcgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Rui_Lu2;~Andrew_Zhao1;~Simon_Shaolei_Du1;~Gao_Huang1", "aff": "Department of Automation, Tsinghua University;Automation, Tsinghua University, Tsinghua University;Meta Facebook;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;fb.com;tsinghua.edu.cn", "position": "PhD student;PhD student;Visiting Professor;Associate Professor", "bibtex": "@inproceedings{\nlu2022provable,\ntitle={Provable General Function Class Representation Learning in Multitask Bandits and {MDP}},\nauthor={Rui Lu and Andrew Zhao and Simon Shaolei Du and Gao Huang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-uxUxmlr3qT}\n}", "github": "", "project": "", "reviewers": "8wSD;1Kyf;jNGD;rCEX", "pdf_size": 399710, "rating": "3;6;6;7", "confidence": "3;4;2;2", "soundness": "2;3;3;4", "novelty": "2;2;4;3", "presentation": "2;4;4;3", "contribution": "2;2;4;3", "wc_summary": "86;56;147;166", "wc_strengths_and_weaknesses": "273;175;187;160", "wc_questions": "110;37;118;18", "wc_limitations": "17;25;124;14", "wc_review": "486;293;576;358", "wc_reply_reviewers": "528;25;64;181", "wc_reply_authors": "2375;755;354;394", "reply_reviewers": "3;1;1;1", "reply_authors": "5;2;1;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 113.75, 44.55544298960566 ], "wc_strengths_and_weaknesses_avg": [ 198.75, 43.92251700437943 ], "wc_questions_avg": [ 70.75, 43.859862060886606 ], "wc_limitations_avg": [ 45.0, 45.78755289377234 ], "wc_review_avg": [ 428.25, 109.99176105508994 ], "wc_reply_reviewers_avg": [ 199.5, 198.1571346179592 ], "wc_reply_authors_avg": [ 969.5, 826.3596372040445 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3015113445777637, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9083756560427278976&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;fb.com;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tsinghua University;Meta", "aff_unique_dep": "Department of Automation;Meta Platforms, Inc.", "aff_unique_url": "https://www.tsinghua.edu.cn;https://meta.com", "aff_unique_abbr": "THU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Inductive Logical Query Answering in Knowledge Graphs", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54713", "id": "-vXEN5rIABY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6246e04dcf42baf7c71e3a65d3d93b55-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-vXEN5rIABY", "openreview": "https://openreview.net/forum?id=-vXEN5rIABY", "poster": "/media/PosterPDFs/NeurIPS%202022/54713.png?t=1668117112.154275", "slides": "https://nips.cc/virtual/2022/poster/54713", "video": "https://nips.cc/virtual/2022/poster/54713", "author_site": "Michael Galkin, Zhaocheng Zhu, Hongyu Ren, Jian Tang", "tldr": "Answering complex logical queries over new, unseen entities in KGs at inference time with two inductive representation learning strategies", "abstract": "Formulating and answering logical queries is a standard communication interface for knowledge graphs (KGs). \nAlleviating the notorious incompleteness of real-world KGs, neural methods achieved impressive results in link prediction and complex query answering tasks by learning representations of entities, relations, and queries. Still, most existing query answering methods rely on transductive entity embeddings and cannot generalize to KGs containing new entities without retraining entity embeddings. 
\nIn this work, we study the inductive query answering task where inference is performed on a graph containing new entities with queries over both seen and unseen entities. To this end, we devise two mechanisms leveraging inductive node and relational structure representations powered by graph neural networks (GNNs).\nExperimentally, we show that inductive models are able to perform logical reasoning at inference time over unseen nodes generalizing to graphs up to 500% larger than training ones. Exploring the efficiency--effectiveness trade-off, we find the inductive relational structure representation method generally achieves higher performance, while the inductive node representation method is able to answer complex queries in the inference-only regime without any training on queries and scale to graphs of millions of nodes. Code is available at \nhttps://github.com/DeepGraphLearning/InductiveQE", "keywords": "inductive graph reasoning;complex query answering;logical queries;knowledge graphs;graph neural networks;inductive representation learning", "primary_area": "", "supplementary_material": "/attachment/c18745ed7205c3ba5f513d8d171dd2d71f18535b.pdf", "author": "Mikhail Galkin;Zhaocheng Zhu;Hongyu Ren;Jian Tang", "authorids": "~Mikhail_Galkin1;~Zhaocheng_Zhu1;~Hongyu_Ren1;~Jian_Tang1", "gender": "M;M;;", "homepage": "https://migalkin.github.io/;https://kiddozhu.github.io/;;http://www.jian-tang.com", "dblp": "160/8154;195/0435;30/10885;181/2667-5", "google_scholar": "yfYRbG4AAAAJ;Qd8JumkAAAAJ;;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mikhail_Galkin1;~Zhaocheng_Zhu1;~Hongyu_Ren1;~Jian_Tang1", "aff": "Mila & McGill University;Universit\u00e9 de Montr\u00e9al;Computer Science Department, Stanford University;Mila, HEC Montreal", "aff_domain": "mila.quebec;mila.quebec;cs.stanford.edu;hec.ca", "position": "Postdoc;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngalkin2022inductive,\ntitle={Inductive Logical Query Answering in Knowledge Graphs},\nauthor={Mikhail Galkin and Zhaocheng Zhu and Hongyu Ren and Jian Tang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-vXEN5rIABY}\n}", "github": "", "project": "", "reviewers": "tzzT;YfZT;PdEh;YzXG", "pdf_size": 776459, "rating": "4;5;6;8", "confidence": "4;5;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "contribution": "2;3;3;3", "wc_summary": "36;72;220;152", "wc_strengths_and_weaknesses": "148;192;89;194", "wc_questions": "87;44;1;152", "wc_limitations": "72;40;1;54", "wc_review": "343;348;311;552", "wc_reply_reviewers": "0;0;35;347", "wc_reply_authors": "2238;1414;52;2821", "reply_reviewers": "0;0;1;3", "reply_authors": "4;2;1;6", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.0, 71.386273190299 ], "wc_strengths_and_weaknesses_avg": [ 155.75, 42.69879974893908 ], "wc_questions_avg": [ 71.0, 55.780821076782296 ], "wc_limitations_avg": [ 41.75, 26.11871934073338 ], "wc_review_avg": [ 388.5, 95.45810599420041 ], "wc_reply_reviewers_avg": [ 95.5, 145.90493480345344 ], "wc_reply_authors_avg": [ 1631.25, 1039.8171413763096 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5559504316670029197&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": "mila.quebec;mila.quebec;cs.stanford.edu;hec.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "McGill University;Universit\u00e9 de Montr\u00e9al;Stanford University;HEC Montreal", "aff_unique_dep": "Mila;;Computer Science Department;HEC Business School", "aff_unique_url": "https://www.mcgill.ca;https://www.umontreal.ca;https://www.stanford.edu;https://www.hec.ca", "aff_unique_abbr": "McGill;UdeM;Stanford;HEC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Montreal", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Optimal Transport of Classifiers to Fairness", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54047", "id": "-welFirjMss", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/da75d2bbf862b86f10241d0887613b41-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-welFirjMss", "openreview": "https://openreview.net/forum?id=-welFirjMss", "poster": "/media/PosterPDFs/NeurIPS%202022/54047.png?t=1669065788.014616", "slides": "https://nips.cc/virtual/2022/poster/54047", "video": "https://nips.cc/virtual/2022/poster/54047", "author_site": "Maarten Buyl, Tijl De Bie", "tldr": "We use the cost of Optimal Transport to the set of fair classifiers as a differentiable fairness regularization term.", "abstract": "In past work on fairness in machine learning, the focus has been on forcing the prediction of classifiers to have similar statistical properties for people of different demographics. To reduce the violation of these properties, fairness methods usually simply rescale the classifier scores, ignoring similarities and dissimilarities between members of different groups. Yet, we hypothesize that such information is relevant in quantifying the unfairness of a given classifier. 
To validate this hypothesis, we introduce Optimal Transport to Fairness (OTF), a method that quantifies the violation of fairness constraints as the smallest Optimal Transport cost between a probabilistic classifier and any score function that satisfies these constraints. For a flexible class of linear fairness constraints, we construct a practical way to compute OTF as a differentiable fairness regularizer that can be added to any standard classification setting. Experiments show that OTF can be used to achieve an improved trade-off between predictive power and fairness.", "keywords": "fairness;optimal transport;projection;regularization;classification", "primary_area": "", "supplementary_material": "/attachment/1cc33a9e85c333933fdfa6539b7ff20ab0d4c804.pdf", "author": "Maarten Buyl;Tijl De Bie", "authorids": "~Maarten_Buyl1;~Tijl_De_Bie1", "gender": "M;M", "homepage": ";http://www.tijldebie.net", "dblp": "259/2365;49/2018", "google_scholar": "A5bU3BUAAAAJ;https://scholar.google.be/citations?user=eH_c4R4AAAAJ", "orcid": "0000-0002-5434-2386;0000-0002-2692-7504", "linkedin": "maarten-buyl-44a54715a/;tijldebie/", "or_profile": "~Maarten_Buyl1;~Tijl_De_Bie1", "aff": "Ghent University;Ghent University", "aff_domain": "ugent.be;ugent.be", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nbuyl2022optimal,\ntitle={Optimal Transport of Classifiers to Fairness},\nauthor={Maarten Buyl and Tijl De Bie},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-welFirjMss}\n}", "github": "", "project": "", "reviewers": "MHSV;cFmV;Sans", "pdf_size": 455328, "rating": "6;6;6", "confidence": "3;2;3", "soundness": "2;2;3", "novelty": "3;2;3", "presentation": "2;3;3", "contribution": "3;2;3", "wc_summary": "126;111;51", "wc_strengths_and_weaknesses": "162;53;147", "wc_questions": "495;41;46", "wc_limitations": "65;6;10", "wc_review": "848;211;254", "wc_reply_reviewers": "287;0;0", "wc_reply_authors": "1513;215;284", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 96.0, 32.4037034920393 ], "wc_strengths_and_weaknesses_avg": [ 120.66666666666667, 48.23783669370849 ], "wc_questions_avg": [ 194.0, 212.84892921193347 ], "wc_limitations_avg": [ 27.0, 26.919633479426622 ], "wc_review_avg": [ 437.6666666666667, 290.6800455636556 ], "wc_reply_reviewers_avg": [ 95.66666666666667, 135.2930974670261 ], "wc_reply_authors_avg": [ 670.6666666666666, 596.2853530166763 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16219423422077161743&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ugent.be;ugent.be", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Ghent University", "aff_unique_dep": "", "aff_unique_url": "https://www.ugent.be/en", "aff_unique_abbr": "UGent", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0", "aff_country_unique": "Belgium" }, { "title": "Dance of SNN and ANN: Solving binding problem by combining spike timing and reconstructive attention", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54560", "id": "-yiZR4_Xhh", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/cba76ef96c4cd625631ab4d33285b045-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-yiZR4_Xhh", "openreview": "https://openreview.net/forum?id=-yiZR4_Xhh", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54560", "video": "https://nips.cc/virtual/2022/poster/54560", "author_site": "Hao Zheng, Hui Lin, Rong Zhao, Luping Shi", "tldr": "Unsupervised hybrid neural network incorporating spike timing dynamics into ANN field to solve binding problem.", "abstract": "The binding problem is one of the fundamental challenges that prevent the artificial neural network (ANNs) from a compositional understanding of the world like human perception, because disentangled and distributed representations of generative factors can interfere and lead to ambiguity when complex data with multiple objects are presented. In this paper, we propose a brain-inspired unsupervised hybrid neural network (HNN) that introduces temporal binding theory originated from neuroscience into ANNs by integrating spike timing dynamics (via spiking neural networks, SNNs) with reconstructive attention (by ANNs). Spike timing provides an additional dimension for grouping, while reconstructive feedback coordinates the spikes into temporal coherent states. Through iterative interaction of ANN and SNN, the model continuously binds multiple objects at alternative synchronous firing times in the SNN coding space. The effectiveness of the model is evaluated on five artificially generated datasets of binary images. By visualization and analysis, we demonstrate that the binding is explainable, soft, flexible, and hierarchical. Notably, the model is trained on single object datasets without explicit supervision on grouping, but can successfully bind multiple objects on test datasets, showing its compositional generalization capability. 
Further results show its binding ability in dynamic situations.", "keywords": "Perceptual grouping;Binding problem;Time coding;Neuronal synchrony;Top-down attention;Compositional generalization;Object learning;Hybrid neural network;Spiking neural network;Artificial neural network", "primary_area": "", "supplementary_material": "/attachment/48c563b870f190629ab19672f79a0219e5361915.pdf", "author": "Hao Zheng;Hui Lin;Rong Zhao;Luping Shi", "authorids": "~Hao_Zheng2;~Hui_Lin5;r_zhao@tsinghua.edu.cn;~Luping_Shi1", "gender": "M;M;;M", "homepage": "https://bcs.mit.edu/directory/hao-zheng;https://www.researchgate.net/profile/Hui-Lin-59;;", "dblp": ";;;84/7231.html", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hao_Zheng2;~Hui_Lin5;r_zhao@tsinghua.edu.cn;~Luping_Shi1", "aff": "Tsinghua University;Electronic Engineering, Tsinghua University, Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nzheng2022dance,\ntitle={Dance of {SNN} and {ANN}: Solving binding problem by combining spike timing and reconstructive attention},\nauthor={Hao Zheng and Hui Lin and Rong Zhao and Luping Shi},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-yiZR4_Xhh}\n}", "github": "", "project": "", "reviewers": "1EXz;QMxx;2YkF;Ybbh", "pdf_size": 2232309, "rating": "5;5;6;7", "confidence": "2;3;3;4", "soundness": "2;3;3;3", "novelty": "3;3;3;4", "presentation": "3;2;3;2", "contribution": "3;3;3;4", "wc_summary": "113;77;86;66", "wc_strengths_and_weaknesses": "146;231;53;136", "wc_questions": "274;106;121;400", "wc_limitations": "16;28;3;80", "wc_review": "549;442;263;682", "wc_reply_reviewers": "166;62;41;73", "wc_reply_authors": "1397;769;1024;817", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 17.38533865071371 ], "wc_strengths_and_weaknesses_avg": [ 141.5, 63.0337211340089 ], "wc_questions_avg": [ 225.25, 120.41880044245583 ], "wc_limitations_avg": [ 31.75, 29.22648627529488 ], "wc_review_avg": [ 484.0, 153.32481860416468 ], "wc_reply_reviewers_avg": [ 85.5, 47.877447718106275 ], "wc_reply_authors_avg": [ 1001.75, 247.4988636337549 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6935604356068693641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "How Sampling Impacts the Robustness of Stochastic Neural Networks", "status": "Accept", "track": "main", "site": 
"https://nips.cc/virtual/2022/poster/53039", "id": "-zBN5sBzdvr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/429d69979c22b06d6baa65caf3ab1e10-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-zBN5sBzdvr", "openreview": "https://openreview.net/forum?id=-zBN5sBzdvr", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53039", "video": "https://nips.cc/virtual/2022/poster/53039", "author_site": "Sina D\u00e4ubener, Asja Fischer", "tldr": "", "abstract": "Stochastic neural networks (SNNs) are random functions whose predictions are gained by averaging over multiple realizations. \nConsequently, a gradient-based adversarial example is calculated based on one set of samples and its classification on another set. \nIn this paper, we derive a sufficient condition for such a stochastic prediction to be robust against a given sample-based attack. \nThis allows us to identify the factors that lead to an increased robustness of SNNs and gives theoretical explanations for: \n(i) the well known observation, that increasing the amount of samples drawn for the estimation of adversarial examples increases the attack's strength,\n(ii) why increasing the number of samples during an attack can not fully reduce the effect of stochasticity, \n(iii) why the sample size during inference does not influence the robustness, and\n(iv) why a higher gradient variance and a shorter expected value of the gradient relates to a higher robustness. \nOur theoretical findings give a unified view on the mechanisms underlying previously proposed approaches for increasing attack strengths or model robustness and are verified by an extensive empirical analysis.", "keywords": "Stochastic neural network;robustness;adversarial attacks", "primary_area": "", "supplementary_material": "/attachment/2a858a5307b4911902fd761084154a236e2fa6e2.zip", "author": "Sina D\u00e4ubener;Asja Fischer", "authorids": "~Sina_D\u00e4ubener1;~Asja_Fischer1", "gender": ";F", "homepage": ";", "dblp": ";76/8485", "google_scholar": ";FyZbyIUAAAAJ", "orcid": ";0000-0002-1916-7033", "linkedin": ";", "or_profile": "~Sina_D\u00e4ubener1;~Asja_Fischer1", "aff": ";Ruhr-Universit\u00e4t Bochum", "aff_domain": ";ruhr-uni-bochum.de", "position": ";Full Professor", "bibtex": "@inproceedings{\nd{\\\"a}ubener2022how,\ntitle={How Sampling Impacts the Robustness of Stochastic Neural Networks},\nauthor={Sina D{\\\"a}ubener and Asja Fischer},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-zBN5sBzdvr}\n}", "github": "", "project": "", "reviewers": "nbL1;izQZ;qGjN;kUU3", "pdf_size": 1930763, "rating": "3;6;6;7", "confidence": "4;3;2;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;4", "contribution": "1;3;3;3", "wc_summary": "230;53;106;108", "wc_strengths_and_weaknesses": "202;198;127;45", "wc_questions": "20;11;17;73", "wc_limitations": "48;1;8;17", "wc_review": "500;263;258;243", "wc_reply_reviewers": "1712;23;0;21", "wc_reply_authors": "1758;159;52;134", "reply_reviewers": "4;1;0;1", "reply_authors": "6;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 124.25, 64.91677364133248 ], "wc_strengths_and_weaknesses_avg": [ 143.0, 63.96483408873973 ], "wc_questions_avg": [ 30.25, 24.893523254051445 ], "wc_limitations_avg": [ 18.5, 17.95132307101624 ], "wc_review_avg": [ 316.0, 106.48708841920696 ], "wc_reply_reviewers_avg": [ 439.0, 735.0221085110297 ], "wc_reply_authors_avg": [ 525.75, 712.5399550200676 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3015113445777637, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1929356938830783835&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 7, "email": ";ruhr-uni-bochum.de", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Ruhr-Universit\u00e4t Bochum", "aff_unique_dep": "", "aff_unique_url": "https://www.ruhr-uni-bochum.de", "aff_unique_abbr": "RUB", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "CASA: Category-agnostic Skeletal Animal Reconstruction", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54950", "id": "-zYfrOl2I6O", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b709131d0a67f743915e12bc57947ddb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-zYfrOl2I6O", "openreview": "https://openreview.net/forum?id=-zYfrOl2I6O", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54950", "video": "https://nips.cc/virtual/2022/poster/54950", "author_site": "Yuefan Wu, Zeyuan Chen, Shaowei Liu, Zhongzheng Ren, Shenlong Wang", "tldr": "We present a pipeline to recover animatable shapes from a monocular video, by video-to-shape retrieval and neural inverse graphics.", "abstract": "Recovering a skeletal shape from a monocular video is a longstanding challenge. Prevailing nonrigid animal reconstruction methods often adopt a control-point driven animation model and optimize bone transforms individually without considering skeletal topology, yielding unsatisfactory shape and articulation. In contrast, humans can easily infer the articulation structure of an unknown character by associating it with a seen articulated object in their memory. Inspired by this fact, we present CASA, a novel category-agnostic articulated animal reconstruction method. Our method consists of two components, a video-to-shape retrieval process and a neural inverse graphics framework. 
During inference, CASA first finds a matched articulated shape from a 3D character asset bank such that the input video scores highly against the rendered image, according to a pretrained image-language model. It then integrates the retrieved character into an inverse graphics framework and jointly infers the shape deformation, skeleton structure, and skinning weights through optimization. Experiments validate the efficacy of our method in shape reconstruction and articulation. We further show that we can use the resulting skeletal-animated character for re-animation. \n", "keywords": "articulation;inverse graphics;3D reconstruction;animation", "primary_area": "", "supplementary_material": "/attachment/3737861d5b03e97553747b0b09e46079086e0907.zip", "author": "Yuefan Wu;Zeyuan Chen;Shaowei Liu;Zhongzheng Ren;Shenlong Wang", "authorids": "~Yuefan_Wu1;~Zeyuan_Chen2;~Shaowei_Liu2;~Zhongzheng_Ren2;~Shenlong_Wang1", "gender": "M;M;M;M;M", "homepage": "https://ivenwu.com;http://zeyuan-chen.com/;https://stevenlsw.github.io/;https://jason718.github.io/;https://shenlong.web.illinois.edu/", "dblp": "332/3825;;;https://dblp.uni-trier.de/pers/hd/r/Ren:Zhongzheng;117/4842", "google_scholar": "ialxYGYAAAAJ;dvplAJkAAAAJ;https://scholar.google.com/citations?view_op=list_works;iILS6kQAAAAJ;QFpswmcAAAAJ", "orcid": ";;;0000-0003-1033-5341;", "linkedin": ";;;;shenlong-wang-3496023b", "or_profile": "~Yuefan_Wu1;~Zeyuan_Chen2;~Shaowei_Liu2;~Zhongzheng_Ren2;~Shenlong_Wang1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "ustc.edu.cn;ustc.edu.cn;illinois.edu;uiuc.edu;illinois.edu", "position": "Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwu2022casa,\ntitle={{CASA}: Category-agnostic Skeletal Animal Reconstruction},\nauthor={Yuefan Wu and Zeyuan Chen and Shaowei Liu and Zhongzheng Ren and Shenlong Wang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-zYfrOl2I6O}\n}", "github": "", "project": "", "reviewers": "pF6x;4EwH;ziQq", "pdf_size": 13779434, "rating": "4;7;7", "confidence": "3;5;4", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "2;3;3", "contribution": "2;3;3", "wc_summary": "74;62;81", "wc_strengths_and_weaknesses": "187;422;233", "wc_questions": "98;92;114", "wc_limitations": "56;1;10", "wc_review": "415;577;438", "wc_reply_reviewers": "0;143;89", "wc_reply_authors": "573;1015;447", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.33333333333333, 7.845734863959881 ], "wc_strengths_and_weaknesses_avg": [ 280.6666666666667, 101.68688323585408 ], "wc_questions_avg": [ 101.33333333333333, 9.285592184789413 ], "wc_limitations_avg": [ 22.333333333333332, 24.087802353519557 ], "wc_review_avg": [ 476.6666666666667, 71.56504422939705 ], "wc_reply_reviewers_avg": [ 77.33333333333333, 58.95949645495815 ], "wc_reply_authors_avg": [ 678.3333333333334, 243.55332521282116 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2916566117216677252&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "ustc.edu.cn;ustc.edu.cn;illinois.edu;uiuc.edu;illinois.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "University of Science and Technology of China;University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "USTC;UIUC;UIUC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Mask-based Latent Reconstruction for Reinforcement Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54345", "id": "-zlJOVc580", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a0709efe5139939ab69902884ecad9c1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=-zlJOVc580", "openreview": "https://openreview.net/forum?id=-zlJOVc580", "poster": "/media/PosterPDFs/NeurIPS%202022/54345.png?t=1669035948.2848942", "slides": "https://nips.cc/virtual/2022/poster/54345", "video": "https://nips.cc/virtual/2022/poster/54345", "author_site": "Tao Yu, Zhizheng Zhang, Cuiling Lan, Yan Lu, Zhibo Chen", "tldr": "We propose a latent-space mask-based modeling method for representation learning in RL, which significantly improves RL sample efficiency.", "abstract": "For deep reinforcement learning (RL) from pixels, learning effective state representations is crucial for achieving high performance. However, in practice, limited experience and high-dimensional inputs prevent effective representation learning. 
To address this, motivated by the success of mask-based modeling in other research fields, we introduce mask-based reconstruction to promote state representation learning in RL. Specifically, we propose a simple yet effective self-supervised method, Mask-based Latent Reconstruction (MLR), to predict complete state representations in the latent space from the observations with spatially and temporally masked pixels. MLR enables better use of context information when learning state representations to make them more informative, which facilitates the training of RL agents. Extensive experiments show that our MLR significantly improves the sample efficiency in RL and outperforms the state-of-the-art sample-efficient RL methods on multiple continuous and discrete control benchmarks. Our code is available at https://github.com/microsoft/Mask-based-Latent-Reconstruction.", "keywords": "Reinforcement learning;mask-based modeling;sample efficiency;representation learning", "primary_area": "", "supplementary_material": "/attachment/b35d48cfa02fb8de6da8c3d706f4d21d611978a6.pdf", "author": "Tao Yu;Zhizheng Zhang;Cuiling Lan;Yan Lu;Zhibo Chen", "authorids": "~Tao_Yu4;~Zhizheng_Zhang1;~Cuiling_Lan1;~Yan_Lu7;~Zhibo_Chen1", "gender": "M;M;F;M;M", "homepage": "https://geekyutao.github.io/;;https://www.microsoft.com/en-us/research/people/culan/;https://www.microsoft.com/en-us/research/people/yanlu/;https://faculty.ustc.edu.cn/chenzhibo", "dblp": "67/1014-12;67/4758;95/8115;15/4830-1;54/6561.html", "google_scholar": "c76x7k8AAAAJ;X7M0I8kAAAAJ;XZugqiwAAAAJ;djk5l-4AAAAJ;1ayDJfsAAAAJ", "orcid": ";;0000-0001-9145-9957;0000-0001-5383-6424;", "linkedin": ";;;;", "or_profile": "~Tao_Yu4;~Zhizheng_Zhang1;~Cuiling_Lan1;~Yan_Lu7;~Zhibo_Chen1", "aff": "University of Science and Technology of China;Microsoft Research;Microsoft;Microsoft Research Asia;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;microsoft.com;microsoft.com;microsoft.com;ustc.edu.cn", "position": "PhD student;Senior Researcher;Principal Researcher;Partner Research Manager;Full Professor", "bibtex": "@inproceedings{\nyu2022maskbased,\ntitle={Mask-based Latent Reconstruction for Reinforcement Learning},\nauthor={Tao Yu and Zhizheng Zhang and Cuiling Lan and Yan Lu and Zhibo Chen},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=-zlJOVc580}\n}", "github": "", "project": "", "reviewers": "du6d;uZPQ;ZSQ9;djAT", "pdf_size": 1207942, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;2;3", "novelty": "2;2;3;2", "presentation": "3;3;3;4", "contribution": "2;2;3;2", "wc_summary": "107;54;80;147", "wc_strengths_and_weaknesses": "443;286;261;76", "wc_questions": "279;45;133;83", "wc_limitations": "57;1;52;23", "wc_review": "886;386;526;329", "wc_reply_reviewers": "103;0;83;116", "wc_reply_authors": "1594;721;729;620", "reply_reviewers": "2;0;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.0, 34.416565778706044 ], "wc_strengths_and_weaknesses_avg": [ 266.5, 130.24304204064032 ], "wc_questions_avg": [ 135.0, 88.80315309717331 ], "wc_limitations_avg": [ 33.25, 22.69774217846348 ], "wc_review_avg": [ 531.75, 216.72375850376903 ], "wc_reply_reviewers_avg": [ 75.5, 45.14698218042929 ], "wc_reply_authors_avg": [ 916.0, 393.79372773064836 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11030675521552103190&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ustc.edu.cn;microsoft.com;microsoft.com;microsoft.com;ustc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Science and Technology of China;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "USTC;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "DTG-SSOD: Dense Teacher Guidance for Semi-Supervised Object Detection", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54944", "id": "0-uBrFiOVf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3a02b6df276223b68c69ca572cb3c4a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0-uBrFiOVf", "openreview": "https://openreview.net/forum?id=0-uBrFiOVf", "poster": "/media/PosterPDFs/NeurIPS%202022/453fadbd8a1a3af50a9df4df899537b5.png?t=1666449963.8492475", "slides": "https://nips.cc/virtual/2022/poster/54944", "video": "https://nips.cc/virtual/2022/poster/54944", "author_site": "Gang Li, Xiang Li, Yujie Wang, Wu Yichao, Ding Liang, Shanshan Zhang", "tldr": "We propose a brand new semi-supervised object detection paradigm, which employs dense teacher guidance as supervision signals, instead of sparse pseudo labels.", "abstract": "The Mean-Teacher (MT) scheme is widely adopted in semi-supervised object detection (SSOD). In MT, sparse pseudo labels, offered by the final predictions of the teacher (e.g., after Non Maximum Suppression (NMS) post-processing), are adopted for the dense supervision for the student via hand-crafted label assignment. However, the \"sparse-to-dense'' paradigm complicates the pipeline of SSOD, and simultaneously neglects the powerful direct, dense teacher supervision. 
In this paper, we attempt to directly leverage the dense guidance of the teacher to supervise student training, i.e., the \"dense-to-dense'' paradigm. Specifically, we propose Inverse NMS Clustering (INC) and Rank Matching (RM) to instantiate the dense supervision, without the widely used, conventional sparse pseudo labels. INC leads the student to group candidate boxes into clusters in NMS as the teacher does, which is implemented by learning the grouping information revealed in the NMS procedure of the teacher. After obtaining the same grouping scheme as the teacher via INC, the student further imitates the rank distribution of the teacher over clustered candidates through Rank Matching. With the proposed INC and RM, we integrate Dense Teacher Guidance into Semi-Supervised Object Detection (termed \"DTG-SSOD''), successfully abandoning sparse pseudo labels and enabling more informative learning on unlabeled data. On the COCO benchmark, our DTG-SSOD achieves state-of-the-art performance under various labelling ratios. For example, under a 10% labelling ratio, DTG-SSOD improves the supervised baseline from 26.9 to 35.9 mAP, outperforming the previous best method Soft Teacher by 1.9 points. ", "keywords": "object detection;semi-supervised learning;semi-supervised object detection", "primary_area": "", "supplementary_material": "/attachment/0a637bbfdde7e3bccd09d0929ead79b89b2652bd.pdf", "author": "Gang Li;Xiang Li;Yujie Wang;Yichao Wu;Ding Liang;Shanshan Zhang", "authorids": "~Gang_Li14;~Xiang_Li20;~Yujie_Wang2;~Yichao_Wu1;~Ding_Liang1;~Shanshan_Zhang1", "gender": "M;M;M;M;;F", "homepage": ";http://implus.github.io/;;;;https://sites.google.com/site/shanshanzhangshomepage/", "dblp": ";40/1491-41;;74/8429;;34/3535-1", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;oamjJdYAAAAJ;7CobseIAAAAJ;;;pOSMWfQAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Gang_Li14;~Xiang_Li20;~Yujie_Wang2;~Yichao_Wu1;~Ding_Liang1;~Shanshan_Zhang1", "aff": "Nanjing University of Science and Technology;Nankai University;SenseTime Research;SenseTime Group Limited;;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;nankai.edu.cn;sensetime.com;sensetime.com;;njust.edu.cn", "position": "PhD student;Associate Professor;Researcher;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nli2022dtgssod,\ntitle={{DTG}-{SSOD}: Dense Teacher Guidance for Semi-Supervised Object Detection},\nauthor={Gang Li and Xiang Li and Yujie Wang and Yichao Wu and Ding Liang and Shanshan Zhang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0-uBrFiOVf}\n}", "github": "", "project": "", "reviewers": "YaeS;K4Pn;2EWD;CxZW", "pdf_size": 1633420, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;2", "contribution": "3;3;3;3", "wc_summary": "68;76;95;118", "wc_strengths_and_weaknesses": "72;99;122;291", "wc_questions": "45;22;10;14", "wc_limitations": "27;22;17;82", "wc_review": "212;219;244;505", "wc_reply_reviewers": "28;0;0;29", "wc_reply_authors": "518;505;734;826", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.25, 19.279198634798075 ], "wc_strengths_and_weaknesses_avg": [ 146.0, 85.56576418170997 ], "wc_questions_avg": [ 22.75, 13.5531361684298 ], "wc_limitations_avg": [ 37.0, 26.22022120425379 ], "wc_review_avg": [ 295.0, 121.82569515500414 ], "wc_reply_reviewers_avg": [ 14.25, 14.254385290148432 ], "wc_reply_authors_avg": [ 645.75, 138.21066348151288 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1047156362824031830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "njust.edu.cn;nankai.edu.cn;sensetime.com;sensetime.com;;njust.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Nanjing University of Science and Technology;Nankai University;SenseTime;SenseTime Group Limited", "aff_unique_dep": ";;SenseTime Research;", "aff_unique_url": "http://www.nust.edu.cn/;http://www.nankai.edu.cn;https://www.sensetime.com;https://www.sensetime.com", "aff_unique_abbr": "NUST;NKU;SenseTime;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Anonymous Bandits for Multi-User Systems", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53148", "id": "00jwOr7UA4P", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/50a057e9fe79ffa3f4120fb6fb88071a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=00jwOr7UA4P", "openreview": "https://openreview.net/forum?id=00jwOr7UA4P", "poster": "/media/PosterPDFs/NeurIPS%202022/53148.png?t=1669580647.0252273", "slides": "https://nips.cc/virtual/2022/poster/53148", "video": "https://nips.cc/virtual/2022/poster/53148", "author_site": "Hossein Esfandiari, Vahab Mirrokni, Jon Schneider", "tldr": "We study multi-user multi-armed bandits under a k-anonymity constraint.", "abstract": "In this work, we present and study a new framework for online learning in systems with multiple users that provide user anonymity. Specifically, we extend the notion of bandits to obey the standard $k$-anonymity constraint by requiring each observation to be an aggregation of rewards for at least $k$ users. This provides a simple yet effective framework where one can learn a clustering of users in an online fashion without observing any user's individual decision. 
We initiate the study of anonymous bandits and provide the first sublinear regret algorithms and lower bounds for this setting.", "keywords": "anonymity;multi-armed bandits;online learning", "primary_area": "", "supplementary_material": "/attachment/de5a25442c9434e5d781d0e5caf020dc51c44da6.zip", "author": "Hossein Esfandiari;Vahab Mirrokni;Jon Schneider", "authorids": "~Hossein_Esfandiari1;~Vahab_Mirrokni2;~Jon_Schneider1", "gender": ";M;M", "homepage": "https://sites.google.com/corp/view/hossein-esfandiari;https://people.csail.mit.edu/mirrokni/Welcome.html;https://jschnei.github.io", "dblp": "146/7746;m/VahabSMirrokni;146/0503", "google_scholar": "Rt8ppJsAAAAJ;opbZfw0AAAAJ;Jc97EyAAAAAJ", "orcid": "0000-0001-8130-6631;;", "linkedin": "hossein-esfandiari-10bb0281;;", "or_profile": "~Hossein_Esfandiari1;~Vahab_Mirrokni2;~Jon_Schneider1", "aff": "Google;Google Research;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;VP, Google Fellow;Researcher", "bibtex": "@inproceedings{\nesfandiari2022anonymous,\ntitle={Anonymous Bandits for Multi-User Systems},\nauthor={Hossein Esfandiari and Vahab Mirrokni and Jon Schneider},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=00jwOr7UA4P}\n}", "github": "", "project": "", "reviewers": "f33e;ex8q;TxQ1;1FZP", "pdf_size": 295984, "rating": "6;7;7;7", "confidence": "3;2;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "contribution": "3;3;3;3", "wc_summary": "167;259;222;70", "wc_strengths_and_weaknesses": "503;208;220;195", "wc_questions": "45;62;101;199", "wc_limitations": "2;45;1;26", "wc_review": "717;574;544;490", "wc_reply_reviewers": "0;85;24;15", "wc_reply_authors": "336;292;204;439", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 179.5, 71.19164276795416 ], "wc_strengths_and_weaknesses_avg": [ 281.5, 128.18833800311165 ], "wc_questions_avg": [ 101.75, 59.70500397789117 ], "wc_limitations_avg": [ 18.5, 18.282505298782223 ], "wc_review_avg": [ 581.25, 83.95646193117001 ], "wc_reply_reviewers_avg": [ 31.0, 32.334192428449484 ], "wc_reply_authors_avg": [ 317.75, 84.61198201200584 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uo3LnFnwAeMJ:scholar.google.com/&scioq=Anonymous+Bandits+for+Multi-User+Systems&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Eliciting Thinking Hierarchy without a Prior", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54636", "id": "02YXg0OZdG", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2022/hash/56d7585405a534b3af91905650ce7f9e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=02YXg0OZdG", "openreview": "https://openreview.net/forum?id=02YXg0OZdG", "poster": "/media/PosterPDFs/NeurIPS%202022/540393ae7f8b7a7fd6cdf47250b05679.png?t=1666161548.1800694", "slides": "https://nips.cc/virtual/2022/poster/54636", "video": "https://nips.cc/virtual/2022/poster/54636", "author_site": "Yuqing Kong, Yunqi Li, Yubo Zhang, Zhihuan Huang, Jinzhao Wu", "tldr": "", "abstract": "When we use the wisdom of the crowds, we usually rank the answers according to their popularity, especially when we cannot verify the answers. However, this can be very dangerous when the majority make systematic mistakes. A fundamental question arises: can we build a hierarchy among the answers without any prior where the higher-ranking answers, which may not be supported by the majority, are from more sophisticated people? To address the question, we propose 1) a novel model to describe people's thinking hierarchy; 2) two algorithms to learn the thinking hierarchy without any prior; 3) a novel open-response based crowdsourcing approach based on the above theoretic framework. In addition to theoretic justifications, we conduct four empirical crowdsourcing studies and show that a) the accuracy of the top-ranking answers learned by our approach is much higher than that of plurality voting (In one question, the plurality answer is supported by 74 respondents but the correct answer is only supported by 3 respondents. Our approach ranks the correct answer the highest without any prior); b) our model has a high goodness-of-fit, especially for the questions where our top-ranking answer is correct. To the best of our knowledge, we are the first to propose a thinking hierarchy model with empirical validations in the general problem-solving scenarios; and the first to propose a practical open-response-based crowdsourcing approach that beats plurality voting without any prior. ", "keywords": "crowdsourcing;information elicitation;peer prediction;cognitive hierarchy;bounded rationality", "primary_area": "", "supplementary_material": "/attachment/47c6f063426f8da550f80aea5ff0fcd4a3aff53a.zip", "author": "Yuqing Kong;Yunqi Li;Yubo Zhang;Zhihuan Huang;Jinzhao Wu", "authorids": "~Yuqing_Kong1;~Yunqi_Li2;~Yubo_Zhang4;~Zhihuan_Huang1;~Jinzhao_Wu1", "gender": "F;F;M;M;M", "homepage": "https://cfcs.pku.edu.cn/yuqkong/;;http://saigyouji.github.io/;https://nbdhhzh.github.io;", "dblp": "https://dblp.uni-trier.de/pers/k/Kong:Yuqing.html;;;;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";my-orcid?orcid=0000-0002-1411-4602;;;", "linkedin": ";;;;", "or_profile": "~Yuqing_Kong1;~Yunqi_Li2;~Yubo_Zhang4;~Zhihuan_Huang1;~Jinzhao_Wu1", "aff": "Peking University;Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Assistant Professor;Undergrad student;Undergrad student;PhD student;Undergrad student", "bibtex": "@inproceedings{\nkong2022eliciting,\ntitle={Eliciting Thinking Hierarchy without a Prior},\nauthor={Yuqing Kong and Yunqi Li and Yubo Zhang and Zhihuan Huang and Jinzhao Wu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=02YXg0OZdG}\n}", "github": "", "project": "", "reviewers": "J65o;239z;93Te;SfLf", "pdf_size": 9679252, "rating": "5;6;6;7", "confidence": "3;4;3;1", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;2", "contribution": "3;3;3;3", "wc_summary": "120;176;68;88", "wc_strengths_and_weaknesses": "188;148;60;280", "wc_questions": "75;38;10;2", "wc_limitations": "24;1;20;47", "wc_review": "407;363;158;417", "wc_reply_reviewers": "102;32;0;0", "wc_reply_authors": "1000;860;97;112", "reply_reviewers": "2;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.0, 40.82891132518721 ], "wc_strengths_and_weaknesses_avg": [ 169.0, 79.06326580656784 ], "wc_questions_avg": [ 31.25, 28.577744837547975 ], "wc_limitations_avg": [ 23.0, 16.355427233796124 ], "wc_review_avg": [ 336.25, 104.89846281047211 ], "wc_reply_reviewers_avg": [ 33.5, 41.650330130744464 ], "wc_reply_authors_avg": [ 517.25, 415.74113039245947 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6155852059158878342&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Decoupled Context Processing for Context Augmented Language Modeling", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52783", "id": "02dbnEbEFn", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/882d801fb1017f955547d5a816ade0fc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=02dbnEbEFn", "openreview": "https://openreview.net/forum?id=02dbnEbEFn", "poster": "/media/PosterPDFs/NeurIPS%202022/52783.png?t=1668220520.4372997", "slides": "https://nips.cc/virtual/2022/poster/52783", "video": "https://nips.cc/virtual/2022/poster/52783", "author_site": "Zonglin Li, Ruiqi Guo, Sanjiv Kumar", "tldr": "", "abstract": "Language models can be augmented with context retriever to incorporate knowledge from large external databases. By leveraging retrieved context, the neural network does not have to memorize the massive amount of world knowledge within its internal parameters, leading to better parameter efficiency, interpretability and modularity. In this paper we examined a simple yet effective architecture for incorporating external context into language models based on decoupled $\\texttt{Encoder-Decoder}$ architecture. We showed that such a simple architecture achieves competitive results on auto-regressive language modeling and open domain question answering tasks. We also analyzed the behavior of the proposed model which performs grounded context transfer. 
Finally we discussed the computational implications of such retrieval augmented models.", "keywords": "Retrieval Augmentation;Encoder-Decoder;Language Modeling;Efficiency", "primary_area": "", "supplementary_material": "/attachment/333f4b8274df8a0e50c1218b5370c1d0aa061872.zip", "author": "Zonglin Li;Ruiqi Guo;Sanjiv Kumar", "authorids": "~Zonglin_Li2;~Ruiqi_Guo3;~Sanjiv_Kumar1", "gender": "M;M;", "homepage": ";http://aqua.cs.uiuc.edu/site/;http://www.sanjivk.com/", "dblp": "142/9188;78/7198;", "google_scholar": ";Cgb68qkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "lizonglin;;", "or_profile": "~Zonglin_Li2;~Ruiqi_Guo3;~Sanjiv_Kumar1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nli2022decoupled,\ntitle={Decoupled Context Processing for Context Augmented Language Modeling},\nauthor={Zonglin Li and Ruiqi Guo and Sanjiv Kumar},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=02dbnEbEFn}\n}", "github": "", "project": "", "reviewers": "aAnM;DW3N;PRTw", "pdf_size": 568656, "rating": "4;5;8", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;3", "contribution": "2;3;3", "wc_summary": "96;79;171", "wc_strengths_and_weaknesses": "75;208;323", "wc_questions": "14;33;37", "wc_limitations": "62;19;10", "wc_review": "247;339;541", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "557;261;603", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 115.33333333333333, 39.96943276499625 ], "wc_strengths_and_weaknesses_avg": [ 202.0, 101.3344298186291 ], "wc_questions_avg": [ 28.0, 10.03327796219494 ], "wc_limitations_avg": [ 30.333333333333332, 22.69116323349001 ], "wc_review_avg": [ 375.6666666666667, 122.79341278025554 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 473.6666666666667, 151.54610592892917 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10542924530927366978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarially Robust Learning: A Generic Minimax Optimal Learner and Characterization", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53925", "id": "03Qml_SaPqV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f392c6bbb14548df50092f10c9db440f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=03Qml_SaPqV", "openreview": "https://openreview.net/forum?id=03Qml_SaPqV", "poster": 
"", "slides": "https://nips.cc/virtual/2022/poster/53925", "video": "https://nips.cc/virtual/2022/poster/53925", "author_site": "Omar Montasser, Steve Hanneke, Nati Srebro", "tldr": "We present a minimax optimal learner for the problem of learning predictors robust to adversarial examples at test-time.", "abstract": "We present a minimax optimal learner for the problem of learning predictors robust to adversarial examples at test-time. Interestingly, we find that this requires new algorithmic ideas and approaches to adversarially robust learning. In particular, we show, in a strong negative sense, the suboptimality of the robust learner proposed by Montasser, Hanneke, and Srebro [2019] and a broader family of learners we identify as local learners. Our results are enabled by adopting a global perspective, specifically, through a key technical contribution: the the global one-inclusion graph, which may be of independent interest, that generalizes the classical one-inclusion graph due to Haussler, Littlestone, and Warmuth [1994]. Finally, as a byproduct, we identify a dimension characterizing qualitatively and quantitatively what classes of predictors $\\mathcal{H}$ are robustly learnable. This resolves an open problem due to Montasser et al. [2019], and closes a (potentially) infinite gap between the established upper and lower bounds on the sample complexity of adversarially robust learning. ", "keywords": "adversarially robust PAC learning;sample complexity", "primary_area": "", "supplementary_material": "/attachment/5054de513b72537c05102ea0dbe7550e4305b4d1.pdf", "author": "Omar Montasser;Steve Hanneke;Nathan Srebro", "authorids": "~Omar_Montasser1;~Steve_Hanneke1;~Nathan_Srebro1", "gender": "M;M;M", "homepage": "https://ttic.uchicago.edu/~omar/;http://www.stevehanneke.com;http://ttic.uchicago.edu/~nati/", "dblp": "194/3002;40/154;50/3633", "google_scholar": "u455rGAAAAAJ;fEhNO7YAAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Omar_Montasser1;~Steve_Hanneke1;~Nathan_Srebro1", "aff": "Toyota Technological Institute at Chicago;Purdue University;University of Chicago", "aff_domain": "ttic.edu;purdue.edu;uchicago.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmontasser2022adversarially,\ntitle={Adversarially Robust Learning: A Generic Minimax Optimal Learner and Characterization},\nauthor={Omar Montasser and Steve Hanneke and Nathan Srebro},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=03Qml_SaPqV}\n}", "github": "", "project": "", "reviewers": "VT2M;pb28;bV2J", "pdf_size": 650200, "rating": "7;8;9", "confidence": "4;4;4", "soundness": "4;4;4", "novelty": "4;4;4", "presentation": "4;4;4", "contribution": "4;4;4", "wc_summary": "134;300;155", "wc_strengths_and_weaknesses": "141;144;78", "wc_questions": "24;47;5", "wc_limitations": "13;1;10", "wc_review": "312;492;248", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "165;39;108", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 8.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 4.0, 0.0 ], "contribution_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 196.33333333333334, 73.80304118997326 ], "wc_strengths_and_weaknesses_avg": [ 121.0, 30.430248109405877 ], "wc_questions_avg": [ 25.333333333333332, 17.172329163188344 ], "wc_limitations_avg": [ 8.0, 5.0990195135927845 ], "wc_review_avg": [ 350.6666666666667, 103.29676772398167 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 104.0, 51.51698748956503 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8673222166818090354&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ttic.edu;purdue.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Toyota Technological Institute at Chicago;Purdue University;University of Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tti-chicago.org;https://www.purdue.edu;https://www.uchicago.edu", "aff_unique_abbr": "TTI Chicago;Purdue;UChicago", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "AnimeRun: 2D Animation Visual Correspondence from Open Source 3D Movies", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55761", "id": "04OPxj0jGN_", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/78b23d272f58fe3789ab490ebf080fa5-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=04OPxj0jGN_", "openreview": "https://openreview.net/forum?id=04OPxj0jGN_", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55761", "video": "https://nips.cc/virtual/2022/poster/55761", "author_site": "Li Siyao, Yuhang Li, Bo Li, Chao Dong, Ziwei Liu, Chen Change Loy", "tldr": "We use open source 3D movies to make a new 2D animation dataset with ground truth optical flow and segment-wise correspondence label.", "abstract": "Visual correspondence of 2D animation is the core of many applications and deserves careful study. Existing correspondence datasets for 2D cartoon suffer from simple frame composition and monotonic movements, making them insufficient to simulate real animations. In this work, we present a new 2D animation visual correspondence dataset, AnimeRun, by converting open source 3D movies to full scenes in 2D style, including simultaneous moving background and interactions of multiple subjects. Statistics show that our proposed dataset not only resembles real anime more in image composition, but also possesses richer and more complex motion patterns compared to existing datasets. 
With this dataset, we establish a comprehensive benchmark by evaluating several existing optical flow and segment matching methods, and analyze shortcomings of these methods on animation data. Data are available at https://lisiyao21.github.io/projects/AnimeRun.", "keywords": "2D animation;cartoon;correspondence;optical flow;matching", "primary_area": "", "supplementary_material": "/attachment/52c4d9b77c084f4753b321ac1cd427729830f6d7.zip", "author": "Li Siyao;Yuhang Li;Bo Li;Chao Dong;Ziwei Liu;Chen Change Loy", "authorids": "~Li_Siyao1;~Yuhang_Li4;~Bo_Li23;~Chao_Dong4;~Ziwei_Liu1;~Chen_Change_Loy2", "gender": "M;F;M;M;M;M", "homepage": "https://lisiyao21.github.io;https://xpixel.group/2010/03/25/yuhangli.html;https://www.brianboli.com/;http://xpixel.group/2010/01/20/chaodong.html;https://liuziwei7.github.io/;https://www.mmlab-ntu.com/person/ccloy/index.html", "dblp": ";;50/3402-80;16/1278-5;05/6300-2;01/5855", "google_scholar": "83WWEs4AAAAJ;;1_zc1-IAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ", "orcid": ";;;;;0000-0001-5345-1591", "linkedin": ";;brianbo1121/;;;", "or_profile": "~Li_Siyao1;~Yuhang_Li4;~Bo_Li23;~Chao_Dong4;~Ziwei_Liu1;~Chen_Change_Loy2", "aff": "Nanyang Technological University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Nanyang Technological University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;siat.ac.cn;ntu.edu.sg;siat.ac.cn;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;MS student;PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsiyao2022animerun,\ntitle={AnimeRun: 2D Animation Visual Correspondence from Open Source 3D Movies},\nauthor={Li Siyao and Yuhang Li and Bo Li and Chao Dong and Ziwei Liu and Chen Change Loy},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=04OPxj0jGN_}\n}", "github": "", "project": "", "reviewers": "BxAN;HfEh;3aN8;uRTc;WKh3", "pdf_size": 7857592, "rating": "5;6;7;7;8", "confidence": "3;4;4;3;3", "wc_summary_and_contributions": "38;59;64;62;86", "wc_strengths": "27;44;152;106;202", "wc_weaknesses": "65;218;381;223;117", "wc_correctness": "41;7;50;1;56", "wc_clarity": "641;1;13;13;16", "wc_relation_to_prior_work": "18;15;150;1;23", "wc_documentation": "135;1;20;43;24", "wc_additional_feedback": "42;35;567;25;9", "wc_review": "1007;380;1397;474;533", "wc_reply_reviewers": "0;34;160;92;38", "wc_reply_authors": "159;569;349;725;439", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 61.8, 15.26302722267113 ], "wc_strengths_avg": [ 106.2, 65.44738344655192 ], "wc_weaknesses_avg": [ 200.8, 108.37416666346274 ], "wc_correctness_avg": [ 31.0, 22.63625410707346 ], "wc_clarity_avg": [ 136.8, 252.15265217720793 ], "wc_relation_to_prior_work_avg": [ 41.4, 54.78905000088977 ], "wc_documentation_avg": [ 44.6, 47.12791105067145 ], "wc_additional_feedback_avg": [ 135.6, 215.98481428100447 ], "wc_review_avg": [ 758.2, 385.8722068250058 ], "wc_reply_reviewers_avg": [ 64.8, 55.97999642729535 ], "wc_reply_authors_avg": [ 448.2, 
192.2356886740857 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.08006407690254361, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2206932835628309531&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ntu.edu.sg;siat.ac.cn;ntu.edu.sg;siat.ac.cn;ntu.edu.sg;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;1;0;1;0;0", "aff_unique_norm": "Nanyang Technological University;Chinese Academy of Sciences", "aff_unique_dep": ";Shenzhen Institutes of Advanced Technology", "aff_unique_url": "https://www.ntu.edu.sg;http://www.cas.cn", "aff_unique_abbr": "NTU;CAS", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Identifying good directions to escape the NTK regime and efficiently learn low-degree plus sparse polynomials", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53708", "id": "052QkenIdSI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5d6ae8ba43ecb378030753c4408ef9bd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=052QkenIdSI", "openreview": "https://openreview.net/forum?id=052QkenIdSI", "poster": "/media/PosterPDFs/NeurIPS%202022/53708.png?t=1669619848.7166128", "slides": "https://nips.cc/virtual/2022/poster/53708", "video": "https://nips.cc/virtual/2022/poster/53708", "author_site": "Eshaan Nichani, Yu Bai, Jason Lee", "tldr": "We investigate which directions the parameters of a two-layer neural network can move in to escape the NTK regime, and show that a network trained with a regularized loss can learn low-degree plus sparse polynomials with optimal sample complexity.", "abstract": "A recent goal in the theory of deep learning is to identify how neural networks can escape the \u201clazy training,\u201d or Neural Tangent Kernel (NTK) regime, where the network is coupled with its first order Taylor expansion at initialization. While the NTK is minimax optimal for learning dense polynomials (Ghorbani et al, 2021), it cannot learn features, and hence has poor sample complexity for learning many classes of functions including sparse polynomials. Recent works have thus aimed to identify settings where gradient based algorithms provably generalize better than the NTK. One such example is the \u201cQuadNTK\u201d approach of Bai & Lee (2020), which analyzes the second-order term in the Taylor expansion. Bai & Lee (2020) show that the second-order term can learn sparse polynomials efficiently; however, it sacrifices the ability to learn general dense polynomials.\n\nIn this paper, we analyze how gradient descent on a two-layer neural network can escape the NTK regime by utilizing a spectral characterization of the NTK (Montanari & Zhong, 2020) and building on the QuadNTK approach. We first expand upon the spectral analysis to identify \u201cgood\u201d directions in parameter space in which we can move without harming generalization. Next, we show that a wide two-layer neural network can jointly use the NTK and QuadNTK to fit target functions consisting of a dense low-degree term and a sparse high-degree term -- something neither the NTK nor the QuadNTK can do on their own. 
Finally, we construct a regularizer which encourages the parameter vector to move in the \u201cgood\u201d directions, and show that gradient descent on the regularized loss will converge to a global minimizer, which also has low test error. This yields an end-to-end convergence and generalization guarantee with provable sample complexity improvement over both the NTK and QuadNTK on their own.", "keywords": "deep learning theory;neural tangent kernel;beyond NTK;optimization landscape;learning polynomials", "primary_area": "", "supplementary_material": "/attachment/cf83e408617dbc8d55cc04a5cfa83cbe2b5ab79f.zip", "author": "Eshaan Nichani;Yu Bai;Jason D. Lee", "authorids": "~Eshaan_Nichani1;~Yu_Bai1;~Jason_D._Lee1", "gender": ";;M", "homepage": "https://eshaannichani.com/;https://yubai.org;https://jasondlee88.github.io/", "dblp": "260/6510;03/6325-17.html;88/3262", "google_scholar": ";owqhKD8AAAAJ;GR_DsT0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Eshaan_Nichani1;~Yu_Bai1;~Jason_D._Lee1", "aff": "Princeton University;Salesforce Research;Princeton University", "aff_domain": "princeton.edu;salesforce.com;princeton.edu", "position": "PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nnichani2022identifying,\ntitle={Identifying good directions to escape the {NTK} regime and efficiently learn low-degree plus sparse polynomials },\nauthor={Eshaan Nichani and Yu Bai and Jason D. Lee},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=052QkenIdSI}\n}", "github": "", "project": "", "reviewers": "g4sF;nemz;t53d", "pdf_size": 8765876, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "3;4;4", "contribution": "3;2;3", "wc_summary": "55;172;101", "wc_strengths_and_weaknesses": "280;550;264", "wc_questions": "5;3;6", "wc_limitations": "5;24;27", "wc_review": "345;749;398", "wc_reply_reviewers": "278;356;35", "wc_reply_authors": "821;1465;158", "reply_reviewers": "1;2;1", "reply_authors": "2;3;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 109.33333333333333, 48.12714641678044 ], "wc_strengths_and_weaknesses_avg": [ 364.6666666666667, 131.21314297313697 ], "wc_questions_avg": [ 4.666666666666667, 1.247219128924647 ], "wc_limitations_avg": [ 18.666666666666668, 9.741092797468305 ], "wc_review_avg": [ 497.3333333333333, 179.26578653558582 ], "wc_reply_reviewers_avg": [ 223.0, 136.69674465765453 ], "wc_reply_authors_avg": [ 814.6666666666666, 533.599308678546 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9098044485141039309&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "princeton.edu;salesforce.com;princeton.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Princeton University;Salesforce", "aff_unique_dep": ";Salesforce Research", "aff_unique_url":
"https://www.princeton.edu;https://research.salesforce.com", "aff_unique_abbr": "Princeton;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Class $H$-Consistency Bounds", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52988", "id": "06OVtS901hF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/051f3997af1dd65da8e14397b6a72f8e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=06OVtS901hF", "openreview": "https://openreview.net/forum?id=06OVtS901hF", "poster": "/media/PosterPDFs/NeurIPS%202022/52988.png?t=1669830286.4386005", "slides": "https://nips.cc/virtual/2022/poster/52988", "video": "https://nips.cc/virtual/2022/poster/52988", "author_site": "Pranjal Awasthi, Anqi Mao, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "We present an extensive study of $H$-consistency bounds for multi-class classification. These are upper bounds on the target loss estimation error of a predictor in a hypothesis set $H$, expressed in terms of the surrogate loss estimation error of that predictor. They are stronger and more significant guarantees than Bayes-consistency, $H$-calibration or $H$-consistency, and more informative than excess error bounds derived for $H$ being the family of all measurable functions. We give a series of new $H$-consistency bounds for surrogate multi-class losses, including max losses, sum losses, and constrained losses, both in the non-adversarial and adversarial cases, and for different differentiable or convex auxiliary functions used. We also prove that no non-trivial $H$-consistency bound can be given in some cases. To our knowledge, these are the first $H$-consistency bounds proven for the multi-class setting. Our proof techniques are also novel and likely to be useful in the analysis of other such guarantees.", "keywords": "multi-class classification;consistency;surrogate losses;adversarial learning", "primary_area": "", "supplementary_material": "/attachment/33862f27f2aecbbef9b34596b2f492744846870a.pdf", "author": "Pranjal Awasthi;Anqi Mao;Mehryar Mohri;Yutao Zhong", "authorids": "~Pranjal_Awasthi3;~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": ";F;M;", "homepage": "https://www.cs.rutgers.edu/~pa336/;https://anqi-mao.github.io;https://cs.nyu.edu/~mohri/;", "dblp": "57/679;241/6864;03/5448;51/3178-2", "google_scholar": ";nkjIZ-oAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;;", "linkedin": ";;mehryar-mohri-3737b981/;", "or_profile": "~Pranjal_Awasthi3;~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Rutgers University;Courant Institute of Mathematical Sciences, NYU;Google Research;Google", "aff_domain": "rutgers.edu;cims.nyu.edu;google.com;google.com", "position": "Assistant Professor;PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nawasthi2022multiclass,\ntitle={Multi-Class \\$H\\$-Consistency Bounds},\nauthor={Pranjal Awasthi and Anqi Mao and Mehryar Mohri and Yutao Zhong},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=06OVtS901hF}\n}", "github": "", "project": "", "reviewers": "haLt;UKY2;HTLU", "pdf_size": 371521, "rating": "4;7;7", "confidence": "3;1;3", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;3;4", "contribution": "2;3;3", "wc_summary": "57;117;72", "wc_strengths_and_weaknesses": "100;129;92", "wc_questions": "29;1;86", "wc_limitations": "1;1;14", "wc_review": "187;248;264", "wc_reply_reviewers": "0;0;32", "wc_reply_authors": "733;345;574", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.0, 25.495097567963924 ], "wc_strengths_and_weaknesses_avg": [ 107.0, 15.895492023421818 ], "wc_questions_avg": [ 38.666666666666664, 35.3679076125361 ], "wc_limitations_avg": [ 5.333333333333333, 6.128258770283413 ], "wc_review_avg": [ 233.0, 33.1762967593833 ], "wc_reply_reviewers_avg": [ 10.666666666666666, 15.084944665313014 ], "wc_reply_authors_avg": [ 550.6666666666666, 159.25730403623217 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5000000000000001, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9760156501356992220&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "rutgers.edu;cims.nyu.edu;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Rutgers University;New York University;Google", "aff_unique_dep": ";Courant Institute of Mathematical Sciences;Google Research", "aff_unique_url": "https://www.rutgers.edu;https://www.courant.nyu.edu;https://research.google", "aff_unique_abbr": "Rutgers;NYU;Google Research", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";New York;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55018", "id": "08Yk-n5l2Al", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ec795aeadae0b7d230fa35cbaf04c041-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=08Yk-n5l2Al", "openreview": "https://openreview.net/forum?id=08Yk-n5l2Al", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55018", "video": "https://nips.cc/virtual/2022/poster/55018", "author_site": "Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, Jonathan Ho, David Fleet, Mohammad Norouzi", "tldr": "We present Imagen, a simple approach to text-to-image synthesis using diffusion models.", "abstract": "We present Imagen, a text-to-image diffusion model with an unprecedented degree of photorealism and a deep level of language understanding. Imagen builds on the power of large transformer language models in understanding text and hinges on the strength of diffusion models in high-fidelity image generation. 
Our key discovery is that generic large language models (e.g., T5), pretrained on text-only corpora, are surprisingly effective at encoding text for image synthesis: increasing the size of the language model in Imagen boosts both sample fidelity and image-text alignment much more than increasing the size of the image diffusion model. Imagen achieves a new state-of-the-art FID score of 7.27 on the COCO dataset, without ever training on COCO, and human raters find Imagen samples to be on par with the COCO data itself in image-text alignment. To assess text-to-image models in greater depth, we introduce DrawBench, a comprehensive and challenging benchmark for text-to-image models. With DrawBench, we compare Imagen with recent methods including VQ-GAN+CLIP, Latent Diffusion Models, and DALL-E 2, and find that human raters prefer Imagen over other models in side-by-side comparisons, both in terms of sample quality and image-text alignment.", "keywords": "text-to-image;generative models;diffusion models", "primary_area": "", "supplementary_material": "/attachment/09a62c23c50cb8f4b77670e9db4bcdacaeaa30a9.pdf", "author": "Chitwan Saharia;William Chan;Saurabh Saxena;Lala Li;Jay Whang;Emily Denton;Seyed Kamyar Seyed Ghasemipour;Raphael Gontijo-Lopes;Burcu Karagol Ayan;Tim Salimans;Jonathan Ho;David J. Fleet;Mohammad Norouzi", "authorids": "~Chitwan_Saharia1;~William_Chan1;~Saurabh_Saxena1;~Lala_Li1;~Jay_Whang1;~Emily_Denton2;~Seyed_Kamyar_Seyed_Ghasemipour1;~Raphael_Gontijo-Lopes1;burcuka@google.com;~Tim_Salimans1;~Jonathan_Ho1;~David_J._Fleet1;~Mohammad_Norouzi1", "gender": "M;;M;;;Non-Binary;M;;;M;;M;M", "homepage": "https://www.chitwansaharia.github.io;http://williamchan.ca;;;;https://www.cephaloponderer.com/;http://www.cs.utoronto.ca/~kamyar/;;;;;http://www.cs.toronto.edu/~fleet/index.html;https://norouzi.github.io/", "dblp": "228/8172;58/2301;;49/7563;;;238/2555;;;116/2791;80/8677;07/2099;https://dblp.org/pers/hd/n/Norouzi_0002:Mohammad", "google_scholar": ";Nla9qfUAAAAJ;WTz38osAAAAJ;;;;LHvso9QAAAAJ;;;;iVLAQysAAAAJ;https://scholar.google.com.tw/citations?user=njOmQFsAAAAJ;Lncr-VoAAAAJ", "orcid": ";;;;;;;;;;;;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Chitwan_Saharia1;~William_Chan1;~Saurabh_Saxena1;~Lala_Li1;~Jay_Whang1;~Emily_Denton2;~Seyed_Kamyar_Seyed_Ghasemipour1;~Raphael_Gontijo-Lopes1;burcuka@google.com;~Tim_Salimans1;~Jonathan_Ho1;~David_J._Fleet1;~Mohammad_Norouzi1", "aff": "Google;Google Brain;Google;Google;;Google;Google DeepMind Robotics;;;Google;Google;Department of Computer Science, University of Toronto;Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;;google.com;google.com;;;google.com;google.com;cs.toronto.edu;google.com", "position": "AI Resident;Research Scientist;Researcher;Software Engineer;;Research Scientist;Student Researcher;;;Research Scientist;Researcher;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nsaharia2022photorealistic,\ntitle={Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding},\nauthor={Chitwan Saharia and William Chan and Saurabh Saxena and Lala Li and Jay Whang and Emily Denton and Seyed Kamyar Seyed Ghasemipour and Raphael Gontijo-Lopes and Burcu Karagol Ayan and Tim Salimans and Jonathan Ho and David J. Fleet and Mohammad Norouzi},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=08Yk-n5l2Al}\n}", "github": "", "project": "", "reviewers": "9tx5;EZNR;uoqR", "pdf_size": 3081347, "rating": "5;7;8", "confidence": "5;4;4", "soundness": "2;4;4", "novelty": "2;3;4", "presentation": "3;4;4", "contribution": "2;3;4", "wc_summary": "57;77;589", "wc_strengths_and_weaknesses": "134;150;5", "wc_questions": "161;8;5", "wc_limitations": "87;4;5", "wc_review": "439;239;604", "wc_reply_reviewers": "131;88;27", "wc_reply_authors": "793;373;342", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 241.0, 246.20858365757005 ], "wc_strengths_and_weaknesses_avg": [ 96.33333333333333, 64.91190611556216 ], "wc_questions_avg": [ 58.0, 72.84229540589725 ], "wc_limitations_avg": [ 32.0, 38.89301565405628 ], "wc_review_avg": [ 427.3333333333333, 149.23880937015753 ], "wc_reply_reviewers_avg": [ 82.0, 42.66927075386533 ], "wc_reply_authors_avg": [ 502.6666666666667, 205.68638479220954 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 6404, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2130901831690841916&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 12, "email": "google.com;google.com;google.com;google.com;;google.com;google.com;;;google.com;google.com;cs.toronto.edu;google.com", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;0;0;1;0", "aff_unique_norm": "Google;University of Toronto", "aff_unique_dep": "Google;Department of Computer Science", "aff_unique_url": "https://www.google.com;https://www.utoronto.ca", "aff_unique_abbr": "Google;U of T", "aff_campus_unique_index": "0;0;0;0;0;0;0;2;0", "aff_campus_unique": "Mountain View;;Toronto", "aff_country_unique_index": "0;0;0;0;0;1;0;0;2;0", "aff_country_unique": "United States;United Kingdom;Canada" }, { "title": "Statistical Learning and Inverse Problems: A Stochastic Gradient Approach", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52776", "id": "09QFnDWPF8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3e8b1835833ef809059efa74b9df6805-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=09QFnDWPF8", "openreview": "https://openreview.net/forum?id=09QFnDWPF8", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/52776", "video": "https://nips.cc/virtual/2022/poster/52776", "author_site": "Yuri Fonseca, Yuri Saporito", "tldr": "An algorithm based on stochastic gradient descent for solving linear Inverse Problems under a statistical learning framework.", "abstract": "Inverse problems are paramount in Science and Engineering. In this paper, we consider the setup of Statistical Inverse Problem (SIP) and demonstrate how Stochastic Gradient Descent (SGD) algorithms can be used to solve linear SIP. We provide consistency and finite sample bounds for the excess risk. 
We also propose a modification for the SGD algorithm where we leverage machine learning methods to smooth the stochastic gradients and improve empirical performance. We exemplify the algorithm in a setting of great interest nowadays: the Functional Linear Regression model. In this case we consider a synthetic data example and a classification problem for predicting the main activity of bitcoin addresses based on their balances. ", "keywords": "Statistical Learning;Inverse Problems;Stochastic Gradient Descent", "primary_area": "", "supplementary_material": "/attachment/dd39daf7aeb301b2e239948692ae20503a69149c.zip", "author": "Yuri Fonseca;Yuri Saporito", "authorids": "~Yuri_Fonseca1;~Yuri_Saporito1", "gender": ";M", "homepage": ";https://www.yurisaporito.com", "dblp": "295/9443;", "google_scholar": "https://scholar.google.com.br/citations?user=hr1PnUkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Yuri_Fonseca1;~Yuri_Saporito1", "aff": "Columbia University;FGV EMAp", "aff_domain": "columbia.edu;emap.fgv.br", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfonseca2022statistical,\ntitle={Statistical Learning and Inverse Problems: A Stochastic Gradient Approach},\nauthor={Yuri Fonseca and Yuri Saporito},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=09QFnDWPF8}\n}", "github": "", "project": "", "reviewers": "ea5U;Vx5e;m9Kg", "pdf_size": 396687, "rating": "5;7;8", "confidence": "3;3;3", "soundness": "2;3;4", "novelty": "2;2;3", "presentation": "2;3;4", "contribution": "2;2;3", "wc_summary": "172;96;196", "wc_strengths_and_weaknesses": "190;72;38", "wc_questions": "265;26;492", "wc_limitations": "22;20;149", "wc_review": "649;214;875", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "439;378;599", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 154.66666666666666, 42.62497963505541 ], "wc_strengths_and_weaknesses_avg": [ 100.0, 65.13575566972925 ], "wc_questions_avg": [ 261.0, 190.26472785744252 ], "wc_limitations_avg": [ 63.666666666666664, 60.34530268012214 ], "wc_review_avg": [ 579.3333333333334, 274.31166378571817 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 472.0, 93.19155898828319 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5799723726138023427&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "columbia.edu;emap.fgv.br", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;Funda\u00e7\u00e3o Getulio Vargas", "aff_unique_dep": ";Escola de Matem\u00e1tica Aplicada", "aff_unique_url": "https://www.columbia.edu;https://www.fgv.br", "aff_unique_abbr": "Columbia;FGV", "aff_campus_unique_index": "1", "aff_campus_unique": ";S\u00e3o Paulo", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Brazil" }, { "title": "Equivariant Networks for Crystal Structures", "status": "Accept", 
"track": "main", "site": "https://nips.cc/virtual/2022/poster/52781", "id": "0Dh8dz4snu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1abed6ee581b9ceb4e2ddf37822c7fcb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Dh8dz4snu", "openreview": "https://openreview.net/forum?id=0Dh8dz4snu", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/52781", "video": "https://nips.cc/virtual/2022/poster/52781", "author_site": "Oumar Kaba, Siamak Ravanbakhsh", "tldr": "A deep model for materials", "abstract": "Supervised learning with deep models has tremendous potential for applications in materials science. Recently, graph neural networks have been used in this context, drawing direct inspiration from models for molecules. However, materials are typically much more structured than molecules, which is a feature that these models do not leverage. In this work, we introduce a class of models that are equivariant with respect to crystalline symmetry groups. We do this by defining a generalization of the message passing operations that can be used with more general permutation groups, or that can alternatively be seen as defining an expressive convolution operation on the crystal graph. Empirically, these models achieve competitive results with state-of-the-art on the Materials Project dataset.", "keywords": "materials;deep learning;symmetry;equivariance;crystals;graph neural networks;geometric deep learning", "primary_area": "", "supplementary_material": "/attachment/ee5d6c2e0ace97eb6526b8de0fcc736066b2c69f.pdf", "author": "S\u00e9kou-Oumar Kaba;Siamak Ravanbakhsh", "authorids": "~S\u00e9kou-Oumar_Kaba1;~Siamak_Ravanbakhsh1", "gender": "M;", "homepage": "https://oumarkaba.github.io;", "dblp": "279/3144;", "google_scholar": "https://scholar.google.ca/citations?user=jKqh8jAAAAAJ;", "orcid": "0000-0002-7258-4696;", "linkedin": "oumar-kaba/;", "or_profile": "~S\u00e9kou-Oumar_Kaba1;~Siamak_Ravanbakhsh1", "aff": "McGill University;", "aff_domain": "mcgill.ca;", "position": "PhD student;", "bibtex": "@inproceedings{\nkaba2022equivariant,\ntitle={Equivariant Networks for Crystal Structures},\nauthor={S{\\'e}kou-Oumar Kaba and Siamak Ravanbakhsh},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0Dh8dz4snu}\n}", "github": "", "project": "", "reviewers": "PzGa;MuHL;os5D", "pdf_size": 10197566, "rating": "4;6;7", "confidence": "2;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;4", "contribution": "2;3;3", "wc_summary": "105;147;88", "wc_strengths_and_weaknesses": "226;222;390", "wc_questions": "162;683;128", "wc_limitations": "2;1;2", "wc_review": "495;1053;608", "wc_reply_reviewers": "0;0;38", "wc_reply_authors": "383;810;607", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 113.33333333333333, 24.796953217863052 ], "wc_strengths_and_weaknesses_avg": [ 279.3333333333333, 78.27018731434225 ], "wc_questions_avg": [ 324.3333333333333, 253.995188055907 ], "wc_limitations_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_review_avg": [ 718.6666666666666, 240.86833655662497 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 17.913371790059205 ], "wc_reply_authors_avg": [ 600.0, 174.39227811651142 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2066000916718401741&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mcgill.ca;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Supervising the Multi-Fidelity Race of Hyperparameter Configurations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53171", "id": "0Fe7bAWmJr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/57b694fef23ae7b9308eb4d46342595d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Fe7bAWmJr", "openreview": "https://openreview.net/forum?id=0Fe7bAWmJr", "poster": "/media/PosterPDFs/NeurIPS%202022/97e49161287e7a4f9b745366e4f9431b.png?t=1666878088.233249", "slides": "https://nips.cc/virtual/2022/poster/53171", "video": "https://nips.cc/virtual/2022/poster/53171", "author_site": "Martin Wistuba, Arlind Kadra, Josif Grabocka", "tldr": "Efficient hyperparameter optimization by dynamically supervising the race of competing hyperparameter configurations.", "abstract": "Multi-fidelity (gray-box) hyperparameter optimization techniques (HPO) have recently emerged as a promising direction for tuning Deep Learning methods. However, existing methods suffer from a sub-optimal allocation of the HPO budget to the hyperparameter configurations. In this work, we introduce DyHPO, a Bayesian Optimization method that learns to decide which hyperparameter configuration to train further in a dynamic race among all feasible configurations. We propose a new deep kernel for Gaussian Processes that embeds the learning curve dynamics, and an acquisition function that incorporates multi-budget information. 
We demonstrate the significant superiority of DyHPO against state-of-the-art hyperparameter optimization methods through large-scale experiments comprising 50 datasets (Tabular, Image, NLP) and diverse architectures (MLP, CNN/NAS, RNN).", "keywords": "hyperparameter optimization", "primary_area": "", "supplementary_material": "/attachment/877fcf8e9f309c1fb42ac6a16727a50c11ef91ab.pdf", "author": "Martin Wistuba;Arlind Kadra;Josif Grabocka", "authorids": "~Martin_Wistuba1;~Arlind_Kadra1;~Josif_Grabocka1", "gender": "M;M;M", "homepage": ";;https://www.utn.de/departments/department-engineering/machine-learning-lab/", "dblp": "https://dblp.uni-trier.de/pers/hd/w/Wistuba:Martin;252/5295;117/4936", "google_scholar": "https://scholar.google.co.uk/citations?user=pTULHVsAAAAJ;bMa0KUcAAAAJ;KRy27XcAAAAJ", "orcid": ";0000-0001-9308-6576;", "linkedin": "https://linkedin.com/in/wistuba/;;", "or_profile": "~Martin_Wistuba1;~Arlind_Kadra1;~Josif_Grabocka1", "aff": "Amazon;Universit\u00e4t Freiburg;Universit\u00e4t Freiburg", "aff_domain": "amazon.com;uni-freiburg.de;uni-freiburg.de", "position": "Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwistuba2022supervising,\ntitle={Supervising the Multi-Fidelity Race of Hyperparameter Configurations},\nauthor={Martin Wistuba and Arlind Kadra and Josif Grabocka},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0Fe7bAWmJr}\n}", "github": "", "project": "", "reviewers": "NJxE;1bWR;XM9p;MHNh", "pdf_size": 6375852, "rating": "6;7;7;8", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "contribution": "3;3;3;4", "wc_summary": "73;101;15;73", "wc_strengths_and_weaknesses": "327;252;20;388", "wc_questions": "64;408;11;30", "wc_limitations": "44;95;1;26", "wc_review": "508;856;47;517", "wc_reply_reviewers": "306;40;14;8", "wc_reply_authors": "741;551;82;182", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 31.316928329579195 ], "wc_strengths_and_weaknesses_avg": [ 246.75, 139.49439952915674 ], "wc_questions_avg": [ 128.25, 162.62591275685435 ], "wc_limitations_avg": [ 41.5, 34.4564943080401 ], "wc_review_avg": [ 482.0, 287.66386634403705 ], "wc_reply_reviewers_avg": [ 92.0, 124.1370210694618 ], "wc_reply_authors_avg": [ 389.0, 267.9766780897173 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5887646758995146862&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "amazon.com;uni-freiburg.de;uni-freiburg.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Amazon;University of Freiburg", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.uni-freiburg.de", "aff_unique_abbr": "Amazon;Uni Freiburg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Germany" }, { "title": "A Fast Post-Training Pruning Framework 
for Transformers", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54447", "id": "0GRBKLBjJE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/987bed997ab668f91c822a09bce3ea12-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0GRBKLBjJE", "openreview": "https://openreview.net/forum?id=0GRBKLBjJE", "poster": "/media/PosterPDFs/NeurIPS%202022/54447.png?t=1669873890.2661953", "slides": "https://nips.cc/virtual/2022/poster/54447", "video": "https://nips.cc/virtual/2022/poster/54447", "author_site": "Woosuk Kwon, Sehoon Kim, Michael Mahoney, Joseph Hassoun, Kurt Keutzer, Amir Gholami", "tldr": "We propose a framework to automatically prune Transformers in a few minutes.", "abstract": "Pruning is an effective way to reduce the huge inference cost of Transformer models. However, prior work on pruning Transformers requires retraining the models. This can add high training cost and high complexity to model deployment, making it difficult to use in many practical situations. To address this, we propose a fast post-training pruning framework for Transformers that does not require any retraining. Given a resource constraint and a sample dataset, our framework automatically prunes the Transformer model using structured sparsity methods. To retain high accuracy without retraining, we introduce three novel techniques: (i) a lightweight mask search algorithm that finds which heads and filters to prune based on the Fisher information; (ii) mask rearrangement that complements the search algorithm; and (iii) mask tuning that reconstructs the output activations for each layer. We apply our method to BERT-base and DistilBERT, and we evaluate its effectiveness on GLUE and SQuAD benchmarks. Our framework achieves up to 2.0x reduction in FLOPs and 1.56x speedup in inference latency, while maintaining < 1% loss in accuracy. Importantly, our framework prunes Transformers in less than 3 minutes on a single GPU, which is over two orders of magnitude faster than existing pruning approaches that retrain the models.", "keywords": "Pruning;Compression;Transformers", "primary_area": "", "supplementary_material": "/attachment/5904abdd4ec605f7d18ce2efb119b9c90302f1bd.pdf", "author": "Woosuk Kwon;Sehoon Kim;Michael W. Mahoney;Joseph Hassoun;Kurt Keutzer;Amir Gholami", "authorids": "~Woosuk_Kwon1;~Sehoon_Kim1;~Michael_W._Mahoney1;~Joseph_Hassoun1;~Kurt_Keutzer1;~Amir_Gholami2", "gender": "M;M;;M;M;", "homepage": ";https://sehoonkim.org;;https://www.linkedin.com/in/joseph-hassoun/;https://people.eecs.berkeley.edu/~keutzer/;", "dblp": ";;;;k/KurtKeutzer.html;", "google_scholar": "_AT3eUcAAAAJ;zQABr7QAAAAJ;;https://scholar.google.com/citations?hl=en;ID9QePIAAAAJ;", "orcid": ";;;;0000-0003-3868-8501;", "linkedin": ";sehoon-kim-13a1b51b1/;;joseph-hassoun/;kurtkeutzer/;", "or_profile": "~Woosuk_Kwon1;~Sehoon_Kim1;~Michael_W._Mahoney1;~Joseph_Hassoun1;~Kurt_Keutzer1;~Amir_Gholami2", "aff": "University of California, Berkeley;University of California, Berkeley;;;University of California, Berkeley;", "aff_domain": "berkeley.edu;berkeley.edu;;;berkeley.edu;", "position": "PhD student;PhD student;;;Full Professor;", "bibtex": "@inproceedings{\nkwon2022a,\ntitle={A Fast Post-Training Pruning Framework for Transformers},\nauthor={Woosuk Kwon and Sehoon Kim and Michael W. Mahoney and Joseph Hassoun and Kurt Keutzer and Amir Gholami},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0GRBKLBjJE}\n}", "github": "", "project": "", "reviewers": "Lp5s;hRDe;92Qy;CMhk", "pdf_size": 1096063, "rating": "6;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;4;3", "contribution": "2;3;3;3", "wc_summary": "102;70;60;66", "wc_strengths_and_weaknesses": "56;226;68;73", "wc_questions": "264;47;43;19", "wc_limitations": "8;26;16;4", "wc_review": "430;369;187;162", "wc_reply_reviewers": "12;18;12;0", "wc_reply_authors": "1012;627;111;299", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.5, 16.27114009527298 ], "wc_strengths_and_weaknesses_avg": [ 105.75, 69.70069942260264 ], "wc_questions_avg": [ 93.25, 99.16242988148284 ], "wc_limitations_avg": [ 13.5, 8.411301920630361 ], "wc_review_avg": [ 287.0, 114.88907693945495 ], "wc_reply_reviewers_avg": [ 10.5, 6.5383484153110105 ], "wc_reply_authors_avg": [ 512.25, 342.5619469526643 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8295752471626103240&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "berkeley.edu;berkeley.edu;;;berkeley.edu;", "author_num": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Knowledge Distillation: Bad Models Can Be Good Role Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54011", "id": "0ISChqjlrq", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b88edf805e96654a4f9e7b783e854ae3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0ISChqjlrq", "openreview": "https://openreview.net/forum?id=0ISChqjlrq", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54011", "video": "https://nips.cc/virtual/2022/poster/54011", "author_site": "Gal Kaplun, Eran Malach, Preetum Nakkiran, Shai Shalev-Shwartz", "tldr": "We show that a model trained on noisy data can be a good teacher when unlabeled data is ample even when the teacher has noisy predictions.", "abstract": "Large neural networks trained in the overparameterized regime are able to fit noise to zero train error. Recent work of Nakkiran and Bansal has empirically observed that such networks behave as \u201cconditional samplers\u201d from the noisy distribution. That is, they replicate the noise in the train data to unseen examples. We give a theoretical framework for studying this conditional sampling behavior in the context of learning theory. We relate the notion of such samplers to knowledge distillation, where a student network imitates the outputs of a teacher on unlabeled data. We show that samplers, while being bad classifiers, can be good teachers. 
Concretely, we prove that distillation from samplers is guaranteed to produce a student which approximates the Bayes optimal classifier. Finally, we show that some common learning algorithms (e.g., Nearest-Neighbours and Kernel Machines) can often generate samplers when applied in the overparameterized regime.", "keywords": "Knowledge Distillation;Teacher-Student;Learning Theory;Learning to Sample;Ensembling", "primary_area": "", "supplementary_material": "/attachment/b47d6908f6dd27fa12173b82b333c10ac4c51e69.pdf", "author": "Gal Kaplun;eran malach;Preetum Nakkiran;Shai Shalev-Shwartz", "authorids": "~Gal_Kaplun1;~eran_malach1;~Preetum_Nakkiran1;~Shai_Shalev-Shwartz1", "gender": "M;M;;M", "homepage": "http://www.galkaplun.com;;http://preetum.nakkiran.org;http://www.cs.huji.ac.il/~shais/", "dblp": "237/9816;202/2566;151/6343;95/2750", "google_scholar": "y4BzFYsAAAAJ;I15dUOwAAAAJ;zithBbUAAAAJ;https://scholar.google.co.il/citations?user=uYVc9koAAAAJ", "orcid": ";;;", "linkedin": "gal-kaplun-865496151/;;;", "or_profile": "~Gal_Kaplun1;~eran_malach1;~Preetum_Nakkiran1;~Shai_Shalev-Shwartz1", "aff": "Harvard University;Hebrew University of Jerusalem, Israel;University of California, San Diego;Hebrew University, Hebrew University of Jerusalem", "aff_domain": "harvard.edu;huji.ac.il;ucsd.edu;cs.huji", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkaplun2022knowledge,\ntitle={Knowledge Distillation: Bad Models Can Be Good Role Models},\nauthor={Gal Kaplun and eran malach and Preetum Nakkiran and Shai Shalev-Shwartz},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0ISChqjlrq}\n}", "github": "", "project": "", "reviewers": "mfji;5UHV;ANZb;Yejy", "pdf_size": 677972, "rating": "6;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;2;3", "contribution": "3;2;2;3", "wc_summary": "75;113;96;290", "wc_strengths_and_weaknesses": "427;72;88;51", "wc_questions": "42;260;173;26", "wc_limitations": "42;34;3;7", "wc_review": "586;479;360;374", "wc_reply_reviewers": "0;23;0;0", "wc_reply_authors": "401;183;330;118", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 143.5, 85.64607404896036 ], "wc_strengths_and_weaknesses_avg": [ 159.5, 154.99758062627944 ], "wc_questions_avg": [ 125.25, 96.46080810360236 ], "wc_limitations_avg": [ 21.5, 16.80029761641144 ], "wc_review_avg": [ 449.75, 91.12182779115003 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 258.0, 112.75859169038961 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7217837594518942513&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "harvard.edu;huji.ac.il;ucsd.edu;cs.huji", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Harvard University;Hebrew University of Jerusalem;University of California, San Diego", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.harvard.edu;https://www.huji.ac.il;https://www.ucsd.edu", "aff_unique_abbr": "Harvard;HUJI;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Israel" }, { "id": "0IywQ8uxJx", "title": "Graph Neural Networks as Gradient Flows", "track": "main", "status": "Reject", "tldr": "Explainable framework for graph neural networks based on the gradient flow of a parametric energy function", "abstract": "Dynamical systems minimizing an energy are ubiquitous in geometry and physics. We propose a gradient flow framework for GNNs where the equations follow the direction of steepest descent of a learnable energy. This approach allows to analyse the GNN evolution from a multi-particle perspective as learning attractive and repulsive forces in feature space via the positive and negative eigenvalues of a symmetric `channel-mixing' matrix. We perform spectral analysis of the solutions and conclude that gradient flow graph convolutional models can induce a dynamics dominated by the graph high frequencies which is desirable for heterophilic datasets. We also describe structural constraints on common GNN architectures allowing to interpret them as gradient flows. We perform thorough ablation studies corroborating our theoretical analysis and show competitive performance of simple and lightweight models on real-world homophilic and heterophilic datasets.", "keywords": "Graph Neural Networks;Spectral analysis;Over-smoothing;Energy;Differential equations", "primary_area": "", "supplementary_material": "/attachment/449d06c2a24a17b98942e23e0842f4d4310c1987.zip", "author": "Francesco Di Giovanni;James Rowbottom;Benjamin Paul Chamberlain;Thomas Markovich;Michael M. Bronstein", "authorids": "~Francesco_Di_Giovanni1;~James_Rowbottom1;~Benjamin_Paul_Chamberlain1;~Thomas_Markovich1;~Michael_M._Bronstein1", "gender": "M;;M;;M", "homepage": "https://francescodgv.github.io/;;;http://thomasmarkovich.com;http://www.inf.usi.ch/bronstein/", "dblp": ";295/8782;;;07/2668", "google_scholar": "yzjjeqsAAAAJ;;https://scholar.google.co.uk/citations?user=Tr8LSOEAAAAJ;;UU3N6-UAAAAJ", "orcid": ";;;;", "linkedin": ";https://linkedin.com/in/jamesrowbottom;;;mbronstein/", "or_profile": "~Francesco_Di_Giovanni1;~James_Rowbottom1;~Benjamin_Paul_Chamberlain1;~Thomas_Markovich1;~Michael_M._Bronstein1", "aff": "Twitter;University of Cambridge;Twitter;Twitter;Twitter", "aff_domain": "twitter.com;cam.ac.uk;twitter.com;twitter.com;twitter.com", "position": "Postdoc;PhD student;ML Researcher;Researcher;Head of Graph ML", "bibtex": "@misc{\ngiovanni2022graph,\ntitle={Graph Neural Networks as Gradient Flows},\nauthor={Francesco Di Giovanni and James Rowbottom and Benjamin Paul Chamberlain and Thomas Markovich and Michael M. 
Bronstein},\nyear={2022},\nurl={https://openreview.net/forum?id=0IywQ8uxJx}\n}", "github": "", "project": "", "reviewers": "4YFR;13DY;PPbd;c28L", "site": "https://openreview.net/forum?id=0IywQ8uxJx", "pdf_size": 709990, "rating": "4;4;6;6", "confidence": "3;5;4;4", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "contribution": "2;2;3;3", "wc_summary": "53;54;130;102", "wc_strengths_and_weaknesses": "95;410;652;98", "wc_questions": "104;697;113;43", "wc_limitations": "54;1;1;12", "wc_review": "306;1162;896;255", "wc_reply_reviewers": "289;521;352;0", "wc_reply_authors": "1158;4345;1967;532", "reply_reviewers": "1;1;1;0", "reply_authors": "2;6;4;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.75, 32.78242669480098 ], "wc_strengths_and_weaknesses_avg": [ 313.75, 233.4934420920639 ], "wc_questions_avg": [ 239.25, 265.6504987761175 ], "wc_limitations_avg": [ 17.0, 21.828879952943073 ], "wc_review_avg": [ 654.75, 386.30646836417327 ], "wc_reply_reviewers_avg": [ 290.5, 187.95278662472657 ], "wc_reply_authors_avg": [ 2000.5, 1446.0377761317302 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Twitter, Inc.;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://twitter.com;https://www.cam.ac.uk", "aff_unique_abbr": "Twitter;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Bringing Image Scene Structure to Video via Frame-Clip Consistency of Object Tokens", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55119", "id": "0JV4VVBsK6a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/abc1943857a42935ceacff03c524bb44-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0JV4VVBsK6a", "openreview": "https://openreview.net/forum?id=0JV4VVBsK6a", "poster": "/media/PosterPDFs/NeurIPS%202022/55119.png?t=1668329824.8076937", "slides": "https://nips.cc/virtual/2022/poster/55119", "video": "https://nips.cc/virtual/2022/poster/55119", "author_site": "Elad Ben Avraham, Roei Herzig, Karttikeya Mangalam, Amir Bar, Anna Rohrbach, Leonid Karlinsky, Trevor Darrell, Amir Globerson", "tldr": "This paper presents a framework and model that demonstrates how to leverage image structure from a small set of images available during training to facilitate video learning within or outside of the domain of interest.", "abstract": "Recent action recognition models have achieved impressive results by integrating objects, their locations and interactions. However, obtaining dense structured annotations for each frame is tedious and time-consuming, making these methods expensive to train and less scalable. At the same time, if a small set of annotated images is available, either within or outside the domain of interest, how could we leverage these for a video downstream task? 
We propose a learning framework StructureViT (SViT for short), which demonstrates how utilizing the structure of a small number of images only available during training can improve a video model. SViT relies on two key insights. First, as both images and videos contain structured information, we enrich a transformer model with a set of object tokens that can be used across images and videos. Second, the scene representations of individual frames in video should ``align'' with those of still images. This is achieved via a Frame-Clip Consistency loss, which ensures the flow of structured information between images and videos. We explore a particular instantiation of scene structure, namely a Hand-Object Graph, consisting of hands and objects with their locations as nodes, and physical relations of contact/no-contact as edges. SViT shows strong performance improvements on multiple video understanding tasks and datasets, including the first place in the Ego4D CVPR'22 Point of No Return Temporal Localization Challenge. For code and pretrained models, visit the project page at https://eladb3.github.io/SViT/.", "keywords": "video models;object centric models;image-video", "primary_area": "", "supplementary_material": "/attachment/d3e55e315cf9182a057f6e9416307e60e6ed3ed2.pdf", "author": "Elad Ben Avraham;Roei Herzig;Karttikeya Mangalam;Amir Bar;Anna Rohrbach;Leonid Karlinsky;Trevor Darrell;Amir Globerson", "authorids": "~Elad_Ben_Avraham1;~Roei_Herzig2;~Karttikeya_Mangalam1;~Amir_Bar1;~Anna_Rohrbach1;~Leonid_Karlinsky3;~Trevor_Darrell2;~Amir_Globerson1", "gender": "M;M;M;M;F;M;M;M", "homepage": ";https://roeiherz.github.io/;http://karttikeya.github.io/;http://amirbar.net;https://anna-rohrbach.net/;;http://www.cs.tau.ac.il/~gamir/;https://people.eecs.berkeley.edu/~trevor/", "dblp": "304/3614;215/5165;200/8205;73/11011;152/5114;05/4463;08/4162.html;d/TrevorDarrell", "google_scholar": ";https://scholar.google.co.il/citations?user=6Q-289IAAAAJ;2l1fWEoAAAAJ;L__n1LUAAAAJ;https://scholar.google.de/citations?user=GHpxNQIAAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;0000-0003-1161-6006;;;", "linkedin": "elad-ben-avraham-1a07a4169/;roei-herzig-7534615a/;;;;;;", "or_profile": "~Elad_Ben_Avraham1;~Roei_Herzig2;~Karttikeya_Mangalam1;~Amir_Bar1;~Anna_Rohrbach1;~Leonid_Karlinsky3;~Amir_Globerson1;~trevor_darrell1", "aff": "Tel Aviv University;Tel Aviv University;University of California, Berkeley;Meta Facebook;University of California, Berkeley;IBM Research AI;Tel Aviv University;Electrical Engineering & Computer Science Department", "aff_domain": "tau.ac.il;tau.ac.il;berkeley.edu;fb.com;berkeley.edu;ibm.com;tau.ac.il;eecs.berkeley.edu", "position": "MS student;PhD student;PhD student;Intern;Research Scientist;Staff Research Scientist;Associate Professor;Professor", "bibtex": "@inproceedings{\navraham2022bringing,\ntitle={Bringing Image Scene Structure to Video via Frame-Clip Consistency of Object Tokens},\nauthor={Elad Ben Avraham and Roei Herzig and Karttikeya Mangalam and Amir Bar and Anna Rohrbach and Leonid Karlinsky and Trevor Darrell and Amir Globerson},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0JV4VVBsK6a}\n}", "github": "", "project": "", "reviewers": "bPRy;LCAU;DF7L;oj8q", "pdf_size": 823040, "rating": "4;5;6;6", "confidence": "4;5;4;4", "soundness": "2;3;4;3", "novelty": "1;2;3;3", "presentation": "2;3;4;4", "contribution": "1;2;3;3", "wc_summary": "75;134;112;55", "wc_strengths_and_weaknesses": "584;167;266;47", "wc_questions": "27;93;217;241", "wc_limitations": "49;16;35;30", "wc_review": "735;410;630;373", "wc_reply_reviewers": "63;418;30;0", "wc_reply_authors": "2856;1208;612;833", "reply_reviewers": "1;2;1;0", "reply_authors": "5;4;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 94.0, 30.84639363037436 ], "wc_strengths_and_weaknesses_avg": [ 266.0, 199.3025338524325 ], "wc_questions_avg": [ 144.5, 88.07241338807516 ], "wc_limitations_avg": [ 32.5, 11.800423721205947 ], "wc_review_avg": [ 537.0, 150.72989086442013 ], "wc_reply_reviewers_avg": [ 127.75, 169.05084294377238 ], "wc_reply_authors_avg": [ 1377.25, 879.9378884330416 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8783919417057800696&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "email": "tau.ac.il;tau.ac.il;berkeley.edu;fb.com;berkeley.edu;ibm.com;tau.ac.il;eecs.berkeley.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;1;3;0;4", "aff_unique_norm": "Tel Aviv University;University of California, Berkeley;Meta;IBM;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";;Meta Platforms, Inc.;AI;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.tau.ac.il;https://www.berkeley.edu;https://meta.com;https://www.ibm.com/research;", "aff_unique_abbr": "TAU;UC Berkeley;Meta;IBM;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;1;1;1;0", "aff_country_unique": "Israel;United States;" }, { "title": "NUWA-Infinity: Autoregressive over Autoregressive Generation for Infinite Visual Synthesis", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54352", "id": "0Kv7cLhuhQT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6358cd0cd6607fdf4870595795eb1710-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Kv7cLhuhQT", "openreview": "https://openreview.net/forum?id=0Kv7cLhuhQT", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54352", "video": "https://nips.cc/virtual/2022/poster/54352", "author_site": "Jian Liang, Chenfei Wu, Xiaowei Hu, Zhe Gan, Jianfeng Wang, Lijuan Wang, Zicheng Liu, Yuejian Fang, Nan Duan", "tldr": "", "abstract": " Infinite visual synthesis aims to generate high-resolution images, long-duration videos, and even visual generation of infinite size. Some recent work tried to solve this task by first dividing data into processable patches and then training the models on them without considering the dependencies between patches. 
However, since they fail to model global dependencies between patches, the quality and consistency of the generation can be limited. To address this issue, we propose NUWA-Infinity, a patch-level \\emph{``render-and-optimize''} strategy for infinite visual synthesis. Given a large image or a long video, NUWA-Infinity first splits it into non-overlapping patches and uses the ordered patch chain as a complete training instance; a rendering model then autoregressively predicts each patch based on its contexts. Once a patch is predicted, it is optimized immediately and its hidden states are saved as contexts for the next \\emph{``render-and-optimize''} process. This brings two advantages: ($i$) The autoregressive rendering process with information transfer between contexts provides implicit modeling of the global probability distribution; ($ii$) The timely optimization process alleviates the optimization burden on the model and helps convergence. Based on the above designs, NUWA-Infinity shows a strong synthesis ability on high-resolution images and long-duration videos. The homepage link is \\url{https://nuwa-infinity.microsoft.com}.", "keywords": "Image synthesis;video synthesis", "primary_area": "", "supplementary_material": "/attachment/6fededa00d970b5ab88ec715699a0bf64b8199ff.pdf", "author": "Jian Liang;Chenfei Wu;Xiaowei Hu;Zhe Gan;Jianfeng Wang;Lijuan Wang;Zicheng Liu;Yuejian Fang;Nan Duan", "authorids": "~Jian_Liang5;~Chenfei_Wu2;~Xiaowei_Hu4;~Zhe_Gan1;~Jianfeng_Wang4;~Lijuan_Wang1;~Zicheng_Liu1;~Yuejian_Fang1;~Nan_Duan1", "gender": "M;M;F;M;M;F;M;M;M", "homepage": "https://github.com/Muccul;;;http://zhegan27.github.io/;;https://www.microsoft.com/en-us/research/people/lijuanw/;https://sites.google.com/view/zichengliu/home?pli=1;http://www.ss.pku.edu.cn/index.php/teacherteam/teacherlist/1612-%E6%96%B9%E8%B7%83%E5%9D%9A;https://nanduan.github.io/", "dblp": ";;;41/7845;;51/2527.html;l/ZichengLiu;119/3697;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;E64XWyMAAAAJ;vJWEw_8AAAAJ;cDcWXuIAAAAJ;bkALdvsAAAAJ;;Qaa6OxIAAAAJ", "orcid": ";;;;;;0000-0001-5894-7828;;", "linkedin": ";;xiaowei-hu/;zhe-gan-a2229a78/;;;;;", "or_profile": "~Jian_Liang5;~Chenfei_Wu2;~Xiaowei_Hu4;~Zhe_Gan1;~Jianfeng_Wang4;~Lijuan_Wang1;~Zicheng_Liu1;~Yuejian_Fang1;~Nan_Duan1", "aff": "Peking University;Microsoft;Microsoft;Microsoft;Microsoft;Microsoft;Microsoft;Peking University;Microsoft Research Asia", "aff_domain": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn;microsoft.com", "position": "MS student;Researcher;SDE;Principal Researcher;Principal Researcher;Principal Researcher;partner research manager;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nliang2022nuwainfinity,\ntitle={{NUWA}-Infinity: Autoregressive over Autoregressive Generation for Infinite Visual Synthesis},\nauthor={Jian Liang and Chenfei Wu and Xiaowei Hu and Zhe Gan and Jianfeng Wang and Lijuan Wang and Zicheng Liu and Yuejian Fang and Nan Duan},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0Kv7cLhuhQT}\n}", "github": "", "project": "", "reviewers": "j1NF;o5hC;q8JV;msGb", "pdf_size": 4976159, "rating": "5;5;6;7", "confidence": "5;4;4;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "contribution": "2;2;3;3", "wc_summary": "27;75;80;33", "wc_strengths_and_weaknesses": "523;200;212;83", "wc_questions": "2;83;3;21", "wc_limitations": "25;9;23;23", "wc_review": "577;367;318;160", "wc_reply_reviewers": "256;0;29;16", "wc_reply_authors": "715;556;529;367", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 53.75, 23.909987452945266 ], "wc_strengths_and_weaknesses_avg": [ 254.5, 163.00383431073024 ], "wc_questions_avg": [ 27.25, 33.063386093986196 ], "wc_limitations_avg": [ 20.0, 6.4031242374328485 ], "wc_review_avg": [ 355.5, 149.01426106249025 ], "wc_reply_reviewers_avg": [ 75.25, 104.8603237645202 ], "wc_reply_authors_avg": [ 541.75, 123.40862003928251 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13240374514444074345&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;0;1", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "Peking U;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;1;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Efficiently Factorizing Boolean Matrices using Proximal Gradient Descent", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52887", "id": "0OGMrvHnQbb", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1e8730e2ccd6cefcf70a98dd90d9af6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0OGMrvHnQbb", "openreview": "https://openreview.net/forum?id=0OGMrvHnQbb", "poster": "/media/PosterPDFs/NeurIPS%202022/52887.png?t=1669729569.9006352", "slides": "https://nips.cc/virtual/2022/poster/52887", "video": "https://nips.cc/virtual/2022/poster/52887", "author_site": "Sebastian Dalleiger, Jilles Vreeken", "tldr": "We propose a novel elastic-net based regularizer that permits efficient Boolean matrix factorization using proximal gradient descent.", "abstract": "Addressing the interpretability problem of NMF on Boolean data, Boolean Matrix Factorization (BMF) uses Boolean algebra to decompose the input into low-rank Boolean factor matrices. These matrices are highly interpretable and very useful in practice, but they come at the high computational cost of solving an NP-hard combinatorial optimization problem. To reduce the computational burden, we propose to relax BMF continuously using a novel elastic-binary regularizer, from which we derive a proximal gradient algorithm. 
Through an extensive set of experiments, we demonstrate that our method works well in practice: On synthetic data, we show that it converges quickly, recovers the ground truth precisely, and estimates the simulated rank exactly. On real-world data, we improve upon the state of the art in recall, loss, and runtime, and a case study from the medical domain confirms that our results are easily interpretable and semantically meaningful.", "keywords": "Boolean Matrix Factorization;Non-negative Matrix Factorization;Proximal Point;Elastic Net;Model Selection", "primary_area": "", "supplementary_material": "/attachment/c44db9e495838432fc09cb44bbea86fcdf669f93.pdf", "author": "Sebastian Dalleiger;Jilles Vreeken", "authorids": "~Sebastian_Dalleiger1;~Jilles_Vreeken2", "gender": ";M", "homepage": ";https://vreeken.eu", "dblp": "266/4791;94/6462", "google_scholar": "Wi97BMwAAAAJ;p5HEQfIAAAAJ", "orcid": "0000-0003-1915-1709;0000-0002-2310-2806", "linkedin": "sebastian-dalleiger-3962b024a/;jilles-vreeken-b3b05b58/", "or_profile": "~Sebastian_Dalleiger1;~Jilles_Vreeken2", "aff": "Saarland Informatics Campus, Max-Planck Institute;Max-Planck Institute for Informatics", "aff_domain": "mpi-inf.mpg.de;mpi-inf.mpg.de", "position": "PhD student;Senior Researcher", "bibtex": "@inproceedings{\ndalleiger2022efficiently,\ntitle={Efficiently Factorizing Boolean Matrices using Proximal Gradient Descent},\nauthor={Sebastian Dalleiger and Jilles Vreeken},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0OGMrvHnQbb}\n}", "github": "", "project": "", "reviewers": "t3GQ;fNGy;Bmfn", "pdf_size": 3472615, "rating": "4;6;7", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "contribution": "2;3;3", "wc_summary": "38;67;91", "wc_strengths_and_weaknesses": "249;59;307", "wc_questions": "19;111;138", "wc_limitations": "1;2;76", "wc_review": "307;239;612", "wc_reply_reviewers": "378;0;222", "wc_reply_authors": "2520;678;1910", "reply_reviewers": "4;0;1", "reply_authors": "7;1;4", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 65.33333333333333, 21.66923061752668 ], "wc_strengths_and_weaknesses_avg": [ 205.0, 105.91820743699671 ], "wc_questions_avg": [ 89.33333333333333, 50.94005191289943 ], "wc_limitations_avg": [ 26.333333333333332, 35.122009560324926 ], "wc_review_avg": [ 386.0, 162.19946567935008 ], "wc_reply_reviewers_avg": [ 200.0, 155.09996776273036 ], "wc_reply_authors_avg": [ 1702.6666666666667, 766.1511310149947 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4385965086314153791&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "email": "mpi-inf.mpg.de;mpi-inf.mpg.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Max-Planck Institute;Max-Planck Institute for Informatics", "aff_unique_dep": "Informatics;", "aff_unique_url": 
"https://www.mpi-sws.org;https://mpi-inf.mpg.de", "aff_unique_abbr": "MPI-SWS;MPII", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Generalised Mutual Information for Discriminative Clustering", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53101", "id": "0Oy3PiA-aDp", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/16294049ed8de15830ac0b569b97f74a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Oy3PiA-aDp", "openreview": "https://openreview.net/forum?id=0Oy3PiA-aDp", "poster": "/media/PosterPDFs/NeurIPS%202022/dc1d3cb9517bda57aacd65f5b1986c6e.png?t=1666118393.2748485", "slides": "https://nips.cc/virtual/2022/poster/53101", "video": "https://nips.cc/virtual/2022/poster/53101", "author_site": "Louis Ohl, Pierre-Alexandre Mattei, Charles Bouveyron, Warith HARCHAOUI, Micka\u00ebl Leclercq, Arnaud Droit, Frederic Precioso", "tldr": "We replace the Kullback-Leibler divergence inside the mutual information by other distances like the Wasserstein metric and improve thus clustering performances of deep models.", "abstract": "In the last decade, recent successes in deep clustering majorly involved the mutual information (MI) as an unsupervised objective for training neural networks with increasing regularisations. While the quality of the regularisations have been largely discussed for improvements, little attention has been dedicated to the relevance of MI as a clustering objective. In this paper, we first highlight how the maximisation of MI does not lead to satisfying clusters. We identified the Kullback-Leibler divergence as the main reason of this behaviour. Hence, we generalise the mutual information by changing its core distance, introducing the generalised mutual information (GEMINI): a set of metrics for unsupervised neural network training. Unlike MI, some GEMINIs do not require regularisations when training. Some of these metrics are geometry-aware thanks to distances or kernels in the data space. 
Finally, we highlight that GEMINIs can automatically select a relevant number of clusters, a property that has been little studied in deep clustering context where the number of clusters is a priori unknown.", "keywords": "Unsupervised learning;Clustering;Deep learning;Information Theory", "primary_area": "", "supplementary_material": "/attachment/70acfbac60a30b44286bce16aa57b8c1a614d391.pdf", "author": "Louis Ohl;Pierre-Alexandre Mattei;Charles Bouveyron;Warith HARCHAOUI;Micka\u00ebl Leclercq;Arnaud Droit;Frederic Precioso", "authorids": "~Louis_Ohl1;~Pierre-Alexandre_Mattei3;~Charles_Bouveyron2;~Warith_HARCHAOUI1;mickael.leclercq@crchudequebec.ulaval.ca;arnaud.droit@crchudequebec.ulaval.ca;~Frederic_Precioso1", "gender": "M;M;M;M;;;M", "homepage": "https://oshillou.github.io/;http://pamattei.github.io;http://math.unice.fr/~cbouveyr/;http://www.harchaoui.org/warith;;;https://www.i3s.unice.fr/~precioso/", "dblp": "331/3476;177/7275;;;;;83/1407.html", "google_scholar": "s2uxUGwAAAAJ;https://scholar.google.fr/citations?user=Tqa_-D0AAAAJ;;;;;-0cKTucAAAAJ", "orcid": ";;;;;;0000-0001-8712-1443", "linkedin": ";;;warith-harchaoui;;;fr%C3%A9d%C3%A9ric-precioso-3a37389/", "or_profile": "~Louis_Ohl1;~Pierre-Alexandre_Mattei3;~Charles_Bouveyron2;~Warith_HARCHAOUI1;mickael.leclercq@crchudequebec.ulaval.ca;arnaud.droit@crchudequebec.ulaval.ca;~Frederic_Precioso1", "aff": "Universit\u00e9 Laval;INRIA;Universit\u00e9 C\u00f4te d'Azur;Jellysmack;;;Universit\u00e9 de Nice-Sophia Antipolis", "aff_domain": "ulaval.ca;inria.fr;univ-cotedazur.fr;jellysmack.com;;;unice.fr", "position": "PhD student;Research scientist;Full Professor;Researcher;;;Full Professor", "bibtex": "@inproceedings{\nohl2022generalised,\ntitle={Generalised Mutual Information for Discriminative Clustering},\nauthor={Louis Ohl and Pierre-Alexandre Mattei and Charles Bouveyron and Warith Harchaoui and Micka{\\\"e}l Leclercq and Arnaud Droit and Frederic Precioso},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0Oy3PiA-aDp}\n}", "github": "", "project": "", "reviewers": "XXcZ;gtjD;usNx", "pdf_size": 2519487, "rating": "4;6;7", "confidence": "4;3;3", "soundness": "2;3;4", "novelty": "1;3;4", "presentation": "3;2;3", "contribution": "1;3;4", "wc_summary": "50;40;66", "wc_strengths_and_weaknesses": "216;53;246", "wc_questions": "42;20;26", "wc_limitations": "1;41;3", "wc_review": "309;154;341", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "353;205;437", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 52.0, 10.708252269472673 ], "wc_strengths_and_weaknesses_avg": [ 171.66666666666666, 84.79910900999425 ], "wc_questions_avg": [ 29.333333333333332, 9.285592184789413 ], "wc_limitations_avg": [ 15.0, 18.40289832245635 ], "wc_review_avg": [ 268.0, 81.66190462306562 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 331.6666666666667, 95.9073627112237 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9449111825230683, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17126945082306251507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ulaval.ca;inria.fr;univ-cotedazur.fr;jellysmack.com;;;unice.fr", "author_num": 7, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Universit\u00e9 Laval;INRIA;Universit\u00e9 C\u00f4te d'Azur;Jellysmack;Universit\u00e9 de Nice-Sophia Antipolis", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ulaval.ca;https://www.inria.fr;https://www.univ-cotedazur.fr;https://www.jellysmack.com;https://www.unice.fr", "aff_unique_abbr": "ULaval;INRIA;UCA;;UNICA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Sophia Antipolis", "aff_country_unique_index": "0;1;1;2;1", "aff_country_unique": "Canada;France;United States" }, { "title": "Self-supervised surround-view depth estimation with volumetric feature fusion", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54283", "id": "0PfIQs-ttQQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/19a0a55fcb8fc0c31db093941fccd707-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0PfIQs-ttQQ", "openreview": "https://openreview.net/forum?id=0PfIQs-ttQQ", "poster": "/media/PosterPDFs/NeurIPS%202022/54283.png?t=1669214628.8946726", "slides": "https://nips.cc/virtual/2022/poster/54283", "video": "https://nips.cc/virtual/2022/poster/54283", "author_site": "Jung-Hee Kim, Junhwa Hur, Tien Phuoc Nguyen, Seong-Gyun Jeong", "tldr": "We introduce a volumetric feature representation for self-supervised surround-view depth approach, which not only outputs metric-scale depth and canonical camera motion, but also synthesizes a depth map at a novel view.", "abstract": "We present a self-supervised depth estimation approach using a unified volumetric feature fusion for surround-view images. 
Given a set of surround-view images, our method constructs a volumetric feature map by extracting image feature maps from surround-view images and fusing the feature maps into a shared, unified 3D voxel space. The volumetric feature map can then be used to estimate a depth map at each surround view by projecting it into an image coordinate. A volumetric feature contains 3D information at its local voxel coordinate; thus our method can also synthesize a depth map at arbitrarily rotated viewpoints by projecting the volumetric feature map into the target viewpoints. Furthermore, assuming static camera extrinsics in the multi-camera system, we propose to estimate a canonical camera motion from the volumetric feature map. Our method leverages 3D spatio-temporal context to learn metric-scale depth and the canonical camera motion in a self-supervised manner. Our method outperforms the prior arts on DDAD and nuScenes datasets, especially estimating more accurate metric-scale depth and consistent depth between neighboring views.", "keywords": "Surround-view depth estimation;Monocular depth;Self-supervised learning;Depth synthesis", "primary_area": "", "supplementary_material": "/attachment/9fe5727b8bcdfab2b24678477d969cb66b0cc51d.pdf", "author": "Jung Hee Kim;Junhwa Hur;Tien Phuoc Nguyen;Seong-Gyun Jeong", "authorids": "~Jung_Hee_Kim2;~Junhwa_Hur1;~Tien_Phuoc_Nguyen1;~Seong-Gyun_Jeong3", "gender": "M;M;M;M", "homepage": "https://jungheekim29.github.io/;https://hurjunhwa.github.io/;;", "dblp": "187/8787;135/9099;;08/10699", "google_scholar": "PodbNikAAAAJ;z4dNJdkAAAAJ;HYip0TwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "jung-hee-kim-870b15144/;;tiennp51;", "or_profile": "~Jung_Hee_Kim2;~Junhwa_Hur1;~Tien_Phuoc_Nguyen1;~Seong-Gyun_Jeong3", "aff": "42dot;TU Darmstadt;;42dot.ai", "aff_domain": "42dot.ai;tu-darmstadt.de;;42dot.ai", "position": "Researcher;PhD student;;Principal Researcher", "bibtex": "@inproceedings{\nkim2022selfsupervised,\ntitle={Self-supervised surround-view depth estimation with volumetric feature fusion},\nauthor={Jung Hee Kim and Junhwa Hur and Tien Phuoc Nguyen and Seong-Gyun Jeong},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0PfIQs-ttQQ}\n}", "github": "", "project": "", "reviewers": "ZBww;9Ef1;K314", "pdf_size": 24140647, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "3;3;2", "novelty": "3;2;2", "presentation": "3;3;2", "contribution": "3;2;2", "wc_summary": "84;51;66", "wc_strengths_and_weaknesses": "219;59;299", "wc_questions": "21;61;42", "wc_limitations": "14;1;2", "wc_review": "338;172;409", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "506;484;694", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 67.0, 13.490737563232042 ], "wc_strengths_and_weaknesses_avg": [ 192.33333333333334, 99.77753031397177 ], "wc_questions_avg": [ 41.333333333333336, 16.33673433979046 ], "wc_limitations_avg": [ 5.666666666666667, 5.9066817155564495 ], "wc_review_avg": [ 306.3333333333333, 99.31207826286231 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 561.3333333333334, 94.2384682010955 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13101461649075956126&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "42dot.ai;tu-darmstadt.de;;42dot.ai", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "42dot;Technische Universit\u00e4t Darmstadt;42dot.ai", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.tu-darmstadt.de;https://42dot.ai", "aff_unique_abbr": ";TU Darmstadt;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "1;2", "aff_country_unique": ";Germany;United States" }, { "title": "A Stochastic Linearized Augmented Lagrangian Method for Decentralized Bilevel Optimization", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53495", "id": "0RMDK39mGg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c5cf13bfd3762821ef7607e63ee90075-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0RMDK39mGg", "openreview": "https://openreview.net/forum?id=0RMDK39mGg", "poster": "/media/PosterPDFs/NeurIPS%202022/3799b2e805a7fa8b076fc020574a73b2.png?t=1667287118.1130059", "slides": "https://nips.cc/virtual/2022/poster/53495", "video": "https://nips.cc/virtual/2022/poster/53495", "author_site": "Songtao Lu, Siliang Zeng, Xiaodong Cui, Mark Squillante, Lior Horesh, Brian Kingsbury, Jia Liu, Mingyi Hong", "tldr": "This work develops a stochastic linearized augmented Lagrangian method (SLAM) for solving general nonconvex bilevel optimization problems over a graph, where both upper and lower optimization variables are able to achieve a consensus.", "abstract": "Bilevel optimization has been shown to be a powerful framework for formulating multi-task machine learning problems, e.g., reinforcement learning (RL) and meta-learning, where the decision variables are coupled in both levels of the minimization problems. 
In practice, the learning tasks would be located at different computing resource environments, and thus there is a need for deploying a decentralized training framework to implement multi-agent and multi-task learning. We develop a stochastic linearized augmented Lagrangian method (SLAM) for solving general nonconvex bilevel optimization problems over a graph, where both upper and lower optimization variables are able to achieve a consensus. We also establish that the theoretical convergence rate of the proposed SLAM to the Karush-Kuhn-Tucker (KKT) points of this class of problems is on the same order as the one achieved by the classical distributed stochastic gradient descent for only single-level nonconvex minimization problems. Numerical results tested on multi-agent RL problems showcase the superiority of SLAM compared with the benchmarks.", "keywords": "Decentralized bilevel optimization;stochastic linearized augmented Lagrangian method (SLAM);multi-agent actor-critic algorithm", "primary_area": "", "supplementary_material": "/attachment/5ba57d5c85490652055567333ccc1b74b157afb2.pdf", "author": "Songtao Lu;Siliang Zeng;Xiaodong Cui;Mark S. Squillante;Lior Horesh;Brian Kingsbury;Jia Liu;Mingyi Hong", "authorids": "~Songtao_Lu1;~Siliang_Zeng1;~Xiaodong_Cui1;~Mark_S._Squillante1;~Lior_Horesh1;~Brian_Kingsbury1;~Jia_Liu1;~Mingyi_Hong1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://songtaogithub.github.io/;https://siliangzeng.github.io/index.html;http://researcher.watson.ibm.com/researcher/view.php?person=us-cuix;https://researcher.watson.ibm.com/researcher/view.php?person=us-mss;;https://researcher.watson.ibm.com/researcher/view.php?person=us-bedk;https://kevinliu-osu.github.io/index.html;http://people.ece.umn.edu/~mhong/mingyi.html", "dblp": "05/2887;38/9;;67/3865;14/10384;98/4359;;57/8053", "google_scholar": "LRsjX7kAAAAJ;IfqsDyYAAAAJ;wzNVJQsAAAAJ;;qbqwCbEAAAAJ;iJENOG8AAAAJ;Ofx3dScAAAAJ;qRnP-p0AAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;lior-horesh-7365a46/;brianedkingsbury/;;", "or_profile": "~Songtao_Lu1;~Siliang_Zeng1;~Xiaodong_Cui1;~Mark_S._Squillante1;~Lior_Horesh1;~Brian_Kingsbury1;~Jia_Liu1;~Mingyi_Hong1", "aff": "IBM Thomas J. Watson Research Center;University of Minnesota, Twin Cities;IBM T. J. Watson Research Center;IBM Research;International Business Machines;IBM;The Ohio State University;University of Minnesota, Minneapolis", "aff_domain": "ibm.com;umn.edu;us.ibm.com;us.ibm.com;ibm.com;us.ibm.com;osu.edu;umn.edu", "position": "Research Scientist;PhD student;Principal Research Staff Member;Distinguished Research Staff Member;Senior Manager;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlu2022a,\ntitle={A Stochastic Linearized Augmented Lagrangian Method for Decentralized Bilevel Optimization},\nauthor={Songtao Lu and Siliang Zeng and Xiaodong Cui and Mark S. Squillante and Lior Horesh and Brian Kingsbury and Jia Liu and Mingyi Hong},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0RMDK39mGg}\n}", "github": "", "project": "", "reviewers": "a2eA;6ivX;wDpj;r52d", "pdf_size": 538421, "rating": "5;6;7;8", "confidence": "3;3;2;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;3;4", "contribution": "2;3;3;4", "wc_summary": "141;31;50;70", "wc_strengths_and_weaknesses": "185;108;100;140", "wc_questions": "8;252;91;2", "wc_limitations": "1;24;74;15", "wc_review": "335;415;315;227", "wc_reply_reviewers": "0;0;80;0", "wc_reply_authors": "632;918;1199;587", "reply_reviewers": "0;0;2;0", "reply_authors": "1;2;4;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.0, 41.611296543126365 ], "wc_strengths_and_weaknesses_avg": [ 133.25, 33.41687447981932 ], "wc_questions_avg": [ 88.25, 100.87213440787302 ], "wc_limitations_avg": [ 28.5, 27.518175811634027 ], "wc_review_avg": [ 323.0, 66.87301398920195 ], "wc_reply_reviewers_avg": [ 20.0, 34.64101615137755 ], "wc_reply_authors_avg": [ 834.0, 246.0152434301582 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8386758920821437648&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ibm.com;umn.edu;us.ibm.com;us.ibm.com;ibm.com;us.ibm.com;osu.edu;umn.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;2;2;3;1", "aff_unique_norm": "IBM;University of Minnesota;International Business Machines Corporation;Ohio State University", "aff_unique_dep": "Research;;;", "aff_unique_url": "https://www.ibm.com/research;https://www.minnesota.edu;https://www.ibm.com;https://www.osu.edu", "aff_unique_abbr": "IBM;UMN;IBM;OSU", "aff_campus_unique_index": "0;1;2;4", "aff_campus_unique": "Yorktown Heights;Twin Cities;T. J. Watson;;Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Flexible Diffusion Modeling of Long Videos", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53513", "id": "0RTJcuvHtIu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b2fe1ee8d936ac08dd26f2ff58986c8f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0RTJcuvHtIu", "openreview": "https://openreview.net/forum?id=0RTJcuvHtIu", "poster": "/media/PosterPDFs/NeurIPS%202022/53513.png?t=1670527836.282049", "slides": "https://nips.cc/virtual/2022/poster/53513", "video": "https://nips.cc/virtual/2022/poster/53513", "author_site": "William Harvey, Saeid Naderiparizi, Vaden Masrani, Christian Weilbach, Frank Wood", "tldr": "We apply diffusion generative models to video. To model long videos with bounded computational resources, we present an architecture which can flexibly generate, condition on, or ignore any subsets of video frames.", "abstract": "We present a framework for video modeling based on denoising diffusion probabilistic models that produces long-duration video completions in a variety of realistic environments. 
We introduce a generative model that can, at test time, sample any subset of video frames conditioned on any other subset, and we present an architecture adapted for this purpose. Doing so allows us to efficiently compare and optimize a variety of schedules for the order in which frames in a long video are sampled and use selective sparse and long-range conditioning on previously sampled frames. We demonstrate improved video modeling over prior work on a number of datasets and sample temporally coherent videos over 25 minutes in length. We additionally release a new video modeling dataset and semantically meaningful metrics based on videos generated in the CARLA autonomous driving simulator.", "keywords": "generative modeling;denoising diffusion probabilistic model;video modeling", "primary_area": "", "supplementary_material": "/attachment/33f42547e1ae57c936431cb45fcbe1e5aca38b59.pdf", "author": "William Harvey;Saeid Naderiparizi;Vaden Masrani;Christian Dietrich Weilbach;Frank Wood", "authorids": "~William_Harvey1;~Saeid_Naderiparizi1;~Vaden_Masrani1;~Christian_Dietrich_Weilbach1;~Frank_Wood2", "gender": "M;M;;M;M", "homepage": "https://www.cs.ubc.ca/~wsgh/;https://www.cs.ubc.ca/~saeidnp/;https://vmasrani.github.io/;https://whilo.github.io/;http://www.robots.ox.ac.uk/~fwood/", "dblp": "26/8210-2;244/9611;199/5404;;44/4750", "google_scholar": "https://scholar.google.co.uk/citations?user=kDd7nBkAAAAJ;Ubt0dYYAAAAJ;https://scholar.google.ca/citations?user=3m_6zUEAAAAJ;;d4yNzXIAAAAJ", "orcid": ";;;;", "linkedin": ";saeidnp;vaden-masrani;;frank-wood-43529114?trk=hp-identity-name", "or_profile": "~William_Harvey1;~Saeid_Naderiparizi1;~Vaden_Masrani1;~Christian_Dietrich_Weilbach1;~Frank_Wood2", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;Department of Computer Science, University of British Columbia;University of British Columbia", "aff_domain": "cs.ubc.ca;ubc.ca;ubc.ca;cs.ubc.ca;ubc.ca", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nharvey2022flexible,\ntitle={Flexible Diffusion Modeling of Long Videos},\nauthor={William Harvey and Saeid Naderiparizi and Vaden Masrani and Christian Dietrich Weilbach and Frank Wood},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0RTJcuvHtIu}\n}", "github": "", "project": "", "reviewers": "twbY;sEEi;wWqf", "pdf_size": 3543722, "rating": "4;4;7", "confidence": "5;4;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;2;4", "contribution": "2;2;3", "wc_summary": "106;57;82", "wc_strengths_and_weaknesses": "223;323;167", "wc_questions": "16;56;188", "wc_limitations": "1;1;75", "wc_review": "346;437;512", "wc_reply_reviewers": "0;0;129", "wc_reply_authors": "1339;1575;746", "reply_reviewers": "0;0;1", "reply_authors": "2;3;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 81.66666666666667, 20.00555478416488 ], "wc_strengths_and_weaknesses_avg": [ 237.66666666666666, 64.52561937366859 ], "wc_questions_avg": [ 86.66666666666667, 73.49074015744357 ], "wc_limitations_avg": [ 25.666666666666668, 34.883934538536344 ], "wc_review_avg": [ 431.6666666666667, 67.87406639423403 ], "wc_reply_reviewers_avg": [ 43.0, 60.81118318204309 ], "wc_reply_authors_avg": [ 1220.0, 348.741547089914 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 310, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14027817982126481605&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.ubc.ca;ubc.ca;ubc.ca;cs.ubc.ca;ubc.ca", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Vancouver", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Mirror Descent Maximizes Generalized Margin and Can Be Implemented Efficiently", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54441", "id": "0SVOleKNRAU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c9694bf4f9bf3626f7d21158bab74f8e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0SVOleKNRAU", "openreview": "https://openreview.net/forum?id=0SVOleKNRAU", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54441", "video": "https://nips.cc/virtual/2022/poster/54441", "author_site": "Haoyuan Sun, Kwangjun Ahn, Christos Thrampoulidis, Navid Azizan", "tldr": "", "abstract": "Driven by the empirical success and wide use of deep neural networks, understanding the generalization performance of overparameterized models has become an increasingly popular question. To this end, there has been substantial effort to characterize the implicit bias of the optimization algorithms used, such as gradient descent (GD), and the structural properties of their preferred solutions. This paper answers an open question in this literature: For the classification setting, what solution does mirror descent (MD) converge to? 
Specifically, motivated by its efficient implementation, we consider the family of mirror descent algorithms with potential function chosen as the $p$-th power of the $\\ell_p$-norm, which is an important generalization of GD. We call this algorithm $p$-$\\textsf{GD}$. For this family, we characterize the solutions it obtains and show that it converges in direction to a generalized maximum-margin solution with respect to the $\\ell_p$-norm for linearly separable classification. While the MD update rule is in general expensive to compute and not suitable for deep learning, $p$-$\\textsf{GD}$ is fully parallelizable in the same manner as SGD and can be used to train deep neural networks with virtually no additional computational overhead. Using comprehensive experiments with both linear and deep neural network models, we demonstrate that $p$-$\\textsf{GD}$ can noticeably affect the structure and the generalization performance of the learned models.", "keywords": "mirror descent;gradient descent;overparameterization;implicit regularization", "primary_area": "", "supplementary_material": "/attachment/04f3767759ada1b9f652956cdfc2e4aa15b745d2.zip", "author": "Haoyuan Sun;Kwangjun Ahn;Christos Thrampoulidis;Navid Azizan", "authorids": "~Haoyuan_Sun1;~Kwangjun_Ahn2;~Christos_Thrampoulidis1;~Navid_Azizan1", "gender": ";;;", "homepage": ";http://kjahn.mit.edu/;https://sites.google.com/view/cthrampo/home;", "dblp": ";;127/6532;", "google_scholar": ";z94iNtgAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Haoyuan_Sun1;~Kwangjun_Ahn2;~Christos_Thrampoulidis1;~Navid_Azizan1", "aff": ";Massachusetts Institute of Technology;University of British Columbia;", "aff_domain": ";mit.edu;ubc.ca;", "position": ";PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nsun2022mirror,\ntitle={Mirror Descent Maximizes Generalized Margin and Can Be Implemented Efficiently},\nauthor={Haoyuan Sun and Kwangjun Ahn and Christos Thrampoulidis and Navid Azizan},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0SVOleKNRAU}\n}", "github": "", "project": "", "reviewers": "UoZK;FypC;LBAU;ZhPm", "pdf_size": 248715, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;4;3", "contribution": "2;3;2;3", "wc_summary": "47;59;129;103", "wc_strengths_and_weaknesses": "135;164;113;262", "wc_questions": "66;82;56;336", "wc_limitations": "1;5;1;46", "wc_review": "249;310;299;747", "wc_reply_reviewers": "48;298;40;69", "wc_reply_authors": "435;504;311;581", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.5, 33.087006513131406 ], "wc_strengths_and_weaknesses_avg": [ 168.5, 56.931976954959154 ], "wc_questions_avg": [ 135.0, 116.41735265844177 ], "wc_limitations_avg": [ 13.25, 18.978606376654742 ], "wc_review_avg": [ 401.25, 200.93826788344722 ], "wc_reply_reviewers_avg": [ 113.75, 106.90270108841965 ], "wc_reply_authors_avg": [ 457.75, 99.22543776673399 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11549089650625113974&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";mit.edu;ubc.ca;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.ubc.ca", "aff_unique_abbr": "MIT;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "title": "Decomposed Knowledge Distillation for Class-Incremental Semantic Segmentation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53823", "id": "0SgKq4ZC9r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/439bf902de1807088d8b731ca20b0777-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0SgKq4ZC9r", "openreview": "https://openreview.net/forum?id=0SgKq4ZC9r", "poster": "/media/PosterPDFs/NeurIPS%202022/53823.png?t=1668389407.1673737", "slides": "https://nips.cc/virtual/2022/poster/53823", "video": "https://nips.cc/virtual/2022/poster/53823", "author_site": "Donghyeon Baek, Youngmin Oh, Sanghoon Lee, Junghyup Lee, Bumsub Ham", "tldr": "We present a simple yet effective framework that achieves a good trade-off between plasticity and rigidity for class-incremental semantic segmentation.", "abstract": "Class-incremental semantic segmentation (CISS) labels each pixel of an image with a corresponding object/stuff class continually. To this end, it is crucial to learn novel classes incrementally without forgetting previously learned knowledge. Current CISS methods typically use a knowledge distillation (KD) technique for preserving classifier logits, or freeze a feature extractor, to avoid the forgetting problem. The strong constraints, however, prevent learning discriminative features for novel classes. 
We introduce a CISS framework that alleviates the forgetting problem and facilitates learning novel classes effectively. We have found that a logit can be decomposed into two terms. They quantify how likely it is that an input belongs to a particular class or not, providing a clue to the reasoning process of a model. The KD technique, in this context, preserves the sum of the two terms ($\\textit{i.e.}$, a class logit), suggesting that each term could still change individually, and thus the KD does not imitate the reasoning process. To impose constraints on each term explicitly, we propose a new decomposed knowledge distillation (DKD) technique, improving the rigidity of a model and addressing the forgetting problem more effectively. We also introduce a novel initialization method to train new classifiers for novel classes. In CISS, the number of negative training samples for novel classes is not sufficient to discriminate them from old classes. To mitigate this, we propose to transfer knowledge of negatives to the classifiers successively using an auxiliary classifier, boosting the performance significantly. Experimental results on standard CISS benchmarks demonstrate the effectiveness of our framework.", "keywords": "class-incremental learning;continual learning;incremental learning;semantic segmentation", "primary_area": "", "supplementary_material": "/attachment/68f85852cb31f3c87615731849580206c8ecc42f.pdf", "author": "Donghyeon Baek;Youngmin Oh;Sanghoon Lee;Junghyup Lee;Bumsub Ham", "authorids": "~Donghyeon_Baek1;~Youngmin_Oh1;~Sanghoon_Lee3;~Junghyup_Lee1;~Bumsub_Ham2", "gender": ";;M;M;M", "homepage": "https://dh-baek.github.io/;https://50min.github.io/;https://sanghoooon.github.io/;;https://cvlab.yonsei.ac.kr/", "dblp": "299/7546;;58/6214;06/4378;03/8108", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?user=Ends2WoAAAAJ;f3pG54AAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0006-5568-2127;;;", "linkedin": ";;;;", "or_profile": "~Donghyeon_Baek1;~Youngmin_Oh1;~Sanghoon_Lee3;~Junghyup_Lee1;~Bumsub_Ham2", "aff": "Yonsei University;Yonsei University;Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbaek2022decomposed,\ntitle={Decomposed Knowledge Distillation for Class-Incremental Semantic Segmentation},\nauthor={Donghyeon Baek and Youngmin Oh and Sanghoon Lee and Junghyup Lee and Bumsub Ham},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0SgKq4ZC9r}\n}", "github": "", "project": "", "reviewers": "vbQC;X4eN;HGuy", "pdf_size": 7035962, "rating": "4;6;6", "confidence": "4;5;4", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "3;4;3", "contribution": "2;3;3", "wc_summary": "44;60;65", "wc_strengths_and_weaknesses": "89;405;186", "wc_questions": "222;29;22", "wc_limitations": "30;107;2", "wc_review": "385;601;275", "wc_reply_reviewers": "0;51;0", "wc_reply_authors": "1355;1565;593", "reply_reviewers": "0;1;0", "reply_authors": "3;4;1", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.333333333333336, 8.956685895029603 ], "wc_strengths_and_weaknesses_avg": [ 226.66666666666666, 132.1724462796825 ], "wc_questions_avg": [ 91.0, 92.67505957196178 ], "wc_limitations_avg": [ 46.333333333333336, 44.39469437769438 ], "wc_review_avg": [ 420.3333333333333, 135.41376919977114 ], "wc_reply_reviewers_avg": [ 17.0, 24.041630560342615 ], "wc_reply_authors_avg": [ 1171.0, 417.60268198372484 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2264152168013266961&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "LASSIE: Learning Articulated Shapes from Sparse Image Ensemble via 3D Part Discovery", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55171", "id": "0TDki1mlcwz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6274d57365d7a6be06e58cad30d1b9da-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0TDki1mlcwz", "openreview": "https://openreview.net/forum?id=0TDki1mlcwz", "poster": "/media/PosterPDFs/NeurIPS%202022/894b77f805bd94d292574c38c5d628d5.png?t=1667671852.9158401", "slides": "https://nips.cc/virtual/2022/poster/55171", "video": "https://nips.cc/virtual/2022/poster/55171", "author_site": "Chun-Han Yao, Wei-Chih Hung, Yuanzhen Li, Michael Rubinstein, Ming-Hsuan Yang, Varun Jampani", "tldr": "We learn to reconstruct high-quality articulated shapes from sparse image collections by discovering 3D neural parts without any shape template or keypoint annotations.", "abstract": "Creating high-quality articulated 3D models of animals is challenging either via manual creation or using 3D scanning tools. \nTherefore, techniques to reconstruct articulated 3D objects from 2D images are crucial and highly useful. 
In this work, we propose a practical problem setting to estimate 3D pose and shape of animals given only a few (10-30) in-the-wild images of a particular animal species (say, horse). Contrary to existing works that rely on pre-defined template shapes, we do not assume any form of 2D or 3D ground-truth annotations, nor do we leverage any multi-view or temporal information. Moreover, each input image ensemble can contain animal instances with varying poses, backgrounds, illuminations, and textures. Our key insight is that 3D parts have much simpler shapes than the overall animal and that they are robust w.r.t. animal pose articulations. Following these insights, we propose LASSIE, a novel optimization framework which discovers 3D parts in a self-supervised manner with minimal user intervention. A key driving force behind LASSIE is the enforcement of 2D-3D part consistency using self-supervisory deep features. Experiments on Pascal-Part and self-collected in-the-wild animal datasets demonstrate considerably better 3D reconstructions as well as both 2D and 3D part discovery compared to prior art. Project page: https://chhankyao.github.io/lassie/", "keywords": "Articulated shape;sparse-view optimization;3D part discovery", "primary_area": "", "supplementary_material": "/attachment/3d82defedbbe29455ac3e5de630c6db3c7d7d6bb.pdf", "author": "Chun-Han Yao;Wei-Chih Hung;Yuanzhen Li;Michael Rubinstein;Ming-Hsuan Yang;Varun Jampani", "authorids": "~Chun-Han_Yao1;hungwayne@waymo.com;yzli@google.com;~Michael_Rubinstein1;~Ming-Hsuan_Yang1;~Varun_Jampani2", "gender": "M;;;M;M;", "homepage": "https://chhankyao.github.io/;;;http://people.csail.mit.edu/mrub/;https://faculty.ucmerced.edu/mhyang/;", "dblp": "184/9458;;;16/1356;79/3711.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;ttBdcmsAAAAJ;p9-ohHsAAAAJ;", "orcid": ";;;;0000-0003-4848-2304;", "linkedin": ";;;;minghsuanyang/;", "or_profile": "~Chun-Han_Yao1;hungwayne@waymo.com;yzli@google.com;~Michael_Rubinstein1;~Ming-Hsuan_Yang1;~Varun_Jampani2", "aff": "University of California at Merced;;;Google;University of California at Merced;", "aff_domain": "ucmerced.edu;;;google.com;umcerced.edu;", "position": "PhD student;;;Research Scientist;Professor;", "bibtex": "@inproceedings{\nyao2022lassie,\ntitle={{LASSIE}: Learning Articulated Shapes from Sparse Image Ensemble via 3D Part Discovery},\nauthor={Chun-Han Yao and Wei-Chih Hung and Yuanzhen Li and Michael Rubinstein and Ming-Hsuan Yang and Varun Jampani},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0TDki1mlcwz}\n}", "github": "", "project": "", "reviewers": "boNQ;mH51;LjEQ;rsf7", "pdf_size": 18276456, "rating": "4;7;7;7", "confidence": "5;4;4;4", "soundness": "2;4;3;4", "novelty": "4;3;3;4", "presentation": "3;3;4;4", "contribution": "4;3;3;4", "wc_summary": "126;202;120;123", "wc_strengths_and_weaknesses": "515;559;709;303", "wc_questions": "148;514;53;17", "wc_limitations": "198;44;21;12", "wc_review": "987;1319;903;455", "wc_reply_reviewers": "207;27;45;0", "wc_reply_authors": "1495;453;469;41", "reply_reviewers": "4;1;1;0", "reply_authors": "5;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 142.75, 34.273714417903406 ], "wc_strengths_and_weaknesses_avg": [ 521.5, 145.21277492011507 ], "wc_questions_avg": [ 183.0, 197.00380706981275 ], "wc_limitations_avg": [ 68.75, 75.52938170010397 ], "wc_review_avg": [ 916.0, 308.27747241730134 ], "wc_reply_reviewers_avg": [ 69.75, 80.84359900449756 ], "wc_reply_authors_avg": [ 614.5, 536.5246965424798 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12605388632728515039&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucmerced.edu;;;google.com;umcerced.edu;", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Merced;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucmerced.edu;https://www.google.com", "aff_unique_abbr": "UC Merced;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Merced;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Conditional Meta-Learning of Linear Representations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54225", "id": "0Uejkm1GB1U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/01ecd39ca49ddecc5729ca996304781b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Uejkm1GB1U", "openreview": "https://openreview.net/forum?id=0Uejkm1GB1U", "poster": "/media/PosterPDFs/NeurIPS%202022/54225.png?t=1669401024.346318", "slides": "https://nips.cc/virtual/2022/poster/54225", "video": "https://nips.cc/virtual/2022/poster/54225", "author_site": "Giulia Denevi, Massimiliano Pontil, Carlo Ciliberto", "tldr": "We propose a conditional Meta-Learning algorithm aiming at inferring linear representations for heterogeneous environments of tasks.", "abstract": "Standard meta-learning for representation learning aims to find a common representation to be shared across multiple tasks. The effectiveness of these methods is often limited when the nuances of the tasks\u2019 distribution cannot be captured by a single representation. In this work we overcome this issue by inferring a conditioning function, mapping the tasks\u2019 side information (such as the tasks\u2019 training dataset itself) into a representation tailored to the task at hand. 
We study environments in which our conditional strategy outperforms standard meta-learning, such as those in which tasks can be organized in separate clusters according to the representation they share. We then propose a meta-algorithm capable of leveraging this advantage in practice. In the unconditional setting, our method yields a new estimator enjoying faster learning rates and requiring fewer hyper-parameters to tune than current state-of-the-art methods. Our results are supported by preliminary experiments.", "keywords": "Conditional Meta-Learning;Linear Representation Learning;Statistical Learning Theory;Online Learning", "primary_area": "", "supplementary_material": "/attachment/01228c3f1ac28ccd3ec4cb99fa461dde0b5704d2.zip", "author": "Giulia Denevi;Massimiliano Pontil;Carlo Ciliberto", "authorids": "~Giulia_Denevi1;~Massimiliano_Pontil4;~Carlo_Ciliberto1", "gender": "F;Not Specified;M", "homepage": ";https://www.iit.it/web/computational-statistics-and-machine-learning;https://cciliber.github.io/", "dblp": "217/3518;;88/10332", "google_scholar": "ckVkVnIAAAAJ;lcOacs8AAAAJ;XUcUAisAAAAJ", "orcid": ";0000-0001-9415-098X;", "linkedin": ";;", "or_profile": "~Giulia_Denevi1;~Massimiliano_Pontil4;~Carlo_Ciliberto1", "aff": "Universit\u00e0 degli Studi di Genova;University College London, University of London;University College London", "aff_domain": "unige.it;ucl.ac.uk;ucl.ac.uk", "position": "Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ndenevi2022conditional,\ntitle={Conditional Meta-Learning of Linear Representations},\nauthor={Giulia Denevi and Massimiliano Pontil and Carlo Ciliberto},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0Uejkm1GB1U}\n}", "github": "", "project": "", "reviewers": "5r8e;oWcc;4JRE;1CMy", "pdf_size": 1330974, "rating": "4;5;7;7", "confidence": "3;3;2;2", "soundness": "4;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "contribution": "2;3;2;3", "wc_summary": "72;72;72;56", "wc_strengths_and_weaknesses": "91;78;68;207", "wc_questions": "108;62;20;46", "wc_limitations": "30;26;17;17", "wc_review": "301;238;177;326", "wc_reply_reviewers": "34;0;0;0", "wc_reply_authors": "559;256;246;577", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.0, 6.928203230275509 ], "wc_strengths_and_weaknesses_avg": [ 111.0, 56.0223169817172 ], "wc_questions_avg": [ 59.0, 32.01562118716424 ], "wc_limitations_avg": [ 22.5, 5.678908345800274 ], "wc_review_avg": [ 260.5, 57.89861829094024 ], "wc_reply_reviewers_avg": [ 8.5, 14.722431864335457 ], "wc_reply_authors_avg": [ 409.5, 158.66710434113304 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3312458911340608514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "unige.it;ucl.ac.uk;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Universit\u00e0 degli Studi di Genova;University College London", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.unige.it;https://www.ucl.ac.uk", "aff_unique_abbr": "UniGe;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Italy;United Kingdom" }, { "title": "Improving Transformer with an Admixture of Attention Heads", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54356", "id": "0VFQhPGF1M3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b2e4edd53059e24002a0c916d75cc9a3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0VFQhPGF1M3", "openreview": "https://openreview.net/forum?id=0VFQhPGF1M3", "poster": "/media/PosterPDFs/NeurIPS%202022/54356.png?t=1669590101.9681041", "slides": "https://nips.cc/virtual/2022/poster/54356", "video": "https://nips.cc/virtual/2022/poster/54356", "author_site": "Tan Nguyen, Tam Nguyen, Hai Do, Khai Nguyen, Vishwanath Saragadam, Minh Pham, Khuong Duy Nguyen, Nhat Ho, Stanley Osher", "tldr": "We propose the Transformer with a Finite Admixture of Shared Heads (FiSHformers), a novel class of efficient and flexible transformers that allow the sharing of attention matrices between attention heads via a finite admixture model.", "abstract": "Transformers with multi-head self-attention have achieved remarkable success in sequence modeling and beyond. However, they suffer from high computational and memory complexities for computing the attention matrix at each head. Recently, it has been shown that those attention matrices lie on a low-dimensional manifold and, thus, are redundant. We propose the Transformer with a Finite Admixture of Shared Heads (FiSHformers), a novel class of efficient and flexible transformers that allow the sharing of attention matrices between attention heads. At the core of FiSHformer is a novel finite admixture model of shared heads (FiSH) that samples attention matrices from a set of global attention matrices. The number of global attention matrices is much smaller than the number of local attention matrices generated. FiSHformers directly learn these global attention matrices rather than the local ones as in other transformers, thus significantly improving the computational and memory efficiency of the model. We empirically verify the advantages of the FiSHformer over the baseline transformers in a wide range of practical applications including language modeling, machine translation, and image classification. On the WikiText-103, IWSLT'14 De-En and WMT'14 En-De, FiSHformers use much fewer floating-point operations per second (FLOPs), memory, and parameters compared to the baseline transformers. 
", "keywords": "transformer;admixture;attentions;redundant heads", "primary_area": "", "supplementary_material": "/attachment/b4793a97b80b87588335a183518b6437f4df191a.zip", "author": "Tan Minh Nguyen;Tam Minh Nguyen;Hai Ngoc Do;Khai Nguyen;Vishwanath Saragadam;Minh Pham;Nguyen Duy Khuong;Nhat Ho;Stanley Osher", "authorids": "~Tan_Minh_Nguyen1;~Tam_Minh_Nguyen1;~Hai_Ngoc_Do1;~Khai_Nguyen1;~Vishwanath_Saragadam1;~Minh_Pham1;~Nguyen_Duy_Khuong1;~Nhat_Ho1;~Stanley_Osher1", "gender": "M;F;M;M;M;M;M;M;M", "homepage": "https://tanmnguyen89.github.io/;;;https://khainb.com;https://vishwa91.github.io;;https://khuongnd.github.io/;https://nhatptnk8912.github.io/;https://www.math.ucla.edu/~sjo/", "dblp": "255/4725;251/1464;;120/4308;172/1229;34/3955;;203/4479;", "google_scholar": "OizOh88AAAAJ;;;im5fNaQAAAAJ;u-xGD2AAAAAJ;;vAOT46YAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ;", "orcid": ";;;;0000-0001-8028-7520;;;;", "linkedin": ";tam-nguyen-6a3935132/;felix-do/;;;;;nhat-pham-minh-ho-267b8164/;", "or_profile": "~Tan_Minh_Nguyen1;~Tam_Minh_Nguyen1;~Hai_Ngoc_Do1;~Khai_Nguyen1;~Vishwanath_Saragadam1;~Minh_Pham1;~Nguyen_Duy_Khuong1;~Nhat_Ho1;~Stanley_Osher1", "aff": "University of California, Los Angeles;FPT Software;;University of Texas, Austin;Rice University;University of California, Los Angeles;FPT Software Ltd. - FPT Corporation;University of Texas, Austin;University of California, Los Angeles", "aff_domain": "ucla.edu;fsoft.com.vn;;utexas.edu;rice.edu;ucla.edu;fpt-software.com;utexas.edu;ucla.edu", "position": "Postdoc;FPT AI Residency;;PhD student;Postdoc;Postdoc;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nnguyen2022improving,\ntitle={Improving Transformer with an Admixture of Attention Heads},\nauthor={Tan Minh Nguyen and Tam Minh Nguyen and Hai Ngoc Do and Khai Nguyen and Vishwanath Saragadam and Minh Pham and Nguyen Duy Khuong and Nhat Ho and Stanley Osher},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0VFQhPGF1M3}\n}", "github": "", "project": "", "reviewers": "tCZN;ofWV;eA9U", "pdf_size": 1777429, "rating": "6;7;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "4;2;3", "contribution": "3;3;4", "wc_summary": "42;67;22", "wc_strengths_and_weaknesses": "184;56;76", "wc_questions": "100;21;30", "wc_limitations": "10;4;1", "wc_review": "336;148;129", "wc_reply_reviewers": "18;0;191", "wc_reply_authors": "1167;2044;1935", "reply_reviewers": "1;0;1", "reply_authors": "4;5;5", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 43.666666666666664, 18.408935028645434 ], "wc_strengths_and_weaknesses_avg": [ 105.33333333333333, 56.221783046154705 ], "wc_questions_avg": [ 50.333333333333336, 35.31131389355101 ], "wc_limitations_avg": [ 5.0, 3.7416573867739413 ], "wc_review_avg": [ 204.33333333333334, 93.42495502927589 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 86.10974909317115 ], "wc_reply_authors_avg": [ 1715.3333333333333, 390.27540133716974 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.666666666666667, 0.4714045207910317 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16797358298717652414&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucla.edu;fsoft.com.vn;;utexas.edu;rice.edu;ucla.edu;fpt-software.com;utexas.edu;ucla.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;0;1;2;0", "aff_unique_norm": "University of California, Los Angeles;FPT Corporation;University of Texas at Austin;Rice University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucla.edu;https://www.fpt-software.com;https://www.utexas.edu;https://www.rice.edu", "aff_unique_abbr": "UCLA;FPT;UT Austin;Rice", "aff_campus_unique_index": "0;2;0;2;0", "aff_campus_unique": "Los Angeles;;Austin", "aff_country_unique_index": "0;1;0;0;0;1;0;0", "aff_country_unique": "United States;Vietnam" }, { "title": "Learning to Compare Nodes in Branch and Bound with Graph Neural Networks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52845", "id": "0VhrZPJXcTU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/cf5bb18807a3e9cfaaa51e667e18f807-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0VhrZPJXcTU", "openreview": "https://openreview.net/forum?id=0VhrZPJXcTU", "poster": "/media/PosterPDFs/NeurIPS%202022/52845.png?t=1669321765.1951838", "slides": "https://nips.cc/virtual/2022/poster/52845", "video": "https://nips.cc/virtual/2022/poster/52845", "author_site": "Abdel Ghani Labassi, Didier Chetelat, Andrea Lodi", "tldr": "", "abstract": "Branch-and-bound approaches in integer programming require ordering portions of the space to explore next, a problem known as node comparison. We propose a new siamese graph neural network model to tackle this problem, where the nodes are represented as bipartite graphs with attributes. 
Similar to prior work, we train our model to imitate a diving oracle that plunges towards the optimal solution. We evaluate our method by solving the instances in a plain framework where the nodes are explored according to their rank. On three NP-hard benchmarks chosen to be particularly primal-difficult, our approach leads to faster solving and smaller branch-and-bound trees than the default ranking function of the open-source solver SCIP, as well as competing machine learning methods. Moreover, these results generalize to instances larger than those used for training. Code for reproducing the experiments can be found at https://github.com/ds4dm/learn2comparenodes.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/1830d6929a19dcb4585b5499b46c4640e7cd9339.zip", "author": "Abdel Ghani Labassi;Didier Ch\u00e9telat;Andrea Lodi", "authorids": "~Abdel_Ghani_Labassi1;~Didier_Ch\u00e9telat1;~Andrea_Lodi1", "gender": "M;M;M", "homepage": ";https://www.didierchetelat.com;https://www.gerad.ca/en/people/andrea-lodi", "dblp": ";;", "google_scholar": ";https://scholar.google.ca/citations?user=IkTwAY0AAAAJ;", "orcid": ";;", "linkedin": "https://ca.linkedin.com/in/aglabassi;;", "or_profile": "~Abdel_Ghani_Labassi1;~Didier_Ch\u00e9telat1;~Andrea_Lodi1", "aff": "Universit\u00e9 de Montr\u00e9al;Polytechnique Montreal;Cornell University", "aff_domain": "umontreal.ca;polymtl.ca;cornell.edu", "position": "MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nlabassi2022learning,\ntitle={Learning to Compare Nodes in Branch and Bound with Graph Neural Networks},\nauthor={Abdel Ghani Labassi and Didier Ch{\\'e}telat and Andrea Lodi},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0VhrZPJXcTU}\n}", "github": "", "project": "", "reviewers": "zJfP;zm9Z;ksTx", "pdf_size": 334184, "rating": "3;3;5", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "1;1;2", "presentation": "1;2;3", "contribution": "1;1;2", "wc_summary": "17;40;138", "wc_strengths_and_weaknesses": "155;100;318", "wc_questions": "34;4;5", "wc_limitations": "1;15;1", "wc_review": "207;159;462", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "410;434;551", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 65.0, 52.46586191674227 ], "wc_strengths_and_weaknesses_avg": [ 191.0, 92.56709278499928 ], "wc_questions_avg": [ 14.333333333333334, 13.91242450313947 ], "wc_limitations_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_review_avg": [ 276.0, 132.9736816065495 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 465.0, 61.59545437773797 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2705976177527772812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "umontreal.ca;polymtl.ca;cornell.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Polytechnique 
Montreal;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umontreal.ca;https://www.polymtl.ca;https://www.cornell.edu", "aff_unique_abbr": "UdeM;PolyMTL;Cornell", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Active Learning Helps Pretrained Models Learn the Intended Task", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53607", "id": "0Ww7UVEoNue", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b43a0e8a35b1c044b18cd843b9771915-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Ww7UVEoNue", "openreview": "https://openreview.net/forum?id=0Ww7UVEoNue", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53607", "video": "https://nips.cc/virtual/2022/poster/53607", "author_site": "Alex Tamkin, Dat Nguyen, Salil Deshpande, Jesse Mu, Noah Goodman", "tldr": "Active learning helps pretrained models overcome spurious correlations and domain shifts", "abstract": "Models can fail in unpredictable ways during deployment due to task ambiguity, when multiple behaviors are consistent with the provided training data. An example is an object classifier trained on red squares and blue circles: when encountering blue squares, the intended behavior is undefined. We investigate whether pretrained models are better active learners, capable of disambiguating between the possible tasks a user may be trying to specify. Intriguingly, we find that better active learning is an emergent property of the pretraining process: pretrained models require up to 5 times fewer labels when using uncertainty-based active learning, while non-pretrained models see no or even negative benefit. We find these gains come from an ability to select examples with attributes that disambiguate the intended behavior, such as rare product categories or atypical backgrounds. These attributes are far more linearly separable in pretrained model's representation spaces vs non-pretrained models, suggesting a possible mechanism for this behavior.", "keywords": "pretrained models;active learning;few shot learning;alignment", "primary_area": "", "supplementary_material": "/attachment/7bb19ee1a10425dace8875ef13c07dc5c82735f2.pdf", "author": "Alex Tamkin;Dat Pham Nguyen;Salil Deshpande;Jesse Mu;Noah Goodman", "authorids": "~Alex_Tamkin1;~Dat_Pham_Nguyen1;~Salil_Deshpande1;~Jesse_Mu1;~Noah_Goodman1", "gender": ";;;;", "homepage": ";;https://icme.stanford.edu/people/salil-deshpande;https://www.jesse.mu/;https://cocolab.stanford.edu/", "dblp": ";;;205/9022;96/1216", "google_scholar": ";;;djLcGEQAAAAJ;OUpIbcQAAAAJ", "orcid": ";;;0000-0002-0812-2710;", "linkedin": ";datpn2/;;jayelm;", "or_profile": "~Alex_Tamkin1;~Dat_Pham_Nguyen1;~Salil_Deshpande1;~Jesse_Mu1;~Noah_Goodman1", "aff": ";;Stanford University;Stanford University;Stanford University", "aff_domain": ";;stanford.edu;stanford.edu;stanford.edu", "position": ";;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntamkin2022active,\ntitle={Active Learning Helps Pretrained Models Learn the Intended Task},\nauthor={Alex Tamkin and Dat Pham Nguyen and Salil Deshpande and Jesse Mu and Noah Goodman},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0Ww7UVEoNue}\n}", "github": "", "project": "", "reviewers": "KZEm;DEK8;GGwr", "pdf_size": 622536, "rating": "4;5;7", "confidence": "4;2;3", "soundness": "3;3;4", "novelty": "2;2;2", "presentation": "3;4;4", "contribution": "2;2;2", "wc_summary": "160;34;95", "wc_strengths_and_weaknesses": "298;37;180", "wc_questions": "6;26;76", "wc_limitations": "26;1;4", "wc_review": "490;98;355", "wc_reply_reviewers": "0;0;122", "wc_reply_authors": "577;170;999", "reply_reviewers": "0;0;2", "reply_authors": "1;1;3", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 96.33333333333333, 51.44792404838983 ], "wc_strengths_and_weaknesses_avg": [ 171.66666666666666, 106.71561376959897 ], "wc_questions_avg": [ 36.0, 29.43920288775949 ], "wc_limitations_avg": [ 10.333333333333334, 11.14550233153366 ], "wc_review_avg": [ 314.3333333333333, 162.59629625411387 ], "wc_reply_reviewers_avg": [ 40.666666666666664, 57.51135153650587 ], "wc_reply_authors_avg": [ 582.0, 338.45629949325314 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3178723025927694916&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": ";;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Accelerated Projected Gradient Algorithms for Sparsity Constrained Optimization Problems", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54541", "id": "0Z0xltoU1q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/aab3003c922e0fcd2fd2c951fa3c03ad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0Z0xltoU1q", "openreview": "https://openreview.net/forum?id=0Z0xltoU1q", "poster": "/media/PosterPDFs/NeurIPS%202022/54541.png?t=1669224710.6564405", "slides": "https://nips.cc/virtual/2022/poster/54541", "video": "https://nips.cc/virtual/2022/poster/54541", "author_site": "Jan Harold Alcantara, Ching-pei Lee", "tldr": "For optimization problems with a sparsity constraint, we propose acceleration methods with provably faster convergence rates and significantly faster empirical speed than the state of the art.", "abstract": "We consider the projected gradient algorithm for the nonconvex best subset selection problem that minimizes a given empirical loss function under an $\\ell_0$-norm constraint. Through decomposing the feasible set of the given sparsity constraint as a finite union of linear subspaces, we present two acceleration schemes with global convergence guarantees, one by same-space extrapolation and the other by subspace identification. 
The former fully utilizes the problem structure to greatly accelerate the optimization speed with only negligible additional cost. The latter leads to a two-stage meta-algorithm that first uses classical projected gradient iterations to identify the correct subspace containing an optimal solution, and then switches to a highly-efficient smooth optimization method in the identified subspace to attain superlinear convergence. Experiments demonstrate that the proposed accelerated algorithms are magnitudes faster than their non-accelerated counterparts as well as the state of the art.", "keywords": "projected gradient method;sparse optimization;accelerated algorithms", "primary_area": "", "supplementary_material": "/attachment/c281bc2731724ed29ef9e79b81be26cb8c850295.zip", "author": "Jan Harold Mercado Alcantara;Ching-pei Lee", "authorids": "~Jan_Harold_Mercado_Alcantara1;~Ching-pei_Lee2", "gender": "M;Unspecified", "homepage": ";http://leepei.github.io", "dblp": ";", "google_scholar": "https://scholar.google.com.ph/citations?user=E6oQznwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7242-4414;", "linkedin": ";", "or_profile": "~Jan_Harold_Mercado_Alcantara1;~Ching-Pei_Lee1", "aff": "Institute of Statistical Science, Academia Sinia;", "aff_domain": "stat.sinica.edu.tw;", "position": "Postdoc;", "bibtex": "@inproceedings{\nalcantara2022accelerated,\ntitle={Accelerated Projected Gradient Algorithms for Sparsity Constrained Optimization Problems},\nauthor={Jan Harold Mercado Alcantara and Ching-pei Lee},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0Z0xltoU1q}\n}", "github": "", "project": "", "reviewers": "eyvq;vhqu;TQ21;Kgs5", "pdf_size": 559041, "rating": "6;6;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "contribution": "3;3;3;3", "wc_summary": "55;170;20;122", "wc_strengths_and_weaknesses": "201;251;95;313", "wc_questions": "26;126;110;123", "wc_limitations": "42;22;5;14", "wc_review": "324;569;230;572", "wc_reply_reviewers": "143;30;0;0", "wc_reply_authors": "1110;929;254;1404", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.75, 58.1737698623701 ], "wc_strengths_and_weaknesses_avg": [ 215.0, 79.8373346248483 ], "wc_questions_avg": [ 96.25, 41.002286521607545 ], "wc_limitations_avg": [ 20.75, 13.663363421939708 ], "wc_review_avg": [ 423.75, 150.46988901438056 ], "wc_reply_reviewers_avg": [ 43.25, 58.87858269353976 ], "wc_reply_authors_avg": [ 924.25, 422.4691556788495 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7653397836697971420&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "stat.sinica.edu.tw;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Academia Sinica", "aff_unique_dep": "Institute of Statistical Science", "aff_unique_url": "https://www.sinica.edu.tw", "aff_unique_abbr": "Academia Sinica", "aff_campus_unique_index": "0", "aff_campus_unique": 
"Taiwan", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Distributionally Robust Optimization via Ball Oracle Acceleration", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54253", "id": "0ZKyTHwF5V1", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/e90b00adc3ba130eb2510d93ba3ff250-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0ZKyTHwF5V1", "openreview": "https://openreview.net/forum?id=0ZKyTHwF5V1", "poster": "/media/PosterPDFs/NeurIPS%202022/54253.png?t=1669904012.167133", "slides": "https://nips.cc/virtual/2022/poster/54253", "video": "https://nips.cc/virtual/2022/poster/54253", "author_site": "Yair Carmon, Danielle Hausler", "tldr": "We develop and theoretically analyze algorithms for distributionally robust optimization with group-structured and bounded $f$-divergence uncertainty sets.", "abstract": "We develop and analyze algorithms for distributionally robust optimization (DRO) of convex losses. In particular, we consider group-structured and bounded $f$-divergence uncertainty sets. Our approach relies on an accelerated method that queries a ball optimization oracle, i.e., a subroutine that minimizes the objective within a small ball around the query point. Our main contribution is efficient implementations of this oracle for DRO objectives. For DRO with $N$ non-smooth loss functions, the resulting algorithms find an $\\epsilon$-accurate solution with $\\widetilde{O}\\left(N\\epsilon^{-2/3} + \\epsilon^{-2}\\right)$ first-order oracle queries to individual loss functions. Compared to existing algorithms for this problem, we improve complexity by a factor of up to $\\epsilon^{-4/3}$.", "keywords": "convex optimization;distributionally robust optimization;theory;oracle complexity;monteiro-svaiter acceleration;accelerated methods;algorithm design;entropy regularization;multilevel monte-carlo", "primary_area": "", "supplementary_material": "/attachment/9c1a761b36ebd5a54ff974e22b13d408e91277ac.pdf", "author": "Yair Carmon;Danielle Hausler", "authorids": "~Yair_Carmon1;~Danielle_Hausler1", "gender": "M;F", "homepage": "https://www.cs.tau.ac.il/~ycarmon/;", "dblp": "13/558;", "google_scholar": "kTKmpT0AAAAJ;", "orcid": ";", "linkedin": ";danielle-hausler-027238143/", "or_profile": "~Yair_Carmon1;~Danielle_Hausler1", "aff": "Tel Aviv University;Tel Aviv University", "aff_domain": "tau.ac.il;tau.ac.il", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\ncarmon2022distributionally,\ntitle={Distributionally Robust Optimization via Ball Oracle Acceleration},\nauthor={Yair Carmon and Danielle Hausler},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0ZKyTHwF5V1}\n}", "github": "", "project": "", "reviewers": "ZWwH;MgTq;zx8s;uy4v", "pdf_size": 827573, "rating": "3;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "1;3;3;3", "presentation": "2;2;3;2", "contribution": "1;3;3;3", "wc_summary": "176;49;55;296", "wc_strengths_and_weaknesses": "375;48;69;141", "wc_questions": "57;31;17;48", "wc_limitations": "30;3;10;45", "wc_review": "638;131;151;530", "wc_reply_reviewers": "0;0;0;23", "wc_reply_authors": "1166;190;682;568", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 144.0, 101.33360745576958 ], "wc_strengths_and_weaknesses_avg": [ 158.25, 129.80634614686602 ], "wc_questions_avg": [ 38.25, 15.417117110536587 ], "wc_limitations_avg": [ 22.0, 16.56804152578089 ], "wc_review_avg": [ 362.5, 224.87830042047187 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 651.5, 348.42323401288843 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13719981277654967775&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tau.ac.il;tau.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "RainNet: A Large-Scale Imagery Dataset and Benchmark for Spatial Precipitation Downscaling", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55341", "id": "0cn6LSqwjUv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3fbf0c1ea0716c03dea93bb6be78dd6f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0cn6LSqwjUv", "openreview": "https://openreview.net/forum?id=0cn6LSqwjUv", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55341", "video": "https://nips.cc/virtual/2022/poster/55341", "author_site": "Xuanhong Chen, Kairui Feng, Naiyuan Liu, Bingbing Ni, Yifan Lu, Zhengyan Tong, Ziang Liu", "tldr": "", "abstract": "AI-for-science approaches have been applied to solve scientific problems (e.g., nuclear fusion, ecology, genomics, meteorology) and have achieved highly promising results. Spatial precipitation downscaling is one of the most important meteorological problems and urgently requires the participation of AI. However, the lack of a well-organized and annotated large-scale dataset hinders the training and verification of more effective and advanced deep-learning models for precipitation downscaling. To alleviate these obstacles, we present the first large-scale spatial precipitation downscaling dataset named RainNet, which contains more than 62,400 pairs of high-quality low/high-resolution precipitation maps for over 17 years, ready to help the evolution of deep learning models in precipitation downscaling. 
Specifically, the precipitation maps carefully collected in RainNet cover various meteorological phenomena (e.g., hurricane, squall), which is of great help in improving the generalization ability of models. In addition, the map pairs in RainNet are organized in the form of image sequences (720 maps per month or 1 map/hour), showing complex physical properties, e.g., temporal misalignment, temporal sparsity, and fluid properties. Furthermore, two deep-learning-oriented metrics are specifically introduced to evaluate or verify the comprehensive performance of the trained model (e.g., prediction map reconstruction accuracy). To illustrate the applications of RainNet, 14 state-of-the-art models, including deep models and traditional approaches, are evaluated. To fully explore potential downscaling solutions, we propose an implicit physical estimation benchmark framework to learn the above characteristics. Extensive experiments demonstrate the value of RainNet in training and evaluating downscaling models. Our dataset is available at https://neuralchen.github.io/RainNet/.", "keywords": "Machine Learning for Sciences;Downscaling;Meteorological Problems", "primary_area": "", "supplementary_material": "/attachment/61fcdeb0a0a0e57a99361ea609506e5720dede18.zip", "author": "Xuanhong Chen;Kairui Feng;Naiyuan Liu;Bingbing Ni;Yifan Lu;Zhengyan Tong;Ziang Liu", "authorids": "~Xuanhong_Chen1;~Kairui_Feng1;~Naiyuan_Liu1;~Bingbing_Ni3;~Yifan_Lu1;~Zhengyan_Tong1;~Ziang_Liu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/neuralchen;https://cee.princeton.edu/people/kairui-kelvin-feng;https://github.com/NNNNAI;;https://yifanlu0227.github.io;https://github.com/TZYSJTU;", "dblp": "255/6337;;;64/831.html;;281/6780;", "google_scholar": "UuCqlfEAAAAJ;;;V9W87PYAAAAJ;hiXGPH8AAAAJ;HqKiqgwAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;yifan-lu-65ab69229/;;%E5%AD%90%E6%98%82-%E5%88%98-8aaa36186/", "or_profile": "~Xuanhong_Chen1;~Kairui_Feng1;~Naiyuan_Liu1;~Bingbing_Ni3;~Yifan_Lu1;~Zhengyan_Tong1;~Ziang_Liu1", "aff": "Shanghai Jiaotong University;Princeton University;University of Technology Sydney;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;princeton.edu;uts.edu.au;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;", "position": "PhD student;Postdoc;MS student;Full Professor;Undergrad student;MS student;", "bibtex": "@inproceedings{\nchen2022rainnet,\ntitle={RainNet: A Large-Scale Imagery Dataset and Benchmark for Spatial Precipitation Downscaling},\nauthor={Xuanhong Chen and Kairui Feng and Naiyuan Liu and Bingbing Ni and Yifan Lu and Zhengyan Tong and Ziang Liu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0cn6LSqwjUv}\n}", "github": "", "project": "", "reviewers": "WVYT;9mB6;aP2Q;5jsP", "pdf_size": 3246906, "rating": "4;5;6;6", "confidence": "4;3;4;2", "soundness": "2;3;3;2", "novelty": "3;3;3;3", "presentation": "2;3;4;4", "contribution": "3;3;3;3", "wc_summary": "42;114;96;78", "wc_strengths_and_weaknesses": "281;2;179;18", "wc_questions": "103;7;27;15", "wc_limitations": "3;32;29;8", "wc_review": "429;155;331;119", "wc_reply_reviewers": "179;0;148;0", "wc_reply_authors": "1546;432;966;693", "reply_reviewers": "1;0;1;0", "reply_authors": "4;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.5, 26.622359023948274 ], "wc_strengths_and_weaknesses_avg": [ 120.0, 115.89866263249115 ], "wc_questions_avg": [ 38.0, 38.19685850956856 ], "wc_limitations_avg": [ 18.0, 12.668859459319927 ], "wc_review_avg": [ 258.5, 126.98326661414882 ], "wc_reply_reviewers_avg": [ 81.75, 82.4814373056144 ], "wc_reply_authors_avg": [ 909.25, 413.28039815602193 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4545454545454545, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15952032456003796992&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;princeton.edu;uts.edu.au;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Princeton University;University of Technology Sydney", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.princeton.edu;https://www.uts.edu.au", "aff_unique_abbr": "SJTU;Princeton;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "China;United States;Australia" }, { "title": "Sequence-to-Set Generative Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53422", "id": "0dt8wdYIAV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6091f2bb355e960600f62566ac0e2862-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0dt8wdYIAV", "openreview": "https://openreview.net/forum?id=0dt8wdYIAV", "poster": "/media/PosterPDFs/NeurIPS%202022/3e9928ece00c78dc7777c644f68d3956.png?t=1666590500.9711964", "slides": "https://nips.cc/virtual/2022/poster/53422", "video": "https://nips.cc/virtual/2022/poster/53422", "author_site": "Longtao Tang, Ying Zhou, Yu Yang", "tldr": "A sequence-to-set method that can transform any sequence generative models based on maximum likelihood to a set generative model.", "abstract": "In this paper, we propose a sequence-to-set method that can transform any sequence generative model based on maximum likelihood to a set generative model where we can evaluate the utility/probability of any set. An efficient importance sampling algorithm is devised to tackle the computational challenge of learning our sequence-to-set model. 
We present GRU2Set, which is an instance of our sequence-to-set method and employs the famous GRU model as the sequence generative model.\nTo further obtain permutation invariant representation of sets, we devise the SetNN model which is also an instance of the sequence-to-set model. A direct application of our models is to learn an order/set distribution from a collection of e-commerce orders, which is an essential step in many important operational decisions such as inventory arrangement for fast delivery. Based on the intuition that small-sized sets are usually easier to learn than large sets, we propose a size-bias trick that can help learn better set distributions with respect to the $\\ell_1$-distance evaluation metric. Two e-commerce order datasets, TMALL and HKTVMALL, are used to conduct extensive experiments to show the effectiveness of our models. The experimental results demonstrate that our models can learn better set/order distributions from order data than the baselines. Moreover, no matter what model we use, applying the size-bias trick can always improve the quality of the set distribution learned from data.", "keywords": "generative models;set data;sequences;e-commerce orders;representation learning", "primary_area": "", "supplementary_material": "/attachment/336c883f617e1408b98aaa3840404147575fa635.pdf", "author": "Longtao Tang;Ying Zhou;Yu Yang", "authorids": "~Longtao_Tang1;~Ying_Zhou4;~Yu_Yang9", "gender": "M;F;M", "homepage": "https://scholars.cityu.edu.hk/person/longttang2;https://sites.google.com/view/ying-zhou/home;https://yuyangcs.github.io/", "dblp": "329/6147;;16/4505-1.html", "google_scholar": ";;https://scholar.google.ca/citations?user=EMG6cL0AAAAJ", "orcid": "0000-0002-6312-5893;0000-0003-0687-7818;0000-0002-8209-2898", "linkedin": ";;", "or_profile": "~Longtao_Tang1;~Ying_Zhou4;~Yu_Yang9", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ntang2022sequencetoset,\ntitle={Sequence-to-Set Generative Models},\nauthor={Longtao Tang and Ying Zhou and Yu Yang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0dt8wdYIAV}\n}", "github": "", "project": "", "reviewers": "DMSE;uw8U;1KAT", "pdf_size": 1143158, "rating": "6;6;6", "confidence": "1;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "contribution": "3;3;3", "wc_summary": "113;117;148", "wc_strengths_and_weaknesses": "51;130;163", "wc_questions": "74;23;19", "wc_limitations": "9;1;1", "wc_review": "247;271;331", "wc_reply_reviewers": "0;0;139", "wc_reply_authors": "99;193;1432", "reply_reviewers": "0;0;1", "reply_authors": "4;3;6", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 126.0, 15.641824275533422 ], "wc_strengths_and_weaknesses_avg": [ 114.66666666666667, 46.99172503986444 ], "wc_questions_avg": [ 38.666666666666664, 25.037749277618563 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 283.0, 35.32704346531139 ], "wc_reply_reviewers_avg": [ 46.333333333333336, 65.5252283899534 ], "wc_reply_authors_avg": [ 574.6666666666666, 607.4396174838193 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.333333333333333, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LAfRdkPyNqQJ:scholar.google.com/&scioq=Sequence-to-Set+Generative+Models&hl=en&as_sdt=0,44", "gs_version_total": 8, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Beyond Adult and COMPAS: Fair Multi-Class Prediction via Information Projection", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53097", "id": "0e0es11XAIM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/fd5013ea0c3f96931dec77174eaf9d80-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0e0es11XAIM", "openreview": "https://openreview.net/forum?id=0e0es11XAIM", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53097", "video": "https://nips.cc/virtual/2022/poster/53097", "author_site": "Wael Alghamdi, Hsiang Hsu, Haewon Jeong, Hao Wang, Peter Michalak, Shahab Asoodeh, Flavio Calmon", "tldr": "We introduce a post-processing fairness intervention for multi-class probabilistic classifiers.", "abstract": "We consider the problem of producing fair probabilistic classifiers for multi-class classification tasks. We formulate this problem in terms of ``projecting'' a pre-trained (and potentially unfair) classifier onto the set of models that satisfy target group-fairness requirements. The new, projected model is given by post-processing the outputs of the pre-trained classifier by a multiplicative factor. We provide a parallelizable, iterative algorithm for computing the projected classifier and derive both sample complexity and convergence guarantees. 
Comprehensive numerical comparisons with state-of-the-art benchmarks demonstrate that our approach maintains competitive performance in terms of accuracy-fairness trade-off curves, while achieving favorable runtime on large datasets. We also evaluate our method at scale on an open dataset with multiple classes, multiple intersectional groups, and over 1M samples.", "keywords": "group fairness;information projection;multi-class classification;new dataset", "primary_area": "", "supplementary_material": "/attachment/2d4cac8c4a82a3584bf2933cafafa208a748b183.pdf", "author": "Wael Alghamdi;Hsiang Hsu;Haewon Jeong;Hao Wang;Peter Winston Michalak;Shahab Asoodeh;Flavio Calmon", "authorids": "~Wael_Alghamdi1;~Hsiang_Hsu1;~Haewon_Jeong1;~Hao_Wang22;~Peter_Winston_Michalak1;~Shahab_Asoodeh1;~Flavio_Calmon1", "gender": ";M;;M;M;M;", "homepage": ";https://hsianghsu.github.io;http://www.haewonjeong.com;https://haowang94.github.io;;https://www.cas.mcmaster.ca/~asoodehs/;http://people.seas.harvard.edu/~flavio/", "dblp": ";;;;;63/8658;89/4611", "google_scholar": ";https://scholar.google.com.tw/citations?user=JRl3iYIAAAAJ;h8wIUwUAAAAJ;A3WtYhAAAAAJ;;CSxeFMsAAAAJ;P8N_YH4AAAAJ", "orcid": "0000-0001-6631-2160;0000-0001-8084-3929;;;;;", "linkedin": ";;;;peter-winston-m-2b508810a/;shahabasoodeh/;", "or_profile": "~Wael_Alghamdi1;~Hsiang_Hsu1;~Haewon_Jeong1;~Hao_Wang22;~Peter_Winston_Michalak1;~Shahab_Asoodeh1;~Flavio_Calmon1", "aff": "Harvard University;Harvard University;Harvard University;Harvard University;Harvard University;McMaster University;Harvard University", "aff_domain": "harvard.edu;harvard.edu;harvard.edu;harvard.edu;harvard.edu;mcmaster.ca;harvard.edu", "position": "PhD student;PhD student;Postdoc;PhD student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nalghamdi2022beyond,\ntitle={Beyond Adult and {COMPAS}: Fair Multi-Class Prediction via Information Projection},\nauthor={Wael Alghamdi and Hsiang Hsu and Haewon Jeong and Hao Wang and Peter Winston Michalak and Shahab Asoodeh and Flavio Calmon},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0e0es11XAIM}\n}", "github": "", "project": "", "reviewers": "RebA;5rHq;3JtH;4NMe", "pdf_size": 1168100, "rating": "7;7;8;8", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;4;3", "contribution": "3;3;4;4", "wc_summary": "78;60;78;74", "wc_strengths_and_weaknesses": "317;258;142;312", "wc_questions": "6;44;92;109", "wc_limitations": "86;1;15;79", "wc_review": "487;363;327;574", "wc_reply_reviewers": "15;599;0;35", "wc_reply_authors": "1934;2189;333;508", "reply_reviewers": "1;4;0;1", "reply_authors": "4;5;1;2", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 72.5, 7.399324293474371 ], "wc_strengths_and_weaknesses_avg": [ 257.25, 70.44634483065818 ], "wc_questions_avg": [ 62.75, 40.51774302697523 ], "wc_limitations_avg": [ 45.25, 37.65883030578618 ], "wc_review_avg": [ 437.75, 98.54282064158707 ], "wc_reply_reviewers_avg": [ 162.25, 252.46323989840582 ], "wc_reply_authors_avg": [ 1241.0, 827.7538885441735 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11921729286759446348&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "harvard.edu;harvard.edu;harvard.edu;harvard.edu;harvard.edu;mcmaster.ca;harvard.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Harvard University;McMaster University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.mcmaster.ca", "aff_unique_abbr": "Harvard;McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Unsupervised Adaptation from Repeated Traversals for Autonomous Driving", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53349", "id": "0fKlU1OlANc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b1eb88348ee19a33c81cf5bc3fb8e9d2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0fKlU1OlANc", "openreview": "https://openreview.net/forum?id=0fKlU1OlANc", "poster": "/media/PosterPDFs/NeurIPS%202022/53349.png?t=1669560970.7149634", "slides": "https://nips.cc/virtual/2022/poster/53349", "video": "https://nips.cc/virtual/2022/poster/53349", "author_site": "Yurong You, Cheng Perng Phoo, Katie Luo, Travis Zhang, Wei-Lun Chao, Bharath Hariharan, Mark Campbell, Kilian Weinberger", "tldr": "", "abstract": "For a self-driving car to operate reliably, its perceptual system must generalize to the end-user's environment --- ideally without additional annotation efforts. One potential solution is to leverage unlabeled data (e.g., unlabeled LiDAR point clouds) collected from the end-users' environments (i.e. target domain) to adapt the system to the difference between training and testing environments. While extensive research has been done on such an unsupervised domain adaptation problem, one fundamental problem lingers: there is no reliable signal in the target domain to supervise the adaptation process. 
To overcome this issue we observe that it is easy to collect unsupervised data from multiple traversals of repeated routes. While different from conventional unsupervised domain adaptation, this assumption is extremely realistic since many drivers share the same roads. We show that this simple additional assumption is sufficient to obtain a potent signal that allows us to perform iterative self-training of 3D object detectors on the target domain. Concretely, we generate pseudo-labels with the out-of-domain detector but reduce false positives by removing detections of supposedly mobile objects that are persistent across traversals. Further, we reduce false negatives by encouraging predictions in regions that are not persistent. We experiment with our approach on two large-scale driving datasets and show remarkable improvement in 3D object detection of cars, pedestrians, and cyclists, bringing us a step closer to generalizable autonomous driving.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/1a60f2be673815312bd182e0df16687cd7b1399c.pdf", "author": "Yurong You;Cheng Perng Phoo;Katie Z Luo;Travis Zhang;Wei-Lun Chao;Bharath Hariharan;Mark Campbell;Kilian Q Weinberger", "authorids": "~Yurong_You1;~Cheng_Perng_Phoo1;~Katie_Z_Luo1;~Travis_Zhang1;~Wei-Lun_Chao1;~Bharath_Hariharan3;~Mark_Campbell1;~Kilian_Q_Weinberger1", "gender": "M;M;F;;M;M;M;M", "homepage": "http://yurongyou.com;https://cpphoo.github.io/;https://www.cs.cornell.edu/~katieluo/;;https://sites.google.com/view/wei-lun-harry-chao;http://campbell.mae.cornell.edu;http://www.cs.cornell.edu/~kilian/;http://home.bharathh.info", "dblp": "199/1968;226/0521;207/8564;302/0033;64/8842;;88/4801;05/8412", "google_scholar": "rdwkreIAAAAJ;kt9D2usAAAAJ;qlmK27YAAAAJ;https://scholar.google.com/citations?hl=en;PGKakWwAAAAJ;e1iAhHQAAAAJ;jsxk8vsAAAAJ;TpglobcAAAAJ", "orcid": ";;;;0000-0003-1269-7231;;0009-0008-9313-7239;", "linkedin": "yurong-you/;;katieluo;travis-zhang;;;;", "or_profile": "~Yurong_You1;~Cheng_Perng_Phoo1;~Katie_Z_Luo1;~Travis_Zhang1;~Wei-Lun_Chao1;~Mark_Campbell1;~Kilian_Q_Weinberger1;~Bharath_Hariharan2", "aff": "Cornell University;Meta Facebook;Cornell University;Cornell University;Ohio State University;Cornell University;ASAPP Inc.;Cornell University", "aff_domain": "cornell.edu;fb.com;cornell.edu;cornell.edu;osu.edu;cornell.edu;asapp.com;cornell.edu", "position": "PhD student;Intern;PhD student;Undergrad student;Assistant Professor;Full Professor;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyou2022unsupervised,\ntitle={Unsupervised Adaptation from Repeated Traversals for Autonomous Driving},\nauthor={Yurong You and Cheng Perng Phoo and Katie Z Luo and Travis Zhang and Wei-Lun Chao and Bharath Hariharan and Mark Campbell and Kilian Q Weinberger},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0fKlU1OlANc}\n}", "github": "", "project": "", "reviewers": "mdTS;YZhW;FkR5;UWLf", "pdf_size": 5386984, "rating": "3;5;5;8", "confidence": "4;3;4;3", "soundness": "2;3;2;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "contribution": "2;3;3;3", "wc_summary": "83;77;63;235", "wc_strengths_and_weaknesses": "302;114;66;405", "wc_questions": "269;23;126;61", "wc_limitations": "93;53;1;5", "wc_review": "747;267;256;706", "wc_reply_reviewers": "0;0;42;31", "wc_reply_authors": "1230;247;1349;826", "reply_reviewers": "0;0;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.5, 69.94819511610002 ], "wc_strengths_and_weaknesses_avg": [ 221.75, 137.7395640329967 ], "wc_questions_avg": [ 119.75, 93.71065841194373 ], "wc_limitations_avg": [ 38.0, 37.77565353504821 ], "wc_review_avg": [ 494.0, 232.98390502350156 ], "wc_reply_reviewers_avg": [ 18.25, 18.659782956937093 ], "wc_reply_authors_avg": [ 913.0, 430.61293524463474 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7001400420140049, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8974062420707288722&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cornell.edu;fb.com;cornell.edu;cornell.edu;osu.edu;cornell.edu;asapp.com;cornell.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;2;0;3;0", "aff_unique_norm": "Cornell University;Meta;Ohio State University;ASAPP Inc.", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.cornell.edu;https://meta.com;https://www.osu.edu;https://www.asapp.com", "aff_unique_abbr": "Cornell;Meta;OSU;ASAPP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Game Decision Transformers", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52843", "id": "0gouO5saq6K", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b2cac94f82928a85055987d9fd44753f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0gouO5saq6K", "openreview": "https://openreview.net/forum?id=0gouO5saq6K", "poster": "/media/PosterPDFs/NeurIPS%202022/52843.png?t=1668019129.6671317", "slides": "https://nips.cc/virtual/2022/poster/52843", "video": "https://nips.cc/virtual/2022/poster/52843", "author_site": "Kuang-Huei Lee, Ofir Nachum, Mengjiao (Sherry) Yang, Lisa Lee, Daniel Freeman, Sergio Guadarrama, Ian Fischer, Winnie Xu, Eric Jang, Henryk Michalewski, Igor Mordatch", "tldr": "We learn one Multi-Game Decision Transformer to achieve close to human-level performance on up to 41 Atari games.", "abstract": "A longstanding goal of the field of AI is a method for learning a highly capable, generalist agent from diverse experience. In the subfields of vision and language, this was largely achieved by scaling up transformer-based models and training them on large, diverse datasets. Motivated by this progress, we investigate whether the same strategy can be used to produce generalist reinforcement learning agents. 
Specifically, we show that a single transformer-based model \u2013 with a single set of weights \u2013 trained purely offline can play a suite of up to 46 Atari games simultaneously at close-to-human performance. When trained and evaluated appropriately, we find that the same trends observed in language and vision hold, including scaling of performance with model size and rapid adaptation to new games via fine-tuning. We compare several approaches in this multi-game setting, such as online and offline RL methods and behavioral cloning, and find that our Multi-Game Decision Transformer models offer the best scalability and performance. We release the pre-trained models and code to encourage further research in this direction.", "keywords": "Reinforcement Learning;Generalist Agent;Multi-Environment RL;Upside-Down RL;Decision Transformers", "primary_area": "", "supplementary_material": "/attachment/1935db2082c401c5e690651a487749d3507d8a08.pdf", "author": "Kuang-Huei Lee;Ofir Nachum;Sherry Yang;Lisa Lee;C. Daniel Freeman;Sergio Guadarrama;Ian Fischer;Winnie Xu;Eric Jang;Henryk Michalewski;Igor Mordatch", "authorids": "~Kuang-Huei_Lee1;~Ofir_Nachum1;~Sherry_Yang1;~Lisa_Lee1;~C._Daniel_Freeman1;~Sergio_Guadarrama1;~Ian_Fischer1;~Winnie_Xu1;~Eric_Jang1;~Henryk_Michalewski1;~Igor_Mordatch4", "gender": "M;M;F;M;M;M;F;M;M;M;F", "homepage": "https://kuanghuei.github.io/;https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;https://sherryy.github.io;https://github.com/danielfreeman11/;https://research.google/people/105009/;;https://winniexu.ca;http://evjang.com;https://www.mimuw.edu.pl/~henrykm/;;http://leelisa.com/", "dblp": "66/11466;;;190/7046;89/2073.html;17/5600;285/6560;190/7794;https://dblp.uni-trier.de/pers/hd/m/Michalewski:Henryk;21/17;97/9403", "google_scholar": "rE7-N30AAAAJ;C-ZlBWMAAAAJ;7c1B_fIAAAAJ;t5Xsx0IAAAAJ;gYiCq88AAAAJ;tPnf61gAAAAJ;k4l-zNYAAAAJ;Izhkp4YAAAAJ;YdHW1ycAAAAJ;;eGIw04UAAAAJ", "orcid": ";;;;;;;;;;0000-0002-9791-484X", "linkedin": ";;;daniel-freeman-6952136?trk=hp-identity-name;;iantfischer;https://linkedin.com/in/winnie-xu;;henryk-michalewski-8a230a27/;;lileee", "or_profile": "~Kuang-Huei_Lee1;~Ofir_Nachum1;~Sherry_Yang1;~C._Daniel_Freeman1;~Sergio_Guadarrama1;~Ian_Fischer1;~Winnie_Xu1;~Eric_Jang1;~Henryk_Michalewski1;~Igor_Mordatch1;~Lisa_Seung-Yeon_Lee1", "aff": "Google;OpenAI;University of California, Berkeley;Google Research;Google;Google;University of Toronto;Google;Google DeepMind;OpenAI;", "aff_domain": "google.com;openai.com;berkeley.edu;google.com;google.com;google.com;utoronto.ca;google.com;google.com;openai.com;", "position": "Researcher;Researcher;Student;Software Engineer;Researcher;Researcher;Undergrad student;Researcher;Researcher;Research Scientist;", "bibtex": "@inproceedings{\nlee2022multigame,\ntitle={Multi-Game Decision Transformers},\nauthor={Kuang-Huei Lee and Ofir Nachum and Sherry Yang and Lisa Lee and C. Daniel Freeman and Sergio Guadarrama and Ian Fischer and Winnie Xu and Eric Jang and Henryk Michalewski and Igor Mordatch},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0gouO5saq6K}\n}", "github": "", "project": "", "reviewers": "4PtS;EPhF;wP61;ZHCn", "pdf_size": 1022511, "rating": "6;7;7;8", "confidence": "4;4;3;4", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "3;4;3;4", "contribution": "3;4;3;3", "wc_summary": "65;176;111;45", "wc_strengths_and_weaknesses": "31;212;291;90", "wc_questions": "6;118;43;12", "wc_limitations": "379;13;28;1", "wc_review": "481;519;473;148", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "854;153;357;65", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 50.360574857719804 ], "wc_strengths_and_weaknesses_avg": [ 156.0, 101.66366115776079 ], "wc_questions_avg": [ 44.75, 44.5610536230911 ], "wc_limitations_avg": [ 105.25, 158.33883762362285 ], "wc_review_avg": [ 405.25, 149.53657579334896 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 357.25, 305.7322153453901 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 262, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14749423539350176608&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "google.com;openai.com;berkeley.edu;google.com;google.com;google.com;utoronto.ca;google.com;google.com;openai.com;", "author_num": 11, "aff_unique_index": "0;1;2;0;0;0;3;0;0;1", "aff_unique_norm": "Google;OpenAI;University of California, Berkeley;University of Toronto", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://openai.com;https://www.berkeley.edu;https://www.utoronto.ca", "aff_unique_abbr": "Google;OpenAI;UC Berkeley;U of T", "aff_campus_unique_index": "0;2;0;0;0;0", "aff_campus_unique": "Mountain View;;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;1;0;2;0", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "Efficient Knowledge Distillation from Model Checkpoints", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55373", "id": "0ltDq6SjrfW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/03e0712bf85ebe7cec4f1a7fc53216c9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0ltDq6SjrfW", "openreview": "https://openreview.net/forum?id=0ltDq6SjrfW", "poster": "/media/PosterPDFs/NeurIPS%202022/b534ba68236ba543ae44b22bd110a1d6.png?t=1666261823.2822776", "slides": "https://nips.cc/virtual/2022/poster/55373", "video": "https://nips.cc/virtual/2022/poster/55373", "author_site": "Chaofei Wang, Qisen Yang, Rui Huang, Shiji Song, Gao Huang", "tldr": "This paper explains theoretically and experimentally that appropriate model checkpoints can be more economical and efficient than the fully converged models in knowledge distillation.", "abstract": "Knowledge distillation is an effective approach to learn compact models (students) with the supervision of large and strong models (teachers). As empirically there exists a strong correlation between the performance of teacher and student models, it is commonly believed that a high performing teacher is preferred. 
Consequently, practitioners tend to use a well-trained network or an ensemble of them as the teacher. In this paper, we observe that an intermediate model, i.e., a checkpoint in the middle of the training procedure, often serves as a better teacher compared to the fully converged model, although the former has much lower accuracy. More surprisingly, a weak snapshot ensemble of several intermediate models from the same training trajectory can outperform a strong ensemble of independently trained and fully converged models, when they are used as teachers. We show that this phenomenon can be partially explained by the information bottleneck principle: the feature representations of intermediate models can have higher mutual information regarding the input, and thus contain more ``dark knowledge'' for effective distillation. We further propose an optimal intermediate teacher selection algorithm based on maximizing the total task-related mutual information. Experiments verify its effectiveness and applicability. Our code is available at https://github.com/LeapLabTHU/CheckpointKD.", "keywords": "Deep learning;image classification;intermediate model;knowledge distillation;mutual information.", "primary_area": "", "supplementary_material": "/attachment/0a31ad0095ad20fb914ea112fdaed85118023db7.pdf", "author": "Chaofei Wang;Qisen Yang;Rui Huang;Shiji Song;Gao Huang", "authorids": "~Chaofei_Wang1;~Qisen_Yang1;~Rui_Huang9;~Shiji_Song1;~Gao_Huang1", "gender": "M;F;;M;M", "homepage": ";https://qisen-yang.netlify.app/;;;http://www.gaohuang.net", "dblp": ";;;72/5351;", "google_scholar": "https://scholar.google.com.hk/citations?user=-hwGMHcAAAAJ;VwblgV0AAAAJ;;;-P9LwcgAAAAJ", "orcid": ";0000-0002-2587-2660;;;", "linkedin": ";;;;", "or_profile": "~Chaofei_Wang1;~Qisen_Yang1;~Rui_Huang9;~Shiji_Song1;~Gao_Huang1", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2022efficient,\ntitle={Efficient Knowledge Distillation from Model Checkpoints},\nauthor={Chaofei Wang and Qisen Yang and Rui Huang and Shiji Song and Gao Huang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0ltDq6SjrfW}\n}", "github": "", "project": "", "reviewers": "So7a;6Nks;encu;CyDK", "pdf_size": 1058328, "rating": "6;6;6;7", "confidence": "2;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "4;3;3;2", "contribution": "2;3;3;3", "wc_summary": "213;72;95;132", "wc_strengths_and_weaknesses": "246;386;323;448", "wc_questions": "29;119;43;46", "wc_limitations": "252;11;10;265", "wc_review": "740;588;471;891", "wc_reply_reviewers": "236;47;25;211", "wc_reply_authors": "775;788;644;557", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.0, 53.53970489272424 ], "wc_strengths_and_weaknesses_avg": [ 350.75, 74.90452256039018 ], "wc_questions_avg": [ 59.25, 35.088281519618484 ], "wc_limitations_avg": [ 134.5, 124.08565589946325 ], "wc_review_avg": [ 672.5, 158.14629303274864 ], "wc_reply_reviewers_avg": [ 129.75, 94.48644082618415 ], "wc_reply_authors_avg": [ 691.0, 95.69482744641948 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2353993256352314616&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;tsinghua.edu.cn;;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "0oQv1Ftt_gK", "title": "Rethinking Counterfactual Explanations as Local and Regional Counterfactual Policies", "track": "main", "status": "Reject", "tldr": "", "abstract": "Among the challenges not yet resolved for Counterfactual Explanations (CE), there are stability, synthesis of the various CE and the lack of plausibility/sparsity guarantees. From a more practical point of view, recent studies show that the prescribed counterfactual recourses are often not implemented exactly by the individuals and demonstrate that most state-of-the-art CE algorithms are very likely to fail in this noisy environment. To address these issues, we propose a probabilistic framework that gives a sparse local counterfactual rule for each observation: we provide rules that give a range of values that can change the decision with a given high probability instead of giving diverse CE. In addition, the recourses derived from these rules are robust by construction. These local rules are aggregated into a regional counterfactual rule to ensure the stability of the counterfactual explanations across observations. Our local and regional rules guarantee that the recourses are faithful to the data distribution because our rules use a consistent estimator of the probabilities of changing the decision based on a Random Forest. 
In addition, these probabilities give interpretable and sparse rules as we select the smallest set of variables having a given probability of changing the decision. Codes for computing our counterfactual rules are available, and we compare their relevancy with standard CE and recent similar attempts.", "keywords": "counterfactuals;algorithmic recourse;learning theory;random forest;interpretability;explainable models;tree-based models", "primary_area": "", "supplementary_material": "/attachment/a01e8ab7499d7130abda8a0c07158d690bbe4bb3.pdf", "author": "Salim I. Amoukou;Nicolas J-B. Brunel", "authorids": "~Salim_I._Amoukou1;~Nicolas_J-B._Brunel1", "gender": "M;M", "homepage": "https://salimamoukou.github.io/;http://www.math-evry.cnrs.fr/members/nbrunel/welcome", "dblp": "289/1335;", "google_scholar": "JrHnICMAAAAJ;https://scholar.google.com/citations?hl=fr", "orcid": ";0000-0002-2840-8484", "linkedin": "slim-amk/;nicolasbrunel/", "or_profile": "~Salim_I._Amoukou1;~Nicolas_J-B._Brunel1", "aff": "PARIS-SACLAY, LaMME;Quantmetry", "aff_domain": "math-evry.cnrs.fr;quantmetry.com", "position": "PhD student;Principal Researcher", "bibtex": "@misc{\namoukou2022rethinking,\ntitle={Rethinking Counterfactual Explanations as Local and Regional Counterfactual Policies},\nauthor={Salim I. Amoukou and Nicolas J-B. Brunel},\nyear={2022},\nurl={https://openreview.net/forum?id=0oQv1Ftt_gK}\n}", "github": "", "project": "", "reviewers": "VVsi;b9vt;CFsf", "site": "https://openreview.net/forum?id=0oQv1Ftt_gK", "pdf_size": 577248, "rating": "3;6;6", "confidence": "5;4;3", "soundness": "1;3;3", "novelty": "1;3;3", "presentation": "2;3;2", "contribution": "1;3;3", "wc_summary": "135;26;32", "wc_strengths_and_weaknesses": "429;106;216", "wc_questions": "16;1;73", "wc_limitations": "43;1;12", "wc_review": "623;134;333", "wc_reply_reviewers": "336;0;0", "wc_reply_authors": "2029;276;350", "reply_reviewers": "2;0;0", "reply_authors": "5;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 64.33333333333333, 50.02888054802835 ], "wc_strengths_and_weaknesses_avg": [ 250.33333333333334, 134.080407053214 ], "wc_questions_avg": [ 30.0, 31.016124838541646 ], "wc_limitations_avg": [ 18.666666666666668, 17.78263822446552 ], "wc_review_avg": [ 363.3333333333333, 200.78235867614353 ], "wc_reply_reviewers_avg": [ 112.0, 158.39191898578665 ], "wc_reply_authors_avg": [ 885.0, 809.4940806866142 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17014552600705932649&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Paris-Saclay University;Quantmetry", "aff_unique_dep": "Laboratoire de Math\u00e9matiques de la Mer;", "aff_unique_url": "https://www.universite-paris-saclay.fr;", "aff_unique_abbr": "Paris-Saclay;", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris-Saclay;", "aff_country_unique_index": "0", "aff_country_unique": "France;" }, { "title": "Inducing Equilibria via Incentives: Simultaneous Design-and-Play 
Ensures Global Convergence", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53507", "id": "0pdLvHwh-L", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ba5f85ce126aad12075a3ffa68a3e969-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0pdLvHwh-L", "openreview": "https://openreview.net/forum?id=0pdLvHwh-L", "poster": "/media/PosterPDFs/NeurIPS%202022/53507.png?t=1669525000.2615702", "slides": "https://nips.cc/virtual/2022/poster/53507", "video": "https://nips.cc/virtual/2022/poster/53507", "author_site": "Boyi Liu, Jiayang Li, Zhuoran Yang, Hoi-To Wai, Mingyi Hong, Yu Nie, Zhaoran Wang", "tldr": "", "abstract": "To regulate a social system comprised of self-interested agents, economic incentives are often required to induce a desirable outcome. This incentive design problem naturally possesses a bilevel structure, in which a designer modifies the payoffs of the agents with incentives while anticipating the response of the agents, who play a non-cooperative game that converges to an equilibrium. The existing bilevel optimization algorithms raise a dilemma when applied to this problem: anticipating how incentives affect the agents at equilibrium requires solving the equilibrium problem repeatedly, which is computationally inefficient; bypassing the time-consuming step of equilibrium-finding can reduce the computational cost, but may lead the designer to a sub-optimal solution. To address such a dilemma, we propose a method that tackles the designer\u2019s and agents\u2019 problems simultaneously in a single loop. Specifically, at each iteration, both the designer and the agents only move one step. Nevertheless, we allow the designer to gradually learn the overall influence of the incentives on the agents, which guarantees optimality after convergence. 
The convergence rate of the proposed scheme is also established for a broad class of games.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5b3450c37b0d833fe7ac7047693037ca6c6560b2.pdf", "author": "Boyi Liu;Jiayang Li;Zhuoran Yang;Hoi To Wai;Mingyi Hong;Yu Nie;Zhaoran Wang", "authorids": "~Boyi_Liu1;~Jiayang_Li1;~Zhuoran_Yang1;~Hoi_To_Wai1;~Mingyi_Hong1;~Yu_Nie1;~Zhaoran_Wang1", "gender": "M;M;M;M;M;M;Not Specified", "homepage": ";;https://zhuoranyang.github.io/;http://www1.se.cuhk.edu.hk/~htwai/;http://people.ece.umn.edu/~mhong/mingyi.html;https://sites.northwestern.edu/marconie/;https://zhaoranwang.github.io/", "dblp": ";;;29/9875;57/8053;;117/2756", "google_scholar": "1G8RH_YAAAAJ;;;https://scholar.google.com.hk/citations?user=5-J7LeMAAAAJ;qRnP-p0AAAAJ;WNXai7cAAAAJ;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ", "orcid": ";0000-0001-9245-0209;;;;;", "linkedin": ";;;;;;", "or_profile": "~Boyi_Liu1;~Jiayang_Li1;~Zhuoran_Yang1;~Hoi_To_Wai1;~Mingyi_Hong1;~Yu_Nie1;~Zhaoran_Wang1", "aff": "Northwestern University, Northwestern University;Northwestern University;University of California, Berkeley;The Chinese University of Hong Kong;University of Minnesota, Minneapolis;Northwestern University;", "aff_domain": "u.northwestern.edu;northwestern.edu;berkeley.edu;cuhk.edu.hk;umn.edu;northwestern.edu;", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nliu2022inducing,\ntitle={Inducing Equilibria via Incentives: Simultaneous Design-and-Play Ensures Global Convergence},\nauthor={Boyi Liu and Jiayang Li and Zhuoran Yang and Hoi To Wai and Mingyi Hong and Yu Nie and Zhaoran Wang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0pdLvHwh-L}\n}", "github": "", "project": "", "reviewers": "ZpdR;7JdK;1gpF", "pdf_size": 773498, "rating": "5;6;7", "confidence": "3;3;2", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "2;3;3", "contribution": "3;3;3", "wc_summary": "77;97;135", "wc_strengths_and_weaknesses": "313;114;98", "wc_questions": "127;93;18", "wc_limitations": "13;23;7", "wc_review": "530;327;258", "wc_reply_reviewers": "360;0;0", "wc_reply_authors": "1134;408;287", "reply_reviewers": "2;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.0, 24.055491403558285 ], "wc_strengths_and_weaknesses_avg": [ 175.0, 97.79911383374936 ], "wc_questions_avg": [ 79.33333333333333, 45.536310297997964 ], "wc_limitations_avg": [ 14.333333333333334, 6.599663291074444 ], "wc_review_avg": [ 371.6666666666667, 115.44791995623923 ], "wc_reply_reviewers_avg": [ 120.0, 169.7056274847714 ], "wc_reply_authors_avg": [ 609.6666666666666, 374.0359459849934 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5104116692875336546&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "email": "u.northwestern.edu;northwestern.edu;berkeley.edu;cuhk.edu.hk;umn.edu;northwestern.edu;", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Northwestern University;University of California, Berkeley;Chinese University of Hong Kong;University of Minnesota", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.northwestern.edu;https://www.berkeley.edu;https://www.cuhk.edu.hk;https://www.minnesota.edu", "aff_unique_abbr": "NU;UC Berkeley;CUHK;UMN", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Berkeley;Hong Kong SAR;Minneapolis", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Meta-Complementing the Semantics of Short Texts in Neural Topic Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53521", "id": "0qaIM4W9Q1s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/bda5c35eded86adaf0231748e3ce071c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0qaIM4W9Q1s", "openreview": "https://openreview.net/forum?id=0qaIM4W9Q1s", "poster": "/media/PosterPDFs/NeurIPS%202022/53521.png?t=1669183590.2909548", "slides": "https://nips.cc/virtual/2022/poster/53521", "video": "https://nips.cc/virtual/2022/poster/53521", "author_site": "Delvin Ce Zhang, Hady Lauw", "tldr": "We propose a Meta-Complement Topic Model, which improves topic quality of short texts by transferring the semantic knowledge learned on long documents to complement semantically limited short texts with a meta-learning objective.", "abstract": "Topic models infer latent topic distributions based on observed word co-occurrences in a text corpus. 
While typically a corpus contains documents of variable lengths, most previous topic models treat documents of different lengths uniformly, assuming that each document is sufficiently informative. However, shorter documents may have only a few word co-occurrences, resulting in inferior topic quality. Some other previous works assume that all documents are short, and leverage external auxiliary data, e.g., pretrained word embeddings and document connectivity. Orthogonal to existing works, we remedy this problem within the corpus itself by proposing a Meta-Complement Topic Model, which improves topic quality of short texts by transferring the semantic knowledge learned on long documents to complement semantically limited short texts. As a self-contained module, our framework is agnostic to auxiliary data and can be further improved by flexibly integrating them into our framework. Specifically, when incorporating document connectivity, we further extend our framework to complement documents with limited edges. Experiments demonstrate the advantage of our framework.\n", "keywords": "neural topic model;short text;graph neural networks;semantic complement", "primary_area": "", "supplementary_material": "/attachment/17582e7e9453c9340aa7237b7d5a814e8b65d8f2.pdf", "author": "Delvin Ce Zhang;Hady W. Lauw", "authorids": "~Delvin_Ce_Zhang1;~Hady_W._Lauw1", "gender": "M;M", "homepage": "http://delvincezhang.com;http://www.hadylauw.com", "dblp": "97/919-4;00/2494", "google_scholar": "0PcgNCsAAAAJ;HTC1z2gAAAAJ", "orcid": "0000-0001-5571-9766;0000-0002-8245-8677", "linkedin": ";hadylauw", "or_profile": "~Ce_Zhang3;~Hady_W_Lauw1", "aff": "Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2022metacomplementing,\ntitle={Meta-Complementing the Semantics of Short Texts in Neural Topic Models},\nauthor={Delvin Ce Zhang and Hady W. Lauw},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0qaIM4W9Q1s}\n}", "github": "", "project": "", "reviewers": "yv6N;kUTk;zq8q;hYpp", "pdf_size": 560281, "rating": "4;6;7;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "contribution": "2;3;3;4", "wc_summary": "58;130;166;110", "wc_strengths_and_weaknesses": "155;38;147;237", "wc_questions": "83;116;98;22", "wc_limitations": "6;42;12;23", "wc_review": "302;326;423;392", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "722;746;785;362", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.0, 39.03844259188627 ], "wc_strengths_and_weaknesses_avg": [ 144.25, 70.73674787548548 ], "wc_questions_avg": [ 79.75, 35.329697139941636 ], "wc_limitations_avg": [ 20.75, 13.699908758820257 ], "wc_review_avg": [ 360.75, 48.761537096363156 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 653.75, 169.93583347840442 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4492930204771680748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "smu.edu.sg;smu.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Learning from Future: A Novel Self-Training Framework for Semantic Segmentation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55071", "id": "0tG59j2efs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1e97fb8a7c9737e9e9f4e0389b25efe8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0tG59j2efs", "openreview": "https://openreview.net/forum?id=0tG59j2efs", "poster": "/media/PosterPDFs/NeurIPS%202022/55071.png?t=1669085200.9119449", "slides": "https://nips.cc/virtual/2022/poster/55071", "video": "https://nips.cc/virtual/2022/poster/55071", "author_site": "Ye Du, Yujun Shen, Haochen Wang, Jingjing Fei, Wei Li, Liwei Wu, Rui Zhao, Zehua Fu, Qingjie LIU", "tldr": "We propose a novel self-training framework, which helps the student to learn from the future, and achieve state-of-the-art performance on the task of unsupervised domain adaptive semantic segmentation.", "abstract": "Self-training has shown great potential in semi-supervised learning. Its core idea is to use the model learned on labeled data to generate pseudo-labels for unlabeled samples, and in turn teach itself. To obtain valid supervision, active attempts typically employ a momentum teacher for pseudo-label prediction yet observe the confirmation bias issue, where the incorrect predictions may provide wrong supervision signals and get accumulated in the training process. 
The primary cause of such a drawback is that the prevailing self-training framework amounts to guiding the current state with previous knowledge, because the teacher is updated with the past student only. To alleviate this problem, we propose a novel self-training strategy, which allows the model to learn from the future. Concretely, at each training step, we first virtually optimize the student (i.e., caching the gradients without applying them to the model weights), then update the teacher with the virtual future student, and finally ask the teacher to produce pseudo-labels for the current student as the guidance. In this way, we manage to improve the quality of pseudo-labels and thus boost the performance. We also develop two variants of our future-self-training (FST) framework through peeping at the future both deeply (FST-D) and widely (FST-W). Taking the tasks of unsupervised domain adaptive semantic segmentation and semi-supervised semantic segmentation as instances, we experimentally demonstrate the effectiveness and superiority of our approach under a wide range of settings. Code is available at https://github.com/usr922/FST.", "keywords": "unsupervised domain adaptive semantic segmentation;self-training", "primary_area": "", "supplementary_material": "/attachment/befb5a92afbe25df94fdf889d33384ad7f6f0dcd.pdf", "author": "Ye Du;Yujun Shen;Haochen Wang;Jingjing Fei;Wei Li;Liwei Wu;Rui Zhao;Zehua Fu;Qingjie LIU", "authorids": "~Ye_Du4;~Yujun_Shen1;~Haochen_Wang5;~Jingjing_Fei1;~Wei_Li24;~Liwei_Wu5;~Rui_Zhao6;~Zehua_Fu1;~Qingjie_LIU1", "gender": "M;;M;;M;M;M;F;M", "homepage": "http://google.com;;https://haochen-wang409.github.io/;;https://bigballon.github.io/;;http://zhaorui.xyz/;;https://shi.buaa.edu.cn/liuqingjie/zh_CN/index.htm", "dblp": ";;;;;;26/2578-1;137/6488;72/10584", "google_scholar": ";;oNlpTdcAAAAJ;;CPd0kEMAAAAJ;dg1JyaUAAAAJ;1c9oQNMAAAAJ;Ug8JzsAAAAAJ;HsLdRZYAAAAJ", "orcid": ";;0000-0002-2333-1844;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Ye_Du4;~Yujun_Shen1;~Haochen_Wang5;~Jingjing_Fei1;~Wei_Li24;~Liwei_Wu5;~Rui_Zhao6;~Zehua_Fu1;~Qingjie_LIU1", "aff": ";;Shanghai Jiaotong University;;SenseTime Research;SenseTime;SenseTime Research;Hangzhou Innovation Institute, Beihang University;Beihang University", "aff_domain": ";;sjtu.edu.cn;;sensetime.com;sensetime.com;sensetime.com;buaa.edu.cn;buaa.edu.cn", "position": ";;Undergrad student;;Researcher;Researcher;Researcher;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ndu2022learning,\ntitle={Learning from Future: A Novel Self-Training Framework for Semantic Segmentation},\nauthor={Ye Du and Yujun Shen and Haochen Wang and Jingjing Fei and Wei Li and Liwei Wu and Rui Zhao and Zehua Fu and Qingjie LIU},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0tG59j2efs}\n}", "github": "", "project": "", "reviewers": "9B6x;7vtH;Sb76", "pdf_size": 3256520, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "contribution": "3;3;3", "wc_summary": "142;65;232", "wc_strengths_and_weaknesses": "290;86;360", "wc_questions": "29;259;63", "wc_limitations": "11;11;65", "wc_review": "472;421;720", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "287;283;573", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 146.33333333333334, 68.24628602023377 ], "wc_strengths_and_weaknesses_avg": [ 245.33333333333334, 116.23348150262996 ], "wc_questions_avg": [ 117.0, 101.3640304381523 ], "wc_limitations_avg": [ 29.0, 25.45584412271571 ], "wc_review_avg": [ 537.6666666666666, 130.59947251892797 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 381.0, 135.77432256014635 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6027127191801048854&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": ";;sjtu.edu.cn;;sensetime.com;sensetime.com;sensetime.com;buaa.edu.cn;buaa.edu.cn", "author_num": 9, "aff_unique_index": "0;1;1;1;2;2", "aff_unique_norm": "Shanghai Jiao Tong University;SenseTime;Beihang University", "aff_unique_dep": ";SenseTime Research;Hangzhou Innovation Institute", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.sensetime.com;http://www.buaa.edu.cn", "aff_unique_abbr": "SJTU;SenseTime;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hangzhou", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Luckiness in Multiscale Online Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53247", "id": "0tpZgkAKVjB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a0d2345b43e66fa946155c98899dc03b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0tpZgkAKVjB", "openreview": "https://openreview.net/forum?id=0tpZgkAKVjB", "poster": "/media/PosterPDFs/NeurIPS%202022/53247.png?t=1669384418.737173", "slides": "https://nips.cc/virtual/2022/poster/53247", "video": "https://nips.cc/virtual/2022/poster/53247", "author_site": "Wouter Koolen, Muriel F. P\u00e9rez-Ortiz", "tldr": "For the multiscale experts problem, it is possible to achieve both constant regret under margin conditions and worst-case safety.", "abstract": "Algorithms for full-information online learning are classically tuned to minimize their worst-case regret. Modern algorithms additionally provide tighter guarantees outside the adversarial regime, most notably in the form of constant pseudoregret bounds under statistical margin assumptions. We investigate the multiscale extension of the problem where the loss ranges of the experts are vastly different. Here, the regret with respect to each expert needs to scale with its range, instead of the maximum overall range. 
We develop new multiscale algorithms, tuning schemes and analysis techniques to show that worst-case robustness and adaptation to easy data can be combined at a negligible cost. We further develop an extension with optimism and apply it to solve multiscale two-player zero-sum games. We demonstrate experimentally the superior performance of our scale-adaptive algorithm and discuss the subtle relationship of our results to Freund's 2016 open problem.\n", "keywords": "Online Learning;Multiscale Experts Problem;Second-order Regret Bounds;Stochastic Luckiness;FTRL", "primary_area": "", "supplementary_material": "/attachment/b80f029e07332a84628f68a84bcc649bc1b8b25b.zip", "author": "Wouter M Koolen;Muriel Felipe P\u00e9rez", "authorids": "~Wouter_M_Koolen1;~Muriel_Felipe_P\u00e9rez1", "gender": "M;M", "homepage": "http://wouterkoolen.info/;", "dblp": "08/2694;", "google_scholar": "34JTfUcAAAAJ;", "orcid": "0000-0002-1053-6701;", "linkedin": ";muriel-p%C3%A9rez-6ba828119/", "or_profile": "~Wouter_M_Koolen1;~Muriel_Felipe_P\u00e9rez1", "aff": "Centrum voor Wiskunde en Informatica;Centrum voor Wiskunde en Informatica", "aff_domain": "cwi.nl;cwi.nl", "position": "Senior Researcher;PhD student", "bibtex": "@inproceedings{\nkoolen2022luckiness,\ntitle={Luckiness in Multiscale Online Learning},\nauthor={Wouter M Koolen and Muriel Felipe P{\\'e}rez},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0tpZgkAKVjB}\n}", "github": "", "project": "", "reviewers": "iHr2;R47f;jvH3;eZu6", "pdf_size": 795315, "rating": "5;6;7;8", "confidence": "3;2;3;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;2;4;4", "contribution": "2;3;3;3", "wc_summary": "83;117;100;103", "wc_strengths_and_weaknesses": "105;131;114;86", "wc_questions": "50;11;38;188", "wc_limitations": "32;10;30;10", "wc_review": "270;269;282;387", "wc_reply_reviewers": "97;0;0;114", "wc_reply_authors": "307;106;183;668", "reply_reviewers": "1;0;0;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.75, 12.090802289343747 ], "wc_strengths_and_weaknesses_avg": [ 109.0, 16.232683080747925 ], "wc_questions_avg": [ 71.75, 68.58707968706642 ], "wc_limitations_avg": [ 20.5, 10.523782589924593 ], "wc_review_avg": [ 302.0, 49.34065261019558 ], "wc_reply_reviewers_avg": [ 52.75, 53.09131284871377 ], "wc_reply_authors_avg": [ 316.0, 215.5075404713255 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3279686715485985083&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "email": "cwi.nl;cwi.nl", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Centrum voor Wiskunde en Informatica", "aff_unique_dep": "", "aff_unique_url": "https://www.cwi.nl/", "aff_unique_abbr": "CWI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "Stochastic Window Transformer for Image Restoration", "status": "Accept", "track": 
"main", "site": "https://nips.cc/virtual/2022/poster/55280", "id": "0ucMtEKCihU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3ca6d336ddaa316a6ae953a20b9477cf-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0ucMtEKCihU", "openreview": "https://openreview.net/forum?id=0ucMtEKCihU", "poster": "/media/PosterPDFs/NeurIPS%202022/df6d2338b2b8fce1ec2f6dda0a630eb0.png?t=1665455828.8067994", "slides": "https://nips.cc/virtual/2022/poster/55280", "video": "https://nips.cc/virtual/2022/poster/55280", "author_site": "Jie Xiao, Xueyang Fu, Feng Wu, Zheng-Jun Zha", "tldr": "We propose a novel stochastic window strategy to make transformer more sophisticated for image restoration tasks.", "abstract": "Thanks to the powerful representation capabilities, transformers have made impressive progress in image restoration. However, existing transformers-based methods do not carefully consider the particularities of image restoration. In general, image restoration requires that an ideal approach should be translation-invariant to the degradation, i.e., the undesirable degradation should be removed irrespective of its position within the image. Furthermore, the local relationships also play a vital role, which should be faithfully exploited for recovering clean images. Nevertheless, most transformers either adopt local attention with the fixed local window strategy or global attention, which unfortunately breaks the translation invariance and causes huge loss of local relationships. To address these issues, we propose an elegant stochastic window strategy for transformers. Specifically, we first introduce the window partition with stochastic shift to replace the original fixed window partition for training. Then, we design a new layer expectation propagation algorithm to efficiently approximate the expectation of the induced stochastic transformer for testing. Our stochastic window transformer not only enjoys powerful representation but also maintains the desired property of translation invariance and locality. Experiments validate the stochastic window strategy consistently improves performance on various image restoration tasks (deraining, denoising and deblurring) by significant margins. 
The code is available at https://github.com/jiexiaou/Stoformer.", "keywords": "image restoration;transformer;stochastic window strategy;translation invariance;locality", "primary_area": "", "supplementary_material": "/attachment/ed6655fbdf708ccfc4fb537546835cf2aeb58502.pdf", "author": "Jie Xiao;Xueyang Fu;Feng Wu;Zheng-Jun Zha", "authorids": "~Jie_Xiao3;~Xueyang_Fu1;~Feng_Wu1;~Zheng-Jun_Zha2", "gender": "M;M;M;M", "homepage": "https://jiexiaou.github.io/;;;https://xueyangfu.github.io/", "dblp": "15/3437-2;25/3972-1;23/1818;136/9389", "google_scholar": "https://scholar.google.com/citations?hl=en;5bInRDEAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-5677-270X;;;0000-0001-8036-4071", "linkedin": ";;;", "or_profile": "~Jie_Xiao3;~Feng_Wu1;~Zheng-Jun_Zha2;~Xueyang_Fu2", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nxiao2022stochastic,\ntitle={Stochastic Window Transformer for Image Restoration},\nauthor={Jie Xiao and Xueyang Fu and Feng Wu and Zheng-Jun Zha},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0ucMtEKCihU}\n}", "github": "", "project": "", "reviewers": "KzwD;S7Qf;86x1;jX7b", "pdf_size": 20735420, "rating": "3;3;6;8", "confidence": "4;4;3;5", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "contribution": "2;2;3;4", "wc_summary": "77;30;30;69", "wc_strengths_and_weaknesses": "377;181;37;223", "wc_questions": "25;5;11;20", "wc_limitations": "6;5;6;100", "wc_review": "485;221;84;412", "wc_reply_reviewers": "584;97;0;118", "wc_reply_authors": "1803;712;552;217", "reply_reviewers": "1;1;0;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 51.5, 21.68524844220144 ], "wc_strengths_and_weaknesses_avg": [ 204.5, 121.14763720353773 ], "wc_questions_avg": [ 15.25, 7.75806032459145 ], "wc_limitations_avg": [ 29.25, 40.84957160118084 ], "wc_review_avg": [ 300.5, 157.84882007794673 ], "wc_reply_reviewers_avg": [ 199.75, 226.26795508865146 ], "wc_reply_authors_avg": [ 821.0, 594.4287173412805 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18265900418353990435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Functional Ensemble Distillation", "status": "Accept", "track": 
"main", "site": "https://nips.cc/virtual/2022/poster/54398", "id": "0um6VfuBfr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0b7f639ef28a9035a71f7e0c04c1d681-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0um6VfuBfr", "openreview": "https://openreview.net/forum?id=0um6VfuBfr", "poster": "/media/PosterPDFs/NeurIPS%202022/54398.png?t=1669452253.8307168", "slides": "https://nips.cc/virtual/2022/poster/54398", "video": "https://nips.cc/virtual/2022/poster/54398", "author_site": "Coby Penso, Idan Achituve, Ethan Fetaya", "tldr": "An efficient and effective approach for ensemble distillation that captures both the ensemble accuracy and diversity", "abstract": "Bayesian models have many desirable properties, most notable is their ability to generalize from limited data and to properly estimate the uncertainty in their predictions. However, these benefits come at a steep computational cost as Bayesian inference, in most cases, is computationally intractable. One popular approach to alleviate this problem is using a Monte-Carlo estimation with an ensemble of models sampled from the posterior. However, this approach still comes at a significant computational cost, as one needs to store and run multiple models at test time. In this work, we investigate how to best distill an ensemble's predictions using an efficient model. First, we argue that current approaches are limited as they are constrained to classification and the Dirichlet distribution. Second, in many limited data settings, all ensemble members achieve nearly zero training loss, namely, they produce near-identical predictions on the training set which results in sub-optimal distilled models. To address both problems, we propose a novel and general distillation approach, named Functional Ensemble Distillation (FED), and we investigate how to best distill an ensemble in this setting. We find that learning the distilled model via a simple augmentation scheme in the form of mixup augmentation significantly boosts the performance. We evaluated our method on several tasks and showed that it achieves superior results in both accuracy and uncertainty estimation compared to current approaches.", "keywords": "Bayesian inference;distillation", "primary_area": "", "supplementary_material": "/attachment/de17c30c42698766e154502eff69d1218f58d903.pdf", "author": "Coby Penso;Idan Achituve;Ethan Fetaya", "authorids": "~Coby_Penso1;~Idan_Achituve1;~Ethan_Fetaya1", "gender": "M;;M", "homepage": ";https://idanachituve.github.io/;http://www.cs.toronto.edu/~ethanf/", "dblp": ";254/8524;01/10046", "google_scholar": ";UQIBiUcAAAAJ;zLuqh-0AAAAJ", "orcid": ";;0000-0003-3125-1665", "linkedin": "coby-penso-0190a81a7;idanachituve;", "or_profile": "~Coby_Penso1;~Idan_Achituve1;~Ethan_Fetaya1", "aff": "Bar Ilan University;Bar Ilan University;Bar Ilan University", "aff_domain": "biu.ac.il;biu.ac.il;biu.ac.il", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npenso2022functional,\ntitle={Functional Ensemble Distillation},\nauthor={Coby Penso and Idan Achituve and Ethan Fetaya},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0um6VfuBfr}\n}", "github": "", "project": "", "reviewers": "i7D4;a1xo;m5GE;rtLD", "pdf_size": 833812, "rating": "6;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;2;3", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "contribution": "3;3;3;4", "wc_summary": "180;161;106;44", "wc_strengths_and_weaknesses": "940;170;83;641", "wc_questions": "259;99;318;54", "wc_limitations": "7;1;209;13", "wc_review": "1386;431;716;752", "wc_reply_reviewers": "101;0;71;38", "wc_reply_authors": "421;302;551;556", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 122.75, 52.96874078171011 ], "wc_strengths_and_weaknesses_avg": [ 458.5, 349.77885870932795 ], "wc_questions_avg": [ 182.5, 109.19821427111343 ], "wc_limitations_avg": [ 57.5, 87.57139944068497 ], "wc_review_avg": [ 821.25, 348.9665994045848 ], "wc_reply_reviewers_avg": [ 52.5, 37.61980861195336 ], "wc_reply_authors_avg": [ 457.5, 104.82962367575303 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7557864995422109600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "biu.ac.il;biu.ac.il;biu.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Bar-Ilan University", "aff_unique_dep": "", "aff_unique_url": "https://www.biu.ac.il", "aff_unique_abbr": "BIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Learning to Share in Networked Multi-Agent Reinforcement Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54339", "id": "0vJH6C_h4-", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/61d8577984e4ef0cba20966eb3ef2ed8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0vJH6C_h4-", "openreview": "https://openreview.net/forum?id=0vJH6C_h4-", "poster": "/media/PosterPDFs/NeurIPS%202022/26b58a41da329e0cbde0cbf956640a58.png?t=1666531865.276929", "slides": "https://nips.cc/virtual/2022/poster/54339", "video": "https://nips.cc/virtual/2022/poster/54339", "author_site": "Yuxuan Yi, Ge Li, Yaowei Wang, Zongqing Lu", "tldr": "We propose a hierarchically decentralized learning framework for networked MARL that enables agents to learn to dynamically share reward with neighbors so as to collaboratively optimize the global objective.", "abstract": "In this paper, we study the problem of networked multi-agent reinforcement learning (MARL), where a number of agents are deployed as a partially connected network and each interacts only with nearby agents. Networked MARL requires all agents to make decisions in a decentralized manner to optimize a global objective with restricted communication between neighbors over the network. 
Inspired by the fact that sharing plays a key role in humans' learning of cooperation, we propose LToS, a hierarchically decentralized MARL framework that enables agents to learn to dynamically share reward with neighbors so as to encourage agents to cooperate on the global objective through collectives. For each agent, the high-level policy learns how to share reward with neighbors to decompose the global objective, while the low-level policy learns to optimize the local objective induced by the high-level policies in the neighborhood. The two policies form a bi-level optimization and learn alternately. We empirically demonstrate that LToS outperforms existing methods in both social dilemma and networked MARL scenarios across scales.", "keywords": "Cooperative Multi-Agent Reinforcement Learning;Networked Multi-Agent Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/c8958ebbd1301e4af571db144d098c3ca61d193c.pdf", "author": "Yuxuan Yi;Ge Li;Yaowei Wang;Zongqing Lu", "authorids": "~Yuxuan_Yi1;~Ge_Li2;~Yaowei_Wang1;~Zongqing_Lu2", "gender": "M;M;M;", "homepage": "https://github.com/KamijouToumaKun/;https://dblp.org/pid/24/712-2.html;https://dblp.org/pid/68/2992.html;", "dblp": ";24/712-2.html;68/2992-1;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;0000-0002-6110-4036;", "linkedin": ";;yaowei-wang-971ab310/;", "or_profile": "~Yuxuan_Yi1;~Ge_Li2;~Yaowei_Wang1;~Zongqing_Lu2", "aff": ";Peking University Shenzhen Graduate School;Pengcheng Laboratory;", "aff_domain": ";pku.edu.cn;pcl.ac.cn;", "position": ";Full Professor;Full Professor;", "bibtex": "@inproceedings{\nyi2022learning,\ntitle={Learning to Share in Multi-Agent Reinforcement Learning},\nauthor={Yuxuan Yi and Ge Li and Yaowei Wang and Zongqing Lu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0vJH6C_h4-}\n}", "github": "", "project": "", "reviewers": "98u6;cVVf;gcHc", "pdf_size": 2121048, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "contribution": "2;3;3", "wc_summary": "124;110;120", "wc_strengths_and_weaknesses": "107;158;341", "wc_questions": "71;376;138", "wc_limitations": "163;67;29", "wc_review": "465;711;628", "wc_reply_reviewers": "126;13;60", "wc_reply_authors": "850;578;555", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 118.0, 5.887840577551898 ], "wc_strengths_and_weaknesses_avg": [ 202.0, 100.46890066085126 ], "wc_questions_avg": [ 195.0, 130.87653214639616 ], "wc_limitations_avg": [ 86.33333333333333, 56.38754787677467 ], "wc_review_avg": [ 601.3333333333334, 102.18393002598577 ], "wc_reply_reviewers_avg": [ 66.33333333333333, 46.34891824220089 ], "wc_reply_authors_avg": [ 661.0, 133.97263402152944 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3236631293136958948&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";pku.edu.cn;pcl.ac.cn;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Peking University;Pengcheng Laboratory", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;", "aff_unique_abbr": "PKU;", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "VAEL: Bridging Variational Autoencoders and Probabilistic Logic Programming", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54076", "id": "0xbP4W7rdJW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1e38b2a0b77541b14a3315c99697b835-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0xbP4W7rdJW", "openreview": "https://openreview.net/forum?id=0xbP4W7rdJW", "poster": "/media/PosterPDFs/NeurIPS%202022/4d7a968bb636e25818ff2a3941db08c1.png?t=1667467262.582353", "slides": "https://nips.cc/virtual/2022/poster/54076", "video": "https://nips.cc/virtual/2022/poster/54076", "author_site": "Eleonora Misino, Giuseppe Marra, Emanuele Sansone", "tldr": "VAEL is a neuro-symbolic generative model integrating variational autoencoders (VAE) with the reasoning capabilities of probabilistic logic (L) programming.", "abstract": "We present VAEL, a neuro-symbolic generative model integrating variational autoencoders (VAE) with the reasoning capabilities of probabilistic logic (L) programming. Besides standard latent subsymbolic variables, our model exploits a probabilistic logic program to define a further structured representation, which is used for logical reasoning. The entire process is end-to-end differentiable. 
Once trained, VAEL can solve new unseen generation tasks by (i) leveraging the previously acquired knowledge encoded in the neural component and (ii) exploiting new logical programs on the structured latent space. Our experiments provide support for the benefits of this neuro-symbolic integration both in terms of task generalization and data efficiency. To the best of our knowledge, this work is the first to propose a general-purpose end-to-end framework integrating probabilistic logic programming into a deep generative model.", "keywords": "neuro-symbolic;variational autoencoders;probabilistic logic programming", "primary_area": "", "supplementary_material": "/attachment/ae3aaa6b9b8d4ed1c5ab5db9e9148de2779fe620.zip", "author": "Eleonora Misino;Giuseppe Marra;Emanuele Sansone", "authorids": "~Eleonora_Misino1;~Giuseppe_Marra1;~Emanuele_Sansone1", "gender": "F;M;", "homepage": ";https://www.giuseppemarra.com;", "dblp": ";150/7452;", "google_scholar": "rInfApkAAAAJ;https://scholar.google.it/citations?user=BBcsk7MAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Eleonora_Misino1;~Giuseppe_Marra1;~Emanuele_Sansone1", "aff": "University of Bologna;KU Leuven;", "aff_domain": "unibo.it;kuleuven.be;", "position": "PhD student;Postdoc;", "bibtex": "@inproceedings{\nmisino2022vael,\ntitle={{VAEL}: Bridging Variational Autoencoders and Probabilistic Logic Programming},\nauthor={Eleonora Misino and Giuseppe Marra and Emanuele Sansone},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0xbP4W7rdJW}\n}", "github": "", "project": "", "reviewers": "aUPz;icZs;2dcW;t873", "pdf_size": 5803463, "rating": "4;5;6;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;4;3", "contribution": "2;2;3;2", "wc_summary": "80;61;26;83", "wc_strengths_and_weaknesses": "270;154;348;197", "wc_questions": "109;1;15;113", "wc_limitations": "3;1;40;63", "wc_review": "462;217;429;456", "wc_reply_reviewers": "126;22;204;58", "wc_reply_authors": "1058;484;570;507", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.5, 22.699118925632334 ], "wc_strengths_and_weaknesses_avg": [ 242.25, 73.80506418939015 ], "wc_questions_avg": [ 59.5, 51.75664208582315 ], "wc_limitations_avg": [ 26.75, 26.06122598804592 ], "wc_review_avg": [ 391.0, 101.22499691281793 ], "wc_reply_reviewers_avg": [ 102.5, 69.48920779516773 ], "wc_reply_authors_avg": [ 654.75, 234.93549646658334 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10135207146367765358&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "email": "unibo.it;kuleuven.be;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Bologna;Katholieke Universiteit Leuven", "aff_unique_dep": ";", "aff_unique_url": "https://www.unibo.it;https://www.kuleuven.be", "aff_unique_abbr": "Unibo;KU Leuven", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Italy;Belgium" }, { "title": "ComGAN: Unsupervised Disentanglement and \ufeffSegmentation via Image Composition", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54645", "id": "0xbhGxgzd1t", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1df282080150537df7b00c20aadcafad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0xbhGxgzd1t", "openreview": "https://openreview.net/forum?id=0xbhGxgzd1t", "poster": "/media/PosterPDFs/NeurIPS%202022/71a8b2ffe0b594a5c1b3c28090384fd7.png?t=1666066022.7663674", "slides": "https://nips.cc/virtual/2022/poster/54645", "video": "https://nips.cc/virtual/2022/poster/54645", "author_site": "Rui Ding, Kehua Guo, Xiangyuan Zhu, Zheng Wu, Liwei Wang", "tldr": "ComGAN is a flexible unsupervised model that generates realistic images and high-semantic masks, and effectively avoids trivial solutions.", "abstract": "We propose ComGAN, a simple unsupervised generative model, which simultaneously generates realistic images and high semantic masks under an adversarial loss and a binary regularization. In this paper, we first investigate two kinds of trivial solutions in the compositional generation process, and demonstrate their source is vanishing gradients on the mask. Then, we solve trivial solutions from the perspective of architecture. Furthermore, we redesign two fully unsupervised modules based on ComGAN (DS-ComGAN), where the disentanglement module associates the foreground, background and mask with three independent variables, and the segmentation module learns object segmentation. Experimental results show that (i) ComGAN's network architecture effectively avoids trivial solutions without any supervised information and regularization; (ii) DS-ComGAN achieves remarkable results and outperforms existing semi-supervised and weakly supervised methods by a large margin in both the image disentanglement and unsupervised segmentation tasks. It implies that the redesign of ComGAN is a possible direction for future unsupervised work.", "keywords": "Generative Adversarial Networks;Trivial solutions;Image Disentanglement;Unsupervised Segmentation", "primary_area": "", "supplementary_material": "/attachment/5c45fd6c9dea5b522db867673671a4c5f6c39aa2.pdf", "author": "Rui Ding;Kehua Guo;Xiangyuan Zhu;Zheng Wu;Liwei Wang", "authorids": "~Rui_Ding6;~Kehua_Guo1;~Xiangyuan_Zhu1;wuzhengtea@gmail.com;wlw115611@csu.edu.cn", "gender": "M;M;M;;", "homepage": "https://github.com/Ruiding1;https://faculty.csu.edu.cn/guokehua;;;", "dblp": "55/5564;02/8779;;;", "google_scholar": ";;;;", "orcid": ";;0000-0002-1349-3399;;", "linkedin": ";;;;", "or_profile": "~Rui_Ding6;~Kehua_Guo1;~Xiangyuan_Zhu1;wuzhengtea@gmail.com;wlw115611@csu.edu.cn", "aff": "Central South University;Central South University, China;Central South University, China;;", "aff_domain": "csu.edu.cn;csu.edu.cn;csu.edu.cn;;", "position": "PhD student;Full Professor;PhD student;;", "bibtex": "@inproceedings{\nding2022comgan,\ntitle={Com{GAN}: Unsupervised Disentanglement and \ufeffSegmentation via Image Composition},\nauthor={Rui Ding and Kehua Guo and Xiangyuan Zhu and Zheng Wu and Liwei Wang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0xbhGxgzd1t}\n}", "github": "", "project": "", "reviewers": "rWMN;19KW;Tkuw", "pdf_size": 2372354, "rating": "6;6;6", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;2", "contribution": "3;3;3", "wc_summary": "59;84;60", "wc_strengths_and_weaknesses": "254;113;406", "wc_questions": "18;33;90", "wc_limitations": "11;40;10", "wc_review": "342;270;566", "wc_reply_reviewers": "23;57;73", "wc_reply_authors": "835;1376;1533", "reply_reviewers": "1;1;1", "reply_authors": "3;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.66666666666667, 11.55662388223981 ], "wc_strengths_and_weaknesses_avg": [ 257.6666666666667, 119.6448448069907 ], "wc_questions_avg": [ 47.0, 31.016124838541646 ], "wc_limitations_avg": [ 20.333333333333332, 13.912424503139471 ], "wc_review_avg": [ 392.6666666666667, 126.04055784636238 ], "wc_reply_reviewers_avg": [ 51.0, 20.848661028149188 ], "wc_reply_authors_avg": [ 1248.0, 298.98606433522394 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15639267066341879993&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "csu.edu.cn;csu.edu.cn;csu.edu.cn;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Central South University", "aff_unique_dep": "", "aff_unique_url": "https://www.csu.edu.cn", "aff_unique_abbr": "CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Effective Dimension in Bandit Problems under Censorship", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53245", "id": "0xdH-09oGD7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/228ffa71ce31ebbdebc6cf413a39cdce-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0xdH-09oGD7", "openreview": "https://openreview.net/forum?id=0xdH-09oGD7", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53245", "video": "https://nips.cc/virtual/2022/poster/53245", "author_site": "Gauthier Guinet, Saurabh Amin, Patrick Jaillet", "tldr": "We demonstrate that the complexity of bandit learning under a broad class of censorship models is governed by the notion of effective dimension. ", "abstract": "In this paper, we study both multi-armed and contextual bandit problems in censored environments. Our goal is to estimate the performance loss due to censorship in the context of classical algorithms designed for uncensored environments. Our main contributions include the introduction of a broad class of censorship models and their analysis in terms of the effective dimension of the problem -- a natural measure of its underlying statistical complexity and main driver of the regret bound. In particular, the effective dimension allows us to maintain the structure of the original problem at first order, while embedding it in a bigger space, and thus naturally leads to results analogous to uncensored settings. 
Our analysis involves a continuous generalization of the Elliptical Potential Inequality, which we believe is of independent interest. We also discover an interesting property of decision-making under censorship: a transient phase during which initial misspecification of censorship is self-corrected at an extra cost; followed by a stationary phase that reflects the inherent slowdown of learning governed by the effective dimension. Our results are useful for applications of sequential decision-making models where the feedback received depends on strategic uncertainty (e.g., agents\u2019 willingness to follow a recommendation) and/or random uncertainty (e.g., loss or delay in arrival of information).", "keywords": "Bandit Algorithms;Missing Data;Censored Processes;Statistical Learning Theory", "primary_area": "", "supplementary_material": "/attachment/7fb47a58c55248aca6906ba556cf97fff2da04b6.pdf", "author": "Gauthier Guinet;Saurabh Amin;Patrick Jaillet", "authorids": "~Gauthier_Guinet1;~Saurabh_Amin1;~Patrick_Jaillet1", "gender": "M;M;M", "homepage": "https://gguinet.github.io;https://cee.mit.edu/people_individual/saurabh-amin/;http://web.mit.edu/jaillet/www/", "dblp": ";62/2621;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick", "google_scholar": "https://scholar.google.fr/citations?hl=fr;qTCXoLQAAAAJ;ND0FM6EAAAAJ", "orcid": ";;0000-0002-8585-6566", "linkedin": "gauthier-guinet;;patrick-jaillet-1260445/", "or_profile": "~Gauthier_Guinet1;~Saurabh_Amin1;~Patrick_Jaillet1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nguinet2022effective,\ntitle={Effective Dimension in Bandit Problems under Censorship},\nauthor={Gauthier Guinet and Saurabh Amin and Patrick Jaillet},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0xdH-09oGD7}\n}", "github": "", "project": "", "reviewers": "R7Ls;hhGV;5N2o;qkWU", "pdf_size": 719313, "rating": "3;4;6;7", "confidence": "2;3;1;4", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;2;3", "contribution": "2;2;3;4", "wc_summary": "22;31;41;43", "wc_strengths_and_weaknesses": "76;76;93;128", "wc_questions": "140;4;2;186", "wc_limitations": "15;3;1;23", "wc_review": "253;114;137;380", "wc_reply_reviewers": "27;0;0;0", "wc_reply_authors": "1587;962;901;1317", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 34.25, 8.407585860400118 ], "wc_strengths_and_weaknesses_avg": [ 93.25, 21.22940178149163 ], "wc_questions_avg": [ 83.0, 81.6394512475433 ], "wc_limitations_avg": [ 10.5, 8.986100377805714 ], "wc_review_avg": [ 221.0, 105.84186317332097 ], "wc_reply_reviewers_avg": [ 6.75, 11.691342951089922 ], "wc_reply_authors_avg": [ 1191.75, 278.0426001532859 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.28284271247461906, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9923547911032192412&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Paraphrasing Is All You Need for Novel Object Captioning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53418", "id": "0zHXmOXwkIf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2a8e6c09a1fd747e43a74710c79efdd5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0zHXmOXwkIf", "openreview": "https://openreview.net/forum?id=0zHXmOXwkIf", "poster": "/media/PosterPDFs/NeurIPS%202022/53418.png?t=1668429571.9453282", "slides": "https://nips.cc/virtual/2022/poster/53418", "video": "https://nips.cc/virtual/2022/poster/53418", "author_site": "Cheng-Fu Yang, Yao-Hung Hubert Tsai, Wan-Cyuan Fan, Russ Salakhutdinov, Louis-Philippe Morency, Frank Wang", "tldr": "", "abstract": "Novel object captioning (NOC) aims to describe images containing objects without observing their ground truth captions during training. Due to the absence of caption annotation, captioning models cannot be directly optimized via sequence-to-sequence training or CIDEr optimization. As a result, we present Paraphrasing-to-Captioning (P2C), a two-stage learning framework for NOC, which heuristically optimizes the output captions via paraphrasing. With P2C, the captioning model first learns paraphrasing from a language model pre-trained on a text-only corpus, allowing expansion of the word bank for improving linguistic fluency. 
To further ensure that the output caption sufficiently describes the visual content of the input image, we perform self-paraphrasing for the captioning model, introducing fidelity and adequacy objectives. Since no ground truth captions are available for novel object images during training, our P2C leverages cross-modality (image-text) association modules to ensure that the above caption characteristics can be properly preserved. In the experiments, we not only show that our P2C achieves state-of-the-art performance on the nocaps and COCO Caption datasets, but also verify the effectiveness and flexibility of our learning framework by replacing the language and cross-modality association models for NOC. Implementation details and code are available in the supplementary materials.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/6a3b640664e28372cde229c01aac3e15867ba406.zip", "author": "Cheng-Fu Yang;Yao-Hung Hubert Tsai;Wan-Cyuan Fan;Ruslan Salakhutdinov;Louis-Philippe Morency;Yu-Chiang Frank Wang", "authorids": "~Cheng-Fu_Yang1;~Yao-Hung_Hubert_Tsai1;~Wan-Cyuan_Fan1;~Ruslan_Salakhutdinov1;~Louis-Philippe_Morency1;~Yu-Chiang_Frank_Wang2", "gender": "M;M;M;M;M;M", "homepage": "https://joeyy5588.github.io/;;https://www.cs.cmu.edu/~morency/;http://vllab.ee.ntu.edu.tw/ycwang.html;https://www.cs.cmu.edu/~rsalakhu/;https://sites.google.com/view/wancyuanfan", "dblp": "51/8564;154/3702;31/739;30/1690;;300/5836", "google_scholar": "https://scholar.google.com.tw/citations?user=cJ5oowQAAAAJ;;https://scholar.google.com.tw/citations?user=APgaFK0AAAAJ;HSGvdtoAAAAJ;;EIPHoLEAAAAJ", "orcid": ";;0000-0001-6376-7696;0000-0002-2333-157X;;", "linkedin": ";;morency;;;", "or_profile": "~Cheng-Fu_Yang1;~Yao-Hung_Hubert_Tsai1;~Louis-Philippe_Morency1;~Yu-Chiang_Frank_Wang2;~Russ_Salakhutdinov1;~WanCyuan_Fan1", "aff": "University of California, Los Angeles;Apple;Carnegie Mellon University;National Taiwan University;School of Computer Science, Carnegie Mellon University;National Taiwan University", "aff_domain": "cs.ucla.edu;apple.com;cmu.edu;ntu.edu.tw;cs.cmu.edu;ntu.edu.tw", "position": "PhD student;Principal Researcher;Associate Professor;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nyang2022paraphrasing,\ntitle={Paraphrasing Is All You Need for Novel Object Captioning},\nauthor={Cheng-Fu Yang and Yao-Hung Hubert Tsai and Wan-Cyuan Fan and Ruslan Salakhutdinov and Louis-Philippe Morency and Yu-Chiang Frank Wang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0zHXmOXwkIf}\n}", "github": "", "project": "", "reviewers": "azLR;B4GQ;HSxb;YAnp", "pdf_size": 4172405, "rating": "6;7;7;7", "confidence": "4;3;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "contribution": "3;3;3;3", "wc_summary": "88;279;211;85", "wc_strengths_and_weaknesses": "208;110;331;134", "wc_questions": "39;67;144;1", "wc_limitations": "17;35;57;3", "wc_review": "352;491;743;223", "wc_reply_reviewers": "0;0;94;18", "wc_reply_authors": "1489;344;1863;347", "reply_reviewers": "0;0;2;1", "reply_authors": "3;1;5;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 165.75, 82.82323043687683 ], "wc_strengths_and_weaknesses_avg": [ 195.75, 86.03596631641909 ], "wc_questions_avg": [ 62.75, 52.43269495267242 ], "wc_limitations_avg": [ 28.0, 20.223748416156685 ], "wc_review_avg": [ 452.25, 192.7710753717995 ], "wc_reply_reviewers_avg": [ 28.0, 38.80721582386451 ], "wc_reply_authors_avg": [ 1010.75, 678.2648358126787 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9705898153504440209&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.ucla.edu;apple.com;cmu.edu;ntu.edu.tw;cs.cmu.edu;ntu.edu.tw", "author_num": 6, "aff_unique_index": "0;1;2;3;2;3", "aff_unique_norm": "University of California, Los Angeles;Apple;Carnegie Mellon University;National Taiwan University", "aff_unique_dep": ";Apple Inc.;;", "aff_unique_url": "https://www.ucla.edu;https://www.apple.com;https://www.cmu.edu;https://www.ntu.edu.tw", "aff_unique_abbr": "UCLA;Apple;CMU;NTU", "aff_campus_unique_index": "0;2;3;2", "aff_campus_unique": "Los Angeles;;Taiwan;Pittsburgh", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Bessel Equivariant Networks for Inversion of Transmission Effects in Multi-Mode Optical Fibres", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54798", "id": "0zlLhfG6rxI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/666dd0d92a64396e753c691db93493d4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=0zlLhfG6rxI", "openreview": "https://openreview.net/forum?id=0zlLhfG6rxI", "poster": "/media/PosterPDFs/NeurIPS%202022/54798.png?t=1669209119.7067862", "slides": "https://nips.cc/virtual/2022/poster/54798", "video": "https://nips.cc/virtual/2022/poster/54798", "author_site": "Joshua Mitton, Simon Mekhail, Miles Padgett, Daniele Faccio, Marco Aversa, Roderick Murray-Smith", "tldr": "A physics-informed equivariant model to invert the transmission effects of multi-mode optical fibres.", "abstract": "We develop a new type of model for solving the task of inverting the transmission effects of multi-mode optical fibres through the construction of an $\\mathrm{SO}^{+}(2,1)$-equivariant neural network. This model takes advantage of the azimuthal correlations known to exist in fibre speckle patterns and naturally accounts for the difference in spatial arrangement between input and speckle patterns. 
In addition, we use a second post-processing network to remove circular artifacts, fill gaps, and sharpen the images, which is required due to the nature of optical fibre transmission. This two-stage approach allows for the inspection of the predicted images produced by the more robust physically motivated equivariant model, which could be useful in a safety-critical application, or by the output of both models, which produces high-quality images. Further, this model can scale to previously unachievable resolutions of imaging with multi-mode optical fibres and is demonstrated on $256 \\times 256$ pixel images. This is a result of improving the trainable parameter requirement from $\\mathcal{O}(N^4)$ to $\\mathcal{O}(m)$, where $N$ is the pixel size and $m$ is the number of fibre modes. Finally, this model generalises to new images, outside of the set of training data classes, better than previous models.", "keywords": "physics;physics informed machine learning;inverse problems;optical fibre;optics;fibres;multi-mode fibre;equivariance;group theory", "primary_area": "", "supplementary_material": "/attachment/3ee26cf72fccb0f7f13997f513111a883b75ec8c.pdf", "author": "Joshua Mitton;Simon Peter Mekhail;Miles Padgett;Daniele Faccio;Marco Aversa;Roderick Murray-Smith", "authorids": "~Joshua_Mitton1;~Simon_Peter_Mekhail1;~Miles_Padgett1;~Daniele_Faccio1;~Marco_Aversa1;~Roderick_Murray-Smith1", "gender": ";M;M;M;M;M", "homepage": "https://github.com/JoshuaMitton;;;;https://marcoaversa.github.io;http://www.dcs.gla.ac.uk/~rod/", "dblp": ";;;198/1001;325/5090;78/604", "google_scholar": "https://scholar.google.co.uk/citations?user=OHIUJkkAAAAJ;;https://scholar.google.co.uk/citations?user=1OXAatkAAAAJ;;XSd_7RgAAAAJ;https://scholar.google.co.uk/citations?user=laX7LzQAAAAJ", "orcid": ";0000-0003-4775-4208;0000-0001-6643-0618;;0000-0002-7724-7488;", "linkedin": ";;;;marco-aversa-5bb15b169/;rodms/", "or_profile": "~Joshua_Mitton1;~Simon_Peter_Mekhail1;~Miles_Padgett1;~Daniele_Faccio1;~Marco_Aversa1;~Roderick_Murray-Smith1", "aff": "University of Glasgow;University of Glasgow;University of Glasgow;University of Glasgow;University of Glasgow;University of Glasgow", "aff_domain": "gla.ac.uk;glasgow.ac.uk;glasgow.ac.uk;glasgow.ac.uk;gla.ac.uk;gla.ac.uk", "position": "PhD student;Postdoc;Full Professor;Full Professor;PhD student;Professor", "bibtex": "@inproceedings{\nmitton2022bessel,\ntitle={Bessel Equivariant Networks for Inversion of Transmission Effects in Multi-Mode Optical Fibres},\nauthor={Joshua Mitton and Simon Peter Mekhail and Miles Padgett and Daniele Faccio and Marco Aversa and Roderick Murray-Smith},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=0zlLhfG6rxI}\n}", "github": "", "project": "", "reviewers": "iftp;cTsz;88iJ", "pdf_size": 1070706, "rating": "5;6;8", "confidence": "5;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "contribution": "3;3;3", "wc_summary": "85;75;85", "wc_strengths_and_weaknesses": "145;77;116", "wc_questions": "18;86;34", "wc_limitations": "26;6;7", "wc_review": "274;244;242", "wc_reply_reviewers": "131;0;11", "wc_reply_authors": "2347;533;262", "reply_reviewers": "1;0;1", "reply_authors": "4;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.66666666666667, 4.714045207910316 ], "wc_strengths_and_weaknesses_avg": [ 112.66666666666667, 27.86076492528915 ], "wc_questions_avg": [ 46.0, 29.028721409436322 ], "wc_limitations_avg": [ 13.0, 9.201449161228174 ], "wc_review_avg": [ 253.33333333333334, 14.636332266733433 ], "wc_reply_reviewers_avg": [ 47.333333333333336, 59.33146064460424 ], "wc_reply_authors_avg": [ 1047.3333333333333, 925.6386383945352 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7559289460184544, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18207011023654231724&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "gla.ac.uk;glasgow.ac.uk;glasgow.ac.uk;glasgow.ac.uk;gla.ac.uk;gla.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Glasgow", "aff_unique_dep": "", "aff_unique_url": "https://www.gla.ac.uk", "aff_unique_abbr": "Glasgow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Instance-based Learning for Knowledge Base Completion", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54556", "id": "1-F7HbLInPy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c6875cb36db4ba791b3c388881f31788-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1-F7HbLInPy", "openreview": "https://openreview.net/forum?id=1-F7HbLInPy", "poster": "/media/PosterPDFs/NeurIPS%202022/54556.png?t=1668904164.0600162", "slides": "https://nips.cc/virtual/2022/poster/54556", "video": "https://nips.cc/virtual/2022/poster/54556", "author_site": "Wanyun Cui, Xingran Chen", "tldr": "In this paper, we propose a new method for knowledge base completion (KBC): instance-based learning (IBL).", "abstract": "In this paper, we propose a new method for knowledge base completion (KBC): instance-based learning (IBL). For example, to answer (Jill Biden, lived city, ?), instead of going directly to Washington D.C., our goal is to find Joe Biden, who has the same lived city as Jill Biden. Through prototype entities, IBL provides interpretability. We develop theories for modeling prototypes and combining IBL with translational models. Experiments on various tasks confirmed the IBL model's effectiveness and interpretability.\n\nIn addition, IBL sheds light on the mechanism of rule-based KBC models. 
Previous research has generally agreed that rule-based models provide rules with semantically compatible premise and hypothesis. We challenge this view. We begin by demonstrating that some logical rules represent {\\it instance-based equivalence} (i.e. prototypes) rather than semantic compatibility. These are denoted as {\\it IBL rules}. Surprisingly, despite occupying only a small portion of the rule space, IBL rules outperform non-IBL rules in all four benchmarks. We use a variety of experiments to demonstrate that rule-based models work because they have the ability to represent instance-based equivalence via IBL rules. The findings provide new insights into how rule-based models work and how to interpret their rules.", "keywords": "knowledge base completion", "primary_area": "", "supplementary_material": "/attachment/65f3f6903bb3e7d4ca8d2e9744ce142ed9012b29.pdf", "author": "Wanyun Cui;Xingran Chen", "authorids": "~Wanyun_Cui1;~Xingran_Chen1", "gender": "M;M", "homepage": "https://cuiwanyun.github.io/;https://www.chenxingran.com/", "dblp": "116/5305;203/8349", "google_scholar": "https://scholar.google.com.hk/citations?user=FP7ANisAAAAJ;X01oTv8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Wanyun_Cui1;~Xingran_Chen1", "aff": "Shanghai University of Finance and Economics;University of Michigan - Ann Arbor", "aff_domain": "sufe.edu.cn;umich.edu", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\ncui2022instancebased,\ntitle={Instance-based Learning for Knowledge Base Completion},\nauthor={Wanyun Cui and Xingran Chen},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1-F7HbLInPy}\n}", "github": "", "project": "", "reviewers": "DVJo;iZrV;qiwG;ZosV", "pdf_size": 367598, "rating": "4;5;6;7", "confidence": "3;4;3;3", "soundness": "2;2;3;4", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "contribution": "2;3;2;3", "wc_summary": "80;93;128;111", "wc_strengths_and_weaknesses": "104;135;41;106", "wc_questions": "110;66;3;163", "wc_limitations": "11;1;1;8", "wc_review": "305;295;173;388", "wc_reply_reviewers": "61;0;7;0", "wc_reply_authors": "1093;1186;498;461", "reply_reviewers": "1;0;1;0", "reply_authors": "4;4;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.0, 18.152134860671346 ], "wc_strengths_and_weaknesses_avg": [ 96.5, 34.311076928595526 ], "wc_questions_avg": [ 85.5, 58.72180174347514 ], "wc_limitations_avg": [ 5.25, 4.380353866983808 ], "wc_review_avg": [ 290.25, 76.71823446873631 ], "wc_reply_reviewers_avg": [ 17.0, 25.563646062328434 ], "wc_reply_authors_avg": [ 809.5, 331.89192518047196 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14765487766577879365&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "sufe.edu.cn;umich.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Shanghai University of Finance and Economics;University of Michigan", 
"aff_unique_dep": ";", "aff_unique_url": "http://www.sufe.edu.cn;https://www.umich.edu", "aff_unique_abbr": "SUFE;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Chartalist: Labeled Graph Datasets for UTXO and Account-based Blockchains", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55744", "id": "10iA3OowAV3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/e245189a86310b6667ac633dbb922d50-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=10iA3OowAV3", "openreview": "https://openreview.net/forum?id=10iA3OowAV3", "poster": "/media/PosterPDFs/NeurIPS%202022/55744.png?t=1667860843.803454", "slides": "https://nips.cc/virtual/2022/poster/55744", "video": "https://nips.cc/virtual/2022/poster/55744", "author_site": "Kiarash Shamsi, Friedhelm Victor, Murat Kantarcioglu, Yulia Gel, Cuneyt G Akcora", "tldr": "We created the first blockchain ML-Ready dataset platform", "abstract": "Machine learning on blockchain graphs is an emerging field with many applications such as ransomware payment tracking, price manipulation analysis, and money laundering detection. However, analyzing blockchain data requires domain expertise and computational resources, which pose a significant barrier and hinder advancement in this field. \n\nWe introduce Chartalist, the first comprehensive platform to methodically access and use machine learning across a large selection of blockchains to address this challenge. Chartalist contains ML-ready datasets from unspent transaction output (UTXO) (e.g., Bitcoin) and account-based blockchains (e.g., Ethereum). We envision that Chartalist can facilitate data modeling, analysis, and representation of blockchain data and attract a wider community of scientists to analyze blockchains. 
Chartalist is an open-science initiative at https://github.com/cakcora/Chartalist.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/810774a95ee828411b03e167bd12f1265b7978a0.pdf", "author": "Kiarash Shamsi;Friedhelm Victor;Murat Kantarcioglu;Yulia Gel;Cuneyt Gurcan Akcora", "authorids": "~Kiarash_Shamsi1;~Friedhelm_Victor1;~Murat_Kantarcioglu1;~Yulia_Gel1;~Cuneyt_Gurcan_Akcora2", "gender": "M;;;;M", "homepage": ";;https://www.kantarcioglu.net;;http://cakcora.github.io", "dblp": ";;36/195.html;;64/10038", "google_scholar": ";;https://scholar.google.com.tw/citations?user=qXb4xQMAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-9795-9063;;0000-0002-2882-6950", "linkedin": "https://ir.linkedin.com/in/kiarash-shamsi-a45096b1;;kantarcioglu/;;cuneyt-gurcan-akcora-97272421/", "or_profile": "~Kiarash_Shamsi1;~Friedhelm_Victor1;~Murat_Kantarcioglu1;~Yulia_Gel1;~Cuneyt_Gurcan_Akcora2", "aff": "University of Manitoba;;Harvard University;;", "aff_domain": "umanitoba.ca;;harvard.edu;;", "position": "PhD student;;Faculty Associate;;", "bibtex": "@inproceedings{\nshamsi2022chartalist,\ntitle={Chartalist: Labeled Graph Datasets for {UTXO} and Account-based Blockchains},\nauthor={Kiarash Shamsi and Friedhelm Victor and Murat Kantarcioglu and Yulia Gel and Cuneyt Gurcan Akcora},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=10iA3OowAV3}\n}", "github": "", "project": "", "reviewers": "tG6T;Hrz4;4zbJ;xh75;JJtm;puuZ", "pdf_size": 337598, "rating": "6;6;6;7;7;8", "confidence": "5;4;3;3;3;3", "wc_summary_and_contributions": "74;99;76;204;81;31", "wc_strengths": "95;33;39;155;58;53", "wc_weaknesses": "359;148;60;103;83;32", "wc_correctness": "36;12;2;37;1;1", "wc_clarity": "12;26;13;114;1;10", "wc_relation_to_prior_work": "56;11;7;58;1;1", "wc_documentation": "44;8;8;13;1;21", "wc_additional_feedback": "42;83;1;23;4;7", "wc_review": "718;420;206;707;230;156", "wc_reply_reviewers": "201;359;0;94;0;0", "wc_reply_authors": "1251;1390;382;597;479;130", "reply_reviewers": "1;3;0;2;0;0", "reply_authors": "2;4;1;3;1;1", "rating_avg": [ 6.666666666666667, 0.7453559924999299 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "wc_summary_and_contributions_avg": [ 94.16666666666667, 53.20844753315858 ], "wc_strengths_avg": [ 72.16666666666667, 42.00165340660875 ], "wc_weaknesses_avg": [ 130.83333333333334, 108.1687210898891 ], "wc_correctness_avg": [ 14.833333333333334, 15.784134087395762 ], "wc_clarity_avg": [ 29.333333333333332, 38.564520251420504 ], "wc_relation_to_prior_work_avg": [ 22.333333333333332, 24.76332413514434 ], "wc_documentation_avg": [ 15.833333333333334, 13.969212178533507 ], "wc_additional_feedback_avg": [ 26.666666666666668, 28.81357704663241 ], "wc_review_avg": [ 406.1666666666667, 231.5227970536715 ], "wc_reply_reviewers_avg": [ 109.0, 133.43662665600226 ], "wc_reply_authors_avg": [ 704.8333333333334, 459.17223227116955 ], "reply_reviewers_avg": [ 1.0, 1.1547005383792515 ], "reply_authors_avg": [ 2.0, 1.1547005383792515 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5855400437691199, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5478295064143822298&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "umanitoba.ca;;harvard.edu;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of Manitoba;Harvard University", "aff_unique_dep": ";", 
"aff_unique_url": "https://umanitoba.ca;https://www.harvard.edu", "aff_unique_abbr": "U of M;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "title": "Provable Defense against Backdoor Policies in Reinforcement Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52877", "id": "11WmFbrIt26", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5e67e6a814526079ad8505bf6d926fb6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=11WmFbrIt26", "openreview": "https://openreview.net/forum?id=11WmFbrIt26", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/52877", "video": "https://nips.cc/virtual/2022/poster/52877", "author_site": "Shubham Bharti, Xuezhou Zhang, Adish Singla, Jerry Zhu", "tldr": "We propose a provable defense mechanism against backdoor policies in reinforcement learning.", "abstract": "We propose a provable defense mechanism against backdoor policies in reinforcement learning under subspace trigger assumption. A backdoor policy is a security threat where an adversary publishes a seemingly well-behaved policy which in fact allows hidden triggers. During deployment, the adversary can modify observed states in a particular way to trigger unexpected actions and harm the agent. We assume the agent does not have the resources to re-train a good policy. Instead, our defense mechanism sanitizes the backdoor policy by projecting observed states to a `safe subspace', estimated from a small number of interactions with a clean (non-triggered) environment. Our sanitized policy achieves $\\epsilon$ approximate optimality in the presence of triggers, provided the number of clean interactions is $O\\left(\\frac{D}{(1-\\gamma)^4 \\epsilon^2}\\right)$ where $\\gamma$ is the discounting factor and $D$ is the dimension of state space. Empirically, we show that our sanitization defense performs well on two Atari game environments.", "keywords": "Adversarial Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/64b0804587ee824b9f77d3ce6b0a039c32be06ec.zip", "author": "Shubham Kumar Bharti;Xuezhou Zhang;Adish Singla;Jerry Zhu", "authorids": "~Shubham_Kumar_Bharti1;~Xuezhou_Zhang2;~Adish_Singla2;~Jerry_Zhu1", "gender": "M;;M;M", "homepage": "http://skbharti.github.io;https://machineteaching.mpi-sws.org/adishsingla.html;http://pages.cs.wisc.edu/~jerryzhu/;https://zhangxz1123.github.io/", "dblp": "255/5381;58/657;z/XiaojinZhu;213/7993", "google_scholar": ";kXz2seUAAAAJ;https://scholar.google.com.tw/citations?user=hqTu-QcAAAAJ;tR-p-r8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shubham_Kumar_Bharti1;~Adish_Kumar_Singla1;~Xiaojin_Zhu1;~Xuezhou_Zhang1", "aff": "Department of Computer Science, University of Wisconsin - Madison;Max Planck Institute for Software Systems (MPI-SWS);University of Wisconsin, Madison;Princeton University", "aff_domain": "cs.wisc.edu;mpi-sws.org;wisc.edu;princeton.edu", "position": "PhD student;Researcher;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nbharti2022provable,\ntitle={Provable Defense against Backdoor Policies in Reinforcement Learning},\nauthor={Shubham Kumar Bharti and Xuezhou Zhang and Adish Singla and Jerry Zhu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=11WmFbrIt26}\n}", "github": "", "project": "", "reviewers": "iqtz;5omh;uJFB;JEuN", "pdf_size": 1319767, "rating": "5;6;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "contribution": "2;2;3;3", "wc_summary": "71;45;96;119", "wc_strengths_and_weaknesses": "125;68;177;86", "wc_questions": "449;29;24;176", "wc_limitations": "5;29;12;39", "wc_review": "650;171;309;420", "wc_reply_reviewers": "90;0;0;0", "wc_reply_authors": "1175;135;259;597", "reply_reviewers": "2;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 82.75, 27.625848403261752 ], "wc_strengths_and_weaknesses_avg": [ 114.0, 41.803109932156964 ], "wc_questions_avg": [ 169.5, 172.53477910264934 ], "wc_limitations_avg": [ 21.25, 13.460590625971804 ], "wc_review_avg": [ 387.5, 175.35464065715513 ], "wc_reply_reviewers_avg": [ 22.5, 38.97114317029974 ], "wc_reply_authors_avg": [ 541.5, 402.9426137801759 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15582632130939406311&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 9, "email": "cs.wisc.edu;mpi-sws.org;wisc.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Wisconsin-Madison;Max Planck Institute for Software Systems;University of Wisconsin;Princeton University", "aff_unique_dep": "Department of Computer Science;;;", "aff_unique_url": "https://www.wisc.edu;https://www.mpi-sws.org;https://www.wisc.edu;https://www.princeton.edu", "aff_unique_abbr": "UW-Madison;MPI-SWS;UW;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Pruning has a disparate impact on model accuracy", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53970", "id": "11nMVZK0WYM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/7087c949df293f13c0052ac825936e6f-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=11nMVZK0WYM", "openreview": "https://openreview.net/forum?id=11nMVZK0WYM", "poster": "/media/PosterPDFs/NeurIPS%202022/27e9661e033a73a6ad8cefcde965c54d.png?t=1666498084.782803", "slides": "https://nips.cc/virtual/2022/poster/53970", "video": "https://nips.cc/virtual/2022/poster/53970", "author_site": "Cuong Tran, Ferdinando Fioretto, Jung-Eun Kim, Rakshit Naidu", "tldr": "The paper observes that pruning causes disparate impacts on the accuracy of different groups of individuals and sheds light on why such disparate impacts arise.", "abstract": "Network pruning is a widely-used compression technique that is able to significantly scale down overparameterized models with minimal loss of accuracy. This paper shows that pruning may create or exacerbate disparate impacts. 
The paper sheds light on the factors that cause such disparities, suggesting that differences in gradient norms and distance to the decision boundary across groups are responsible for this critical issue. It analyzes these factors in detail, providing both theoretical and empirical support, and proposes a simple, yet effective, solution that mitigates the disparate impacts caused by pruning. ", "keywords": "Model pruning;Fairness", "primary_area": "", "supplementary_material": "/attachment/191b9888609a85aaabb684a4dc32b3680f1409e9.pdf", "author": "Cuong Tran;Ferdinando Fioretto;Jung-Eun Kim;Rakshit Naidu", "authorids": "~Cuong_Tran1;~Ferdinando_Fioretto1;~Jung-Eun_Kim1;~Rakshit_Naidu1", "gender": "M;M;F;M", "homepage": ";http://nandofioretto.com;https://jungeunkim.wordpress.ncsu.edu/;https://rakshit-naidu.github.io/", "dblp": "275/3885;119/6404;33/1183;https://dblp.uni-trier.de/pid/268/5808", "google_scholar": "RiYBF7sAAAAJ;ASf9Q04AAAAJ;https://scholar.google.com/citations?hl=en;bbscqSsAAAAJ", "orcid": ";;;", "linkedin": ";;;rakshit-naidu-8b3431166/", "or_profile": "~Cuong_Tran1;~Ferdinando_Fioretto1;~Jung-Eun_Kim1;~Rakshit_Naidu1", "aff": "Syracuse University;Syracuse University;Syracuse University;School of Computer Science, Carnegie Mellon University", "aff_domain": "syr.edu;syr.edu;syr.edu;cs.cmu.edu", "position": "PhD student;Assistant Professor;Assistant Professor;MS student", "bibtex": "@inproceedings{\ntran2022pruning,\ntitle={Pruning has a disparate impact on model accuracy},\nauthor={Cuong Tran and Ferdinando Fioretto and Jung-Eun Kim and Rakshit Naidu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=11nMVZK0WYM}\n}", "github": "", "project": "", "reviewers": "3Nrs;B2Lo;Zs7H", "pdf_size": 356203, "rating": "6;7;7", "confidence": "4;3;4", "soundness": "3;3;2", "novelty": "3;4;3", "presentation": "3;4;3", "contribution": "3;4;3", "wc_summary": "60;83;86", "wc_strengths_and_weaknesses": "55;191;333", "wc_questions": "180;198;737", "wc_limitations": "41;51;143", "wc_review": "336;523;1299", "wc_reply_reviewers": "81;44;397", "wc_reply_authors": "820;332;1691", "reply_reviewers": "1;1;2", "reply_authors": "3;2;5", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.33333333333333, 11.61416759345623 ], "wc_strengths_and_weaknesses_avg": [ 193.0, 113.50183552113448 ], "wc_questions_avg": [ 371.6666666666667, 258.4341738668132 ], "wc_limitations_avg": [ 78.33333333333333, 45.908120801831515 ], "wc_review_avg": [ 719.3333333333334, 416.93511352354204 ], "wc_reply_reviewers_avg": [ 174.0, 158.40664969207154 ], "wc_reply_authors_avg": [ 947.6666666666666, 562.1057630810138 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8812509374287892112&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 10, "email": "syr.edu;syr.edu;syr.edu;cs.cmu.edu", "author_num": 4, 
"aff_unique_index": "0;0;0;1", "aff_unique_norm": "Syracuse University;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://www.syracuse.edu;https://www.cmu.edu", "aff_unique_abbr": "Syracuse;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Coordinate Linear Variance Reduction for Generalized Linear Programming", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53988", "id": "12nqqeQnDW7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/8a54a80ffc2834689ffdd0920202018e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=12nqqeQnDW7", "openreview": "https://openreview.net/forum?id=12nqqeQnDW7", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53988", "video": "https://nips.cc/virtual/2022/poster/53988", "author_site": "Chaobing Song, Cheuk Yin Lin, Stephen Wright, Jelena Diakonikolas", "tldr": "We provide a novel variance reduced primal-dual algorithm for generalized linear programs with improved theoretical and empirical performance among primal-dual methods and that is competitive with off-the-shelf solvers on considered datasets.", "abstract": "We study a class of generalized linear programs (GLP) in a large-scale setting, which includes simple, possibly nonsmooth convex regularizer and simple convex set constraints. By reformulating (GLP) as an equivalent convex-concave min-max problem, we show that the linear structure in the problem can be used to design an efficient, scalable first-order algorithm, to which we give the name Coordinate Linear Variance Reduction (CLVR; pronounced ``clever''). CLVR yields improved complexity results for (GLP) that depend on the max row norm of the linear constraint matrix in (GLP) rather than the spectral norm. When the regularization terms and constraints are separable, CLVR admits an efficient lazy update strategy that makes its complexity bounds scale with the number of nonzero elements of the linear constraint matrix in (GLP) rather than the matrix dimensions. On the other hand, for the special case of linear programs, by exploiting sharpness, we propose a restart scheme for CLVR to obtain empirical linear convergence. Then we show that Distributionally Robust Optimization (DRO) problems with ambiguity sets based on both $f$-divergence and Wasserstein metrics can be reformulated as (GLPs) by introducing sparsely connected auxiliary variables. 
We complement our theoretical guarantees with numerical experiments that verify our algorithm's practical effectiveness, in terms of wall-clock time and number of data passes.", "keywords": "Linear Programming;Variance Reduction;Min-max optimization;Distributionally Robust Optimization", "primary_area": "", "supplementary_material": "/attachment/568e696325b266e8298f44774ba3fbb8965feedf.zip", "author": "Chaobing Song;Cheuk Yin Lin;Stephen Wright;Jelena Diakonikolas", "authorids": "~Chaobing_Song3;~Cheuk_Yin_Lin1;~Stephen_Wright1;~Jelena_Diakonikolas2", "gender": "M;M;M;F", "homepage": "https://sites.google.com/view/chaobing-song/home;https://ericlincc.com;https://wrightstephen.github.io/sw_proj/;http://www.jelena-diakonikolas.com/", "dblp": ";285/6061;75/2677;147/5178", "google_scholar": "https://scholar.google.com/citations?hl=en;;VFQRIOwAAAAJ;J8ixfu8AAAAJ", "orcid": ";;;0000-0003-3439-0310", "linkedin": ";;;", "or_profile": "~Chaobing_Song3;~Cheuk_Yin_Lin1;~Stephen_Wright1;~Jelena_Diakonikolas2", "aff": "University of Wisconsin, Madison;Department of Computer Science, University of Wisconsin, Madison;University of Wisconsin, Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;cs.wisc.edu;wisc.edu;wisc.edu", "position": "Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsong2022coordinate,\ntitle={Coordinate Linear Variance Reduction for Generalized Linear Programming},\nauthor={Chaobing Song and Cheuk Yin Lin and Stephen Wright and Jelena Diakonikolas},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=12nqqeQnDW7}\n}", "github": "", "project": "", "reviewers": "ogmY;FrTQ;bGMS;aEEZ", "pdf_size": 804899, "rating": "5;6;6;6", "confidence": "3;4;4;4", "soundness": "2;2;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "contribution": "2;3;3;3", "wc_summary": "104;39;50;75", "wc_strengths_and_weaknesses": "101;529;71;78", "wc_questions": "119;152;3;7", "wc_limitations": "1;1;3;5", "wc_review": "325;721;127;165", "wc_reply_reviewers": "0;468;0;149", "wc_reply_authors": "628;2012;139;583", "reply_reviewers": "0;8;0;1", "reply_authors": "1;8;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 25.029982021567655 ], "wc_strengths_and_weaknesses_avg": [ 194.75, 193.29818286781693 ], "wc_questions_avg": [ 70.25, 66.29998114630199 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 334.5, 235.1908799252216 ], "wc_reply_reviewers_avg": [ 154.25, 191.08424189346437 ], "wc_reply_authors_avg": [ 840.5, 702.8472451393689 ], "reply_reviewers_avg": [ 2.25, 3.344772040064913 ], "reply_authors_avg": [ 3.0, 2.9154759474226504 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2883862209535127654&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "wisc.edu;cs.wisc.edu;wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": 
"https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW;UW-Madison", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Challenging Common Assumptions in Convex Reinforcement Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53468", "id": "13S0tUMqynI", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1cb5b3d64bdf3c6642c8d9a8fbecd019-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=13S0tUMqynI", "openreview": "https://openreview.net/forum?id=13S0tUMqynI", "poster": "/media/PosterPDFs/NeurIPS%202022/53468.png?t=1669054920.342183", "slides": "https://nips.cc/virtual/2022/poster/53468", "video": "https://nips.cc/virtual/2022/poster/53468", "author_site": "Mirco Mutti, Riccardo De Santi, Piersilvio De Bartolomeis, Marcello Restelli", "tldr": "", "abstract": "The classic Reinforcement Learning (RL) formulation concerns the maximization of a scalar reward function. More recently, convex RL has been introduced to extend the RL formulation to all the objectives that are convex functions of the state distribution induced by a policy. Notably, convex RL covers several relevant applications that do not fall into the scalar formulation, including imitation learning, risk-averse RL, and pure exploration. In classic RL, it is common to optimize an infinite trials objective, which accounts for the state distribution instead of the empirical state visitation frequencies, even though the actual number of trajectories is always finite in practice. This is theoretically sound since the infinite trials and finite trials objectives are equivalent and thus lead to the same optimal policy. In this paper, we show that this hidden assumption does not hold in convex RL. In particular, we prove that erroneously optimizing the infinite trials objective in place of the actual finite trials one, as it is usually done, can lead to a significant approximation error. Since the finite trials setting is the default in both simulated and real-world RL, we believe shedding light on this issue will lead to better approaches and methodologies for convex RL, impacting relevant research areas such as imitation learning, risk-averse RL, and pure exploration among others. 
", "keywords": "Convex reinforcement learning;Reinforcement learning with general utilities;Theoretical aspects of reinforcement learning", "primary_area": "", "supplementary_material": "/attachment/f37b5177706a7518fa509470fd218159f85ca759.zip", "author": "Mirco Mutti;Riccardo De Santi;Piersilvio De Bartolomeis;Marcello Restelli", "authorids": "~Mirco_Mutti1;~Riccardo_De_Santi1;~Piersilvio_De_Bartolomeis1;~Marcello_Restelli1", "gender": ";M;;M", "homepage": ";http://www.riccardodesanti.com;https://pdebartol.github.io;http://home.deib.polimi.it/restelli/", "dblp": "222/2815;313/1635;;64/1011", "google_scholar": "GlLkJ9UAAAAJ;K7qyOj0AAAAJ;YtoJ9mQAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";;;0000-0002-6322-1076", "linkedin": ";riccardo-de-santi-426139135/;piersilvio-de-bartolomeis-19aa67170/;", "or_profile": "~Mirco_Mutti1;~Riccardo_De_Santi1;~Piersilvio_De_Bartolomeis1;~Marcello_Restelli1", "aff": "Universit\u00e0 di Bologna;Imperial College London;ETH Zurich;Politecnico di Milano", "aff_domain": "unibo.it;ic.ac.uk;ethz.ch;polimi.it", "position": "PhD student;Researcher;MS student;Associate Professor", "bibtex": "@inproceedings{\nmutti2022challenging,\ntitle={Challenging Common Assumptions in Convex Reinforcement Learning},\nauthor={Mirco Mutti and Riccardo De Santi and Piersilvio De Bartolomeis and Marcello Restelli},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=13S0tUMqynI}\n}", "github": "", "project": "", "reviewers": "CnKV;J5z6;eN8u;EMAG", "pdf_size": 1245115, "rating": "4;7;7;8", "confidence": "3;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "contribution": "2;3;3;3", "wc_summary": "71;90;115;284", "wc_strengths_and_weaknesses": "165;146;47;278", "wc_questions": "1212;329;163;140", "wc_limitations": "22;40;68;1", "wc_review": "1470;605;393;703", "wc_reply_reviewers": "748;13;27;49", "wc_reply_authors": "2754;885;671;336", "reply_reviewers": "5;1;1;1", "reply_authors": "5;2;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 140.0, 84.59018855635682 ], "wc_strengths_and_weaknesses_avg": [ 159.0, 82.0213386869539 ], "wc_questions_avg": [ 461.0, 439.67886007858056 ], "wc_limitations_avg": [ 32.75, 24.5903944661325 ], "wc_review_avg": [ 792.75, 406.7470805058101 ], "wc_reply_reviewers_avg": [ 209.25, 311.3120420092997 ], "wc_reply_authors_avg": [ 1161.5, 940.0198136209683 ], "reply_reviewers_avg": [ 2.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8533836393518336788&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "unibo.it;ic.ac.uk;ethz.ch;polimi.it", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Bologna;Imperial College London;ETH Zurich;Politecnico di Milano", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.unibo.it;https://www.imperial.ac.uk;https://www.ethz.ch;https://www.polimi.it", "aff_unique_abbr": "Unibo;ICL;ETHZ;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Italy;United Kingdom;Switzerland" }, { "title": "Knowledge Distillation from A Stronger Teacher", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54366", "id": "157Usp_kbi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/da669dfd3c36c93905a17ddba01eef06-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=157Usp_kbi", "openreview": "https://openreview.net/forum?id=157Usp_kbi", "poster": "/media/PosterPDFs/NeurIPS%202022/c9319967c038f9b923068dabdf60cfe3.png?t=1666257590.9184756", "slides": "https://nips.cc/virtual/2022/poster/54366", "video": "https://nips.cc/virtual/2022/poster/54366", "author_site": "Tao Huang, Shan You, Fei Wang, Chen Qian, Chang Xu", "tldr": "", "abstract": "Unlike existing knowledge distillation methods focus on the baseline settings, where the teacher models and training strategies are not that strong and competing as state-of-the-art approaches, this paper presents a method dubbed DIST to distill better from a stronger teacher. We empirically find that the discrepancy of predictions between the student and a stronger teacher may tend to be fairly severer. As a result, the exact match of predictions in KL divergence would disturb the training and make existing methods perform poorly. In this paper, we show that simply preserving the relations between the predictions of teacher and student would suffice, and propose a correlation-based loss to capture the intrinsic inter-class relations from the teacher explicitly. Besides, considering that different instances have different semantic similarities to each class, we also extend this relational match to the intra-class level. Our method is simple yet practical, and extensive experiments demonstrate that it adapts well to various architectures, model sizes and training strategies, and can achieve state-of-the-art performance consistently on image classification, object detection, and semantic segmentation tasks. Code is available at: https://github.com/hunto/DIST_KD.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/4219236435ffdf0f3c88bc6e7898c774c783a743.pdf", "author": "Tao Huang;Shan You;Fei Wang;Chen Qian;Chang Xu", "authorids": "~Tao_Huang5;~Shan_You3;~Fei_Wang9;~Chen_Qian1;~Chang_Xu4", "gender": "M;M;M;M;", "homepage": "https://taohuang.info;https://shanyou92.github.io/;;;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": "34/808-20;179/2548;;;97/2966-2", "google_scholar": "jkcRdBgAAAAJ;https://scholar.google.com/citations?hl=en;ljt16JkAAAAJ;AerkT0YAAAAJ;N4F_3eoAAAAJ", "orcid": ";0000-0003-1964-0430;;;0000-0002-4756-0609", "linkedin": ";;;;", "or_profile": "~Tao_Huang5;~Shan_You3;~Fei_Wang9;~Chen_Qian1;~Charles_Xu1", "aff": "SenseTime Research;SenseTime Research;University of Science and Technology of China;Tsinghua University;University of Sydney", "aff_domain": "sensetime.com;sensetime.com;mail.ustc.edu.cn;mails.tsinghua.edu.cn;sydney.edu.au", "position": "Researcher;Researcher;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2022knowledge,\ntitle={Knowledge Distillation from A Stronger Teacher},\nauthor={Tao Huang and Shan You and Fei Wang and Chen Qian and Chang Xu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=157Usp_kbi}\n}", "github": "", "project": "", "reviewers": "FMUS;RvKW;kJN1;xqDf", "pdf_size": 557418, "rating": "5;5;7;7", "confidence": "5;2;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;4;4", "contribution": "2;3;3;4", "wc_summary": "61;65;76;73", "wc_strengths_and_weaknesses": "251;125;179;180", "wc_questions": "2;32;23;3", "wc_limitations": "22;22;10;14", "wc_review": "336;244;288;270", "wc_reply_reviewers": "0;0;22;0", "wc_reply_authors": "656;544;275;122", "reply_reviewers": "0;0;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.75, 6.015604707757983 ], "wc_strengths_and_weaknesses_avg": [ 183.75, 44.751396626250674 ], "wc_questions_avg": [ 15.0, 12.90348790056394 ], "wc_limitations_avg": [ 17.0, 5.196152422706632 ], "wc_review_avg": [ 284.5, 33.596874854664684 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 399.25, 211.64755491146124 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.22941573387056177, "gs_citation": 318, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9782451594224614440&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sensetime.com;sensetime.com;mail.ustc.edu.cn;mails.tsinghua.edu.cn;sydney.edu.au", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "SenseTime;University of Science and Technology of China;Tsinghua University;University of Sydney", "aff_unique_dep": "SenseTime Research;;;", "aff_unique_url": "https://www.sensetime.com;http://www.ustc.edu.cn;https://www.tsinghua.edu.cn;https://www.sydney.edu.au", "aff_unique_abbr": "SenseTime;USTC;THU;USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Australia" }, { "title": "Multi-block-Single-probe Variance Reduced Estimator for Coupled Compositional Optimization", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55173", "id": "16nVkS8Twxo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d13ee73683fd5567e5c07634a25cd7b8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=16nVkS8Twxo", "openreview": "https://openreview.net/forum?id=16nVkS8Twxo", "poster": "/media/PosterPDFs/NeurIPS%202022/55173.png?t=1668346884.8052285", "slides": "https://nips.cc/virtual/2022/poster/55173", "video": "https://nips.cc/virtual/2022/poster/55173", "author_site": "Wei Jiang, Gang Li, Yibo Wang, Lijun Zhang, Tianbao Yang", "tldr": "We propose a novel stochastic estimator, which can track multiple functional mappings with stochastic samples of only O(1) functional mappings at each iteration.", "abstract": "Variance reduction techniques such as SPIDER/SARAH/STORM have been extensively studied to improve the convergence rates of stochastic non-convex optimization, which usually maintain and update a sequence of estimators for a single function across iterations. 
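For context on the single-function estimators this work generalizes, here is a hedged sketch of the generic STORM-style recursion from the variance-reduction literature — not the MSVR estimator proposed in this paper, which (as described next) extends the idea to m blocks with only O(1) blocks probed per iteration.

```python
# Hedged sketch of a STORM-style recursion for tracking a single function's
# value along a sequence of iterates:
#   d_t = g_t(x_t) + (1 - a) * (d_{t-1} - g_t(x_{t-1})),
# where both oracle calls at step t share the same random sample, so the
# difference term cancels much of the noise.
import numpy as np

rng = np.random.default_rng(0)

def noisy_value(x, xi):
    return x**2 + xi            # stochastic oracle for f(x) = x^2 (assumed toy example)

xs = np.linspace(1.0, 2.0, 50)  # a slowly moving sequence of iterates
a = 0.2
d = noisy_value(xs[0], rng.standard_normal())
for x_prev, x in zip(xs, xs[1:]):
    xi = rng.standard_normal()  # one fresh sample per step, used at both points
    d = noisy_value(x, xi) + (1 - a) * (d - noisy_value(x_prev, xi))
print("tracked estimate:", d, " true value:", xs[-1]**2)
```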
What if we need to track multiple functional mappings across iterations but only with access to stochastic samples of $\\mathcal{O}(1)$ functional mappings at each iteration? There is an important application in solving an emerging family of coupled compositional optimization problems in the form of $\\sum_{i=1}^m f_i(g_i(\\mathbf{w}))$, where $g_i$ is accessible through a stochastic oracle. The key issue is to track and estimate a sequence of $\\mathbf g(\\mathbf{w})=(g_1(\\mathbf{w}), \\ldots, g_m(\\mathbf{w}))$ across iterations, where $\\mathbf g(\\mathbf{w})$ has $m$ blocks and it is only allowed to probe $\\mathcal{O}(1)$ blocks to attain their stochastic values and Jacobians. To improve the complexity for solving these problems, we propose a novel stochastic method named Multi-block-Single-probe Variance Reduced (MSVR) estimator to track the sequence of $\\mathbf g(\\mathbf{w})$. It is inspired by STORM but introduces a customized error correction term to alleviate the noise not only in stochastic samples for the selected blocks but also in those blocks that are not sampled. With the help of the MSVR estimator, we develop several algorithms for solving the aforementioned compositional problems with improved complexities across a spectrum of settings with non-convex/convex/strongly convex/Polyak-Lojasiewicz (PL) objectives. Our results improve upon prior ones in several aspects, including the order of sample complexities and dependence on the strong convexity parameter. Empirical studies on multi-task deep AUC maximization demonstrate the better performance of using the new estimator. ", "keywords": "variance reduction;stochastic non-convex optimization;coupled compositional optimization;sample complexity", "primary_area": "", "supplementary_material": "/attachment/99f32c54ad05971ceb9f2c97cd8f3a70c3a465aa.pdf", "author": "Wei Jiang;Gang Li;Yibo Wang;Lijun Zhang;Tianbao Yang", "authorids": "~Wei_Jiang8;~Gang_Li17;~Yibo_Wang2;~Lijun_Zhang1;~Tianbao_Yang1", "gender": "M;M;;;M", "homepage": "http://www.lamda.nju.edu.cn/jiangw/?AspxAutoDetectCookieSupport=1;https://github.com/GangLii;;;https://people.tamu.edu/~tianbao-yang/publications.html", "dblp": ";;;;56/7047", "google_scholar": ";;;;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Wei_Jiang8;~Gang_Li17;~Yibo_Wang2;~Lijun_Zhang1;~Tianbao_Yang1", "aff": "Nanjing University;University of Iowa;;;University of Iowa", "aff_domain": "nju.edu.cn;uiowa.edu;;;uiowa.edu", "position": "PhD student;Researcher;;;Associate Professor", "bibtex": "@inproceedings{\njiang2022multiblocksingleprobe,\ntitle={Multi-block-Single-probe Variance Reduced Estimator for Coupled Compositional Optimization},\nauthor={Wei Jiang and Gang Li and Yibo Wang and Lijun Zhang and Tianbao Yang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=16nVkS8Twxo}\n}", "github": "", "project": "", "reviewers": "9gCU;v4Z6;qGar;YMmz", "pdf_size": 457293, "rating": "6;6;7;7", "confidence": "4;4;5;3", "soundness": "3;3;4;3", "novelty": "2;3;4;4", "presentation": "3;3;3;3", "contribution": "2;3;4;4", "wc_summary": "87;67;59;112", "wc_strengths_and_weaknesses": "163;122;135;107", "wc_questions": "323;81;41;107", "wc_limitations": "29;1;3;20", "wc_review": "602;271;238;346", "wc_reply_reviewers": "13;12;0;0", "wc_reply_authors": "522;300;160;307", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 81.25, 20.474068965401088 ], "wc_strengths_and_weaknesses_avg": [ 131.75, 20.58367071248469 ], "wc_questions_avg": [ 138.0, 109.36635680134911 ], "wc_limitations_avg": [ 13.25, 11.712706775122479 ], "wc_review_avg": [ 364.25, 142.73467518441342 ], "wc_reply_reviewers_avg": [ 6.25, 6.2599920127744575 ], "wc_reply_authors_avg": [ 322.25, 129.3761473379077 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5225669731385107049&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "nju.edu.cn;uiowa.edu;;;uiowa.edu", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Nanjing University;University of Iowa", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.uiowa.edu", "aff_unique_abbr": "Nanjing U;UIowa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Compositional generalization through abstract representations in human and artificial neural networks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54735", "id": "177GzUAds8U", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d0241a0fb1fc9be477bdfde5e0da276a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=177GzUAds8U", "openreview": "https://openreview.net/forum?id=177GzUAds8U", "poster": "/media/PosterPDFs/NeurIPS%202022/54735.png?t=1669572274.4301274", "slides": "https://nips.cc/virtual/2022/poster/54735", "video": "https://nips.cc/virtual/2022/poster/54735", "author_site": "Takuya Ito, Tim Klinger, Doug Schultz, John Murray, Michael Cole, Mattia Rigotti", "tldr": "We study the impact of abstract representations on compositional generalization in human imaging data and simple artificial neural networks.", "abstract": "Humans have a remarkable ability to rapidly generalize to new tasks that is difficult to reproduce in artificial learning systems.\nCompositionality has been proposed as a key mechanism supporting generalization in humans, but evidence of its neural implementation and impact on behavior is still scarce. Here we study the computational properties associated with compositional generalization in both humans and artificial neural networks (ANNs) on a highly compositional task. 
First, we identified behavioral signatures of compositional generalization in humans, along with their neural correlates using whole-cortex functional magnetic resonance imaging (fMRI) data. Next, we designed pretraining paradigms aided by a procedure we term primitives pretraining to endow compositional task elements into ANNs. We found that ANNs with this prior knowledge had greater correspondence with human behavior and neural compositional signatures. Importantly, primitives pretraining induced abstract internal representations, excellent zero-shot generalization, and sample-efficient learning. Moreover, it gave rise to a hierarchy of abstract representations that matched human fMRI data, where sensory rule abstractions emerged in early sensory areas, and motor rule abstractions emerged in later motor areas. Our findings give empirical support to the role of compositional generalization in human behavior, implicate abstract representations as its neural implementation, and illustrate that these representations can be embedded into ANNs by designing simple and efficient pretraining procedures.", "keywords": "neuroscience;cognition;compositionality;generalization;neural coding;abstraction;representations;human;fMRI;artificial neural networks", "primary_area": "", "supplementary_material": "/attachment/59cbf6341a6f1c85529064200cae7eec7c2249c3.pdf", "author": "Takuya Ito;Tim Klinger;Doug H Schultz;John D Murray;Michael Cole;Mattia Rigotti", "authorids": "~Takuya_Ito1;~Tim_Klinger1;~Doug_H_Schultz1;~John_D_Murray1;~Michael_Cole2;~Mattia_Rigotti1", "gender": "M;M;M;M;;", "homepage": "https://ito-takuya.github.io;https://researcher.watson.ibm.com/researcher/view.php?person=us-tklinger;;http://murraylab.yale.edu;https://www.colelab.org;http://www.matrig.net", "dblp": "32/3223;35/6223;;;;01/9816", "google_scholar": "Ym08_RMAAAAJ;dd8awr4AAAAJ;;LCAIVIUAAAAJ;;TmHt7CwAAAAJ", "orcid": "0000-0002-2060-4608;; 0000-0003-0809-9036; 0000-0003-4115-8181;;0000-0001-6466-2810", "linkedin": ";;;;;", "or_profile": "~Takuya_Ito1;~Tim_Klinger1;~Doug_H_Schultz1;~John_D_Murray1;~Michael_Cole2;~Mattia_Rigotti1", "aff": "Yale University;International Business Machines;University of Nebraska, Lincoln;Yale University;Rutgers University, Newark;International Business Machines", "aff_domain": "yale.edu;ibm.com;unl.edu;yale.edu;rutgers.edu;ibm.com", "position": "Postdoc;Research Staff Member;Assistant Professor;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nito2022compositional,\ntitle={Compositional generalization through abstract representations in human and artificial neural networks},\nauthor={Takuya Ito and Tim Klinger and Doug H Schultz and John D Murray and Michael Cole and Mattia Rigotti},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=177GzUAds8U}\n}", "github": "", "project": "", "reviewers": "h8RA;CF9z;HvQw;ETpS", "pdf_size": 1849417, "rating": "6;7;7;7", "confidence": "4;3;4;4", "soundness": "4;3;3;3", "novelty": "3;3;2;3", "presentation": "2;4;3;3", "contribution": "3;3;2;3", "wc_summary": "135;232;156;64", "wc_strengths_and_weaknesses": "150;521;355;280", "wc_questions": "238;131;373;64", "wc_limitations": "23;18;4;122", "wc_review": "546;902;888;530", "wc_reply_reviewers": "0;0;85;586", "wc_reply_authors": "1147;717;2477;1799", "reply_reviewers": "0;0;1;2", "reply_authors": "2;1;4;3", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 146.75, 59.87225985379206 ], "wc_strengths_and_weaknesses_avg": [ 326.5, 134.1240097819924 ], "wc_questions_avg": [ 201.5, 116.85568022137392 ], "wc_limitations_avg": [ 41.75, 46.852828089668186 ], "wc_review_avg": [ 716.5, 178.65819320702872 ], "wc_reply_reviewers_avg": [ 167.75, 243.9573477065202 ], "wc_reply_authors_avg": [ 1535.0, 666.4698042672301 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14883658921994090863&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "yale.edu;ibm.com;unl.edu;yale.edu;rutgers.edu;ibm.com", "author_num": 6, "aff_unique_index": "0;1;2;0;3;1", "aff_unique_norm": "Yale University;International Business Machines Corporation;University of Nebraska;Rutgers University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.yale.edu;https://www.ibm.com;https://www.unl.edu;https://www.rutgers.edu", "aff_unique_abbr": "Yale;IBM;UNL;Rutgers", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Lincoln;Newark", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SGAM: Building a Virtual 3D World through Simultaneous Generation and Mapping", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55439", "id": "17KCLTbRymw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/8ae9cf363ea625161f885b798c1f1f78-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=17KCLTbRymw", "openreview": "https://openreview.net/forum?id=17KCLTbRymw", "poster": "/media/PosterPDFs/NeurIPS%202022/55439.png?t=1669338177.5521307", "slides": "https://nips.cc/virtual/2022/poster/55439", "video": "https://nips.cc/virtual/2022/poster/55439", "author_site": "Yuan Shen, Wei-Chiu Ma, Shenlong Wang", "tldr": "We present a new 3D scene generation framework that simultaneously generates sensor data at novel viewpoints and builds a 3D map. ", "abstract": "We present simultaneous generation and mapping (SGAM), a novel 3D scene generation algorithm. Our goal is to produce a realistic, globally consistent 3D world on a large scale. Achieving this goal is challenging and goes beyond the capacities of existing 3D generation or video generation approaches, which fail to scale up to create large, globally consistent 3D scene structures. 
Towards tackling the challenges, we take a hybrid approach that integrates generative sensor modeling with 3D reconstruction. Our proposed approach is an autoregressive generative framework that simultaneously generates sensor data at novel viewpoints and builds a 3D map at each timestamp. Given an arbitrary camera trajectory, our method repeatedly applies this generation-and-mapping process for thousands of steps, allowing us to create a gigantic virtual world. Our model can be trained from RGB-D sequences without having access to the complete 3D scene structure. The generated scenes are readily compatible with various interactive environments and rendering engines. Experiments on CLEVER and GoogleEarth datasets demonstrate that ours can generate consistent, realistic, and geometrically-plausible scenes that compare favorably to existing view synthesis methods. Our project page is available at https://yshen47.github.io/sgam.", "keywords": "3D generation;mapping;view synthesis", "primary_area": "", "supplementary_material": "/attachment/be67bebb6516a7e4077b47476861c49562b9a1a0.pdf", "author": "Yuan Shen;Wei-Chiu Ma;Shenlong Wang", "authorids": "~Yuan_Shen2;~Wei-Chiu_Ma1;~Shenlong_Wang1", "gender": "M;M;M", "homepage": "https://yshen47.github.io;https://www.cs.cornell.edu/~weichiu/;https://shenlong.web.illinois.edu/", "dblp": ";151/4277;117/4842", "google_scholar": "fReKTsYAAAAJ;SVIdh6AAAAAJ;QFpswmcAAAAJ", "orcid": "0009-0001-4787-9644;;", "linkedin": "yshen47/;;shenlong-wang-3496023b", "or_profile": "~Yuan_Shen2;~Wei-Chiu_Ma1;~Shenlong_Wang1", "aff": "University of Illinois, Urbana Champaign;Massachusetts Institute of Technology;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;mit.edu;illinois.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshen2022sgam,\ntitle={{SGAM}: Building a Virtual 3D World through Simultaneous Generation and Mapping},\nauthor={Yuan Shen and Wei-Chiu Ma and Shenlong Wang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=17KCLTbRymw}\n}", "github": "", "project": "", "reviewers": "GmLE;QFY4;SFD9", "pdf_size": 2605027, "rating": "3;5;8", "confidence": "5;3;4", "soundness": "3;3;4", "novelty": "1;3;4", "presentation": "3;3;3", "contribution": "1;3;4", "wc_summary": "51;66;96", "wc_strengths_and_weaknesses": "93;391;176", "wc_questions": "9;38;74", "wc_limitations": "12;22;1", "wc_review": "165;517;347", "wc_reply_reviewers": "0;210;0", "wc_reply_authors": "469;806;425", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 71.0, 18.708286933869708 ], "wc_strengths_and_weaknesses_avg": [ 220.0, 125.57335173780568 ], "wc_questions_avg": [ 40.333333333333336, 26.587382136812362 ], "wc_limitations_avg": [ 11.666666666666666, 8.576453553512405 ], "wc_review_avg": [ 343.0, 143.7312306587078 ], "wc_reply_reviewers_avg": [ 70.0, 98.99494936611666 ], "wc_reply_authors_avg": [ 566.6666666666666, 170.18486680339382 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3973597071195132, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14555313886546111082&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "illinois.edu;mit.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://web.mit.edu", "aff_unique_abbr": "UIUC;MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "One-Inlier is First: Towards Efficient Position Encoding for Point Cloud Registration", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55075", "id": "19MmorTQhho", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2e163450c1ae3167832971e6da29f38d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=19MmorTQhho", "openreview": "https://openreview.net/forum?id=19MmorTQhho", "poster": "/media/PosterPDFs/NeurIPS%202022/55075.png?t=1669607388.4927423", "slides": "https://nips.cc/virtual/2022/poster/55075", "video": "https://nips.cc/virtual/2022/poster/55075", "author_site": "Fan Yang, Lin Guo, Zhi Chen, Wenbing Tao", "tldr": "we propose a simple but efficient position encoding for point cloud registration.", "abstract": "Transformer architecture has shown great potential for many visual tasks, including point cloud registration. As an order-aware module, position encoding plays an important role in Transformer architecture applied to point cloud registration task. In this paper, we propose OIF-PCR, a one-inlier based position encoding method for point cloud registration network. Specifically, we first find one correspondence by a differentiable optimal transport layer, and use it to normalize each point for position encoding. 
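A minimal sketch (ours; the differentiable optimal transport layer that selects the pair is omitted) of the one-inlier normalization idea: once a single correspondence is found, both clouds can be re-expressed relative to that pair, removing the dependence on their original reference frames.

```python
import numpy as np

def normalize_by_one_inlier(P, Q, i, j):
    """Recenter clouds P (N x 3) and Q (M x 3) on one corresponding pair
    P[i] <-> Q[j], so positions are encoded in a shared, frame-independent
    way. In the paper the pair comes from an optimal transport layer; here
    the indices i, j are simply given."""
    return P - P[i], Q - Q[j]

# Toy usage: Q is P rigidly translated; after normalization they coincide.
rng = np.random.default_rng(0)
P = rng.random((100, 3))
Q = P + np.array([5.0, -2.0, 1.0])
Pn, Qn = normalize_by_one_inlier(P, Q, i=7, j=7)
print(np.allclose(Pn, Qn))  # True for a pure translation
```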
It can eliminate the challenges brought by the different reference frames of two point clouds, and mitigate feature ambiguity by learning spatial consistency. Then, we propose a joint approach for establishing correspondence and position encoding, presenting an iterative optimization process. Finally, we design a progressive way for point cloud alignment and feature learning to gradually optimize the rigid transformation. The proposed position encoding is very efficient, requiring only a small amount of additional memory and computational overhead. Extensive experiments demonstrate that the proposed method achieves performance competitive with state-of-the-art methods in both indoor and outdoor scenes.", "keywords": "Point cloud registration;Position encoding;One-inlier;Joint optimization", "primary_area": "", "supplementary_material": "/attachment/0f7262bb0af9543ee9d16bbb77d648c4d1ba00b6.pdf", "author": "Fan Yang;Lin Guo;Zhi Chen;Wenbing Tao", "authorids": "~Fan_Yang40;~Lin_Guo3;~Zhi_Chen5;~Wenbing_Tao1", "gender": "M;M;M;M", "homepage": ";;;http://faculty.hust.edu.cn/taowenbing/zh_CN/index.htm", "dblp": "29/3081;;05/1539;73/188.html", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0002-1832-1940;0000-0003-4537-3145;0000-0003-4642-5728;", "linkedin": ";;;", "or_profile": "~Fan_Yang40;~Lin_Guo3;~Zhi_Chen5;~Wenbing_Tao1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2022one,\ntitle={One Inlier is First: Towards Efficient Position Encoding for Point Cloud Registration},\nauthor={Fan Yang and Lin Guo and Zhi Chen and Wenbing Tao},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=19MmorTQhho}\n}", "github": "", "project": "", "reviewers": "2PZE;dDv9;VUqY;fXaq", "pdf_size": 1122123, "rating": "5;5;6;6", "confidence": "5;5;4;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "contribution": "2;2;3;3", "wc_summary": "120;215;91;47", "wc_strengths_and_weaknesses": "162;264;139;40", "wc_questions": "179;23;221;7", "wc_limitations": "34;3;24;1", "wc_review": "495;505;475;95", "wc_reply_reviewers": "118;0;89;0", "wc_reply_authors": "1072;534;723;507", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 118.25, 61.609151105984246 ], "wc_strengths_and_weaknesses_avg": [ 151.25, 79.61587467333383 ], "wc_questions_avg": [ 107.5, 93.85494126576394 ], "wc_limitations_avg": [ 15.5, 13.97318861248212 ], "wc_review_avg": [ 392.5, 172.10098779495718 ], "wc_reply_reviewers_avg": [ 51.75, 52.75592383799188 ], "wc_reply_authors_avg": [ 709.0, 225.49611970053942 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10082377606698086046&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "1BJUwgi3ed", "title": "Controlling Confusion via Generalisation Bounds", "track": "main", "status": "Reject", "tldr": "A new type of generalisation bound providing more informative measures of performance.", "abstract": "We establish new generalisation bounds for multiclass classification by abstracting to a more general setting of discretised error types. Extending the PAC-Bayes theory, we are hence able to provide fine-grained bounds on performance for multiclass classification, as well as applications to other learning problems including discretisation of regression losses. Tractable training objectives are derived from the bounds. 
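As a concrete (hedged) reading of "discretised error types": for multiclass classification the error types form a confusion matrix, and any weighting of its entries yields a scalar risk; the uniformity over weightings noted next is what makes a single bound cover them all. A small sketch with assumed helper names:

```python
import numpy as np

def error_type_profile(y_true, y_pred, n_classes):
    """Empirical distribution over discretised error types: for multiclass
    classification this is just the normalized confusion matrix."""
    C = np.zeros((n_classes, n_classes))
    for t, p in zip(y_true, y_pred):
        C[t, p] += 1.0
    return C / len(y_true)

def weighted_risk(C, W):
    """Scalar risk under a weighting W of the error types. A bound that holds
    uniformly over all W also covers weightings chosen only after training."""
    return float(np.sum(W * C))

C = error_type_profile([0, 1, 2, 1], [0, 2, 2, 1], n_classes=3)
W = 1.0 - np.eye(3)          # weight every misclassification equally
print(weighted_risk(C, W))   # 0.25: one error out of four predictions
```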
The bounds are uniform over all weightings of the discretised error types and thus can be used to bound weightings not foreseen at training, including the full confusion matrix in the multiclass classification case.", "keywords": "PAC-Bayes;Generalisation bounds;Multiclass classification", "primary_area": "", "supplementary_material": "/attachment/1f0a2f1849239df8c78d7d1fc5092371ac00032c.pdf", "author": "Reuben Adams;John Shawe-Taylor;Benjamin Guedj", "authorids": "~Reuben_Adams1;~John_Shawe-Taylor1;~Benjamin_Guedj1", "gender": "M;M;M", "homepage": ";;https://bguedj.github.io", "dblp": ";59/41;177/7258", "google_scholar": "xXQzg80AAAAJ;;https://scholar.google.fr/citations?user=q-JTC2sAAAAJ", "orcid": ";;0000-0003-1237-7430", "linkedin": ";;benjaminguedj/", "or_profile": "~Reuben_Adams1;~John_Shawe-Taylor1;~Benjamin_Guedj1", "aff": "University College London, University of London;University College London;University College London, University of London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "position": "PhD student;Professor;Principal Researcher", "bibtex": "@misc{\nadams2022controlling,\ntitle={Controlling Confusion via Generalisation Bounds},\nauthor={Reuben Adams and John Shawe-Taylor and Benjamin Guedj},\nyear={2022},\nurl={https://openreview.net/forum?id=1BJUwgi3ed}\n}", "github": "", "project": "", "reviewers": "9zyv;x9gX;pSUq", "site": "https://openreview.net/forum?id=1BJUwgi3ed", "pdf_size": 409531, "rating": "5;5;7", "confidence": "3;4;2", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;2", "contribution": "2;3;3", "wc_summary": "78;143;89", "wc_strengths_and_weaknesses": "131;123;244", "wc_questions": "39;52;19", "wc_limitations": "1;13;3", "wc_review": "249;331;355", "wc_reply_reviewers": "26;200;34", "wc_reply_authors": "462;911;807", "reply_reviewers": "1;1;1", "reply_authors": "2;3;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 103.33333333333333, 28.40578970718626 ], "wc_strengths_and_weaknesses_avg": [ 166.0, 55.25094267672423 ], "wc_questions_avg": [ 36.666666666666664, 13.572848714334887 ], "wc_limitations_avg": [ 5.666666666666667, 5.2493385826745405 ], "wc_review_avg": [ 311.6666666666667, 45.382326466980025 ], "wc_reply_reviewers_avg": [ 86.66666666666667, 80.20529215013737 ], "wc_reply_authors_avg": [ 726.6666666666666, 191.90333214639315 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12026432259132043576&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning Chaotic Dynamics in Dissipative Systems", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55033", "id": "1C36tFZn7sR", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2022/hash/6ad68277e27b42c60ac228c9859fc1a2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1C36tFZn7sR", "openreview": "https://openreview.net/forum?id=1C36tFZn7sR", "poster": "/media/PosterPDFs/NeurIPS%202022/55033.png?t=1669583772.0533006", "slides": "https://nips.cc/virtual/2022/poster/55033", "video": "https://nips.cc/virtual/2022/poster/55033", "author_site": "Zongyi Li, Miguel Liu-Schiaffini, Nikola Kovachki, Kamyar Azizzadenesheli, Burigede Liu, Kaushik Bhattacharya, Andrew Stuart, Anima Anandkumar", "tldr": "We learn the Markov operator of dissipative chaotic PDEs to predict long term statistical properties of chaos.", "abstract": "Chaotic systems are notoriously challenging to predict because of their sensitivity to perturbations and errors due to time stepping. Despite this unpredictable behavior, for many dissipative systems the statistics of the long term trajectories are governed by an invariant measure supported on a set, known as the global attractor; for many problems this set is finite dimensional, even if the state space is infinite dimensional. For Markovian systems, the statistical properties of long-term trajectories are uniquely determined by the solution operator that maps the evolution of the system over arbitrary positive time increments. In this work, we propose a machine learning framework to learn the underlying solution operator for dissipative chaotic systems, showing that the resulting learned operator accurately captures short-time trajectories and long-time statistical behavior. Using this framework, we are able to predict various statistics of the invariant measure for the turbulent Kolmogorov Flow dynamics with Reynolds numbers up to $5000$.", "keywords": "Dissipative Chaotic systems;operator learning;invariant statistics;attractor learning", "primary_area": "", "supplementary_material": "/attachment/96a582d3fce0eb55bb219bb5eb9bb6e075c273e9.zip", "author": "Zongyi Li;Miguel Liu-Schiaffini;Nikola Borislavov Kovachki;Kamyar Azizzadenesheli;Burigede Liu;Kaushik Bhattacharya;Andrew Stuart;Anima Anandkumar", "authorids": "~Zongyi_Li1;~Miguel_Liu-Schiaffini1;~Nikola_Borislavov_Kovachki1;~Kamyar_Azizzadenesheli1;~Burigede_Liu1;~Kaushik_Bhattacharya1;~Andrew_Stuart2;~Anima_Anandkumar1", "gender": "M;;M;M;M;;;F", "homepage": "https://zongyi-li.github.io;https://mliuschi.github.io/;http://www.its.caltech.edu/~nkovachk/;https://kamyar.page/;;;http://stuart.caltech.edu/index.html;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": ";332/5619;;176/5584;;;;", "google_scholar": ";LebtA84AAAAJ;;CxAS4SQAAAAJ;GMKw0g8AAAAJ;;BQwkpB0AAAAJ;bEcLezcAAAAJ", "orcid": ";0000-0001-9685-8383;;;;;;", "linkedin": ";;;;;;;anima-anandkumar-35171b1/", "or_profile": "~Zongyi_Li1;~Miguel_Liu-Schiaffini1;~Nikola_Borislavov_Kovachki1;~Kamyar_Azizzadenesheli1;~Burigede_Liu1;~Kaushik_Bhattacharya1;~Andrew_Stuart2;~anima_anandkumar1", "aff": "California Institute of Technology;California Institute of Technology;California Institute of Technology;Purdue University;University of Cambridge;California Institute of Technology;California Institute of Technology;California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu;caltech.edu;purdue.edu;cam.ac.uk;caltech.edu;caltech.edu;caltech.edu", "position": "PhD student;Undergrad student;PhD student;Assistant Professor;Assistant Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2022learning,\ntitle={Learning Chaotic Dynamics in Dissipative 
Systems},\nauthor={Zongyi Li and Miguel Liu-Schiaffini and Nikola Borislavov Kovachki and Kamyar Azizzadenesheli and Burigede Liu and Kaushik Bhattacharya and Andrew Stuart and Anima Anandkumar},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1C36tFZn7sR}\n}", "github": "", "project": "", "reviewers": "Hc1s;MCtx;NdmN;3spS", "pdf_size": 6233810, "rating": "5;5;7;7", "confidence": "5;3;4;4", "soundness": "3;3;3;2", "novelty": "4;3;3;2", "presentation": "3;3;4;3", "contribution": "4;3;3;2", "wc_summary": "59;46;109;88", "wc_strengths_and_weaknesses": "111;66;87;168", "wc_questions": "1235;138;542;123", "wc_limitations": "1;6;10;45", "wc_review": "1406;256;748;424", "wc_reply_reviewers": "301;92;391;0", "wc_reply_authors": "875;783;1232;324", "reply_reviewers": "1;1;3;0", "reply_authors": "2;2;4;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.5, 24.60182920028509 ], "wc_strengths_and_weaknesses_avg": [ 108.0, 38.1247950814165 ], "wc_questions_avg": [ 509.5, 451.3316408141579 ], "wc_limitations_avg": [ 15.5, 17.327723451163457 ], "wc_review_avg": [ 708.5, 439.8189968612088 ], "wc_reply_reviewers_avg": [ 196.0, 156.74980063783175 ], "wc_reply_authors_avg": [ 803.5, 323.6761498782386 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7209899140718868611&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "caltech.edu;caltech.edu;caltech.edu;purdue.edu;cam.ac.uk;caltech.edu;caltech.edu;caltech.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;2;0;0;0", "aff_unique_norm": "California Institute of Technology;Purdue University;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.caltech.edu;https://www.purdue.edu;https://www.cam.ac.uk", "aff_unique_abbr": "Caltech;Purdue;Cambridge", "aff_campus_unique_index": "0;0;0;2;0;0;0", "aff_campus_unique": "Pasadena;;Cambridge", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "SemiFL: Semi-Supervised Federated Learning for Unlabeled Clients with Alternate Training", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53635", "id": "1GAjC_FauE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/71c3451f6cd6a4f82bb822db25cea4fd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1GAjC_FauE", "openreview": "https://openreview.net/forum?id=1GAjC_FauE", "poster": "/media/PosterPDFs/NeurIPS%202022/53635.png?t=1669220240.8603964", "slides": "https://nips.cc/virtual/2022/poster/53635", "video": "https://nips.cc/virtual/2022/poster/53635", "author_site": "Enmao Diao, Jie Ding, Vahid Tarokh", "tldr": "We propose SemiFL to address the problem of combining communication efficient Federated Learning like FedAvg with Semi-Supervised Learning.", "abstract": "Federated Learning allows the training of machine learning models by using the computation and private data resources of many distributed 
clients. Most existing results on Federated Learning (FL) assume the clients have ground-truth labels. However, in many practical scenarios, clients may be unable to label task-specific data due to a lack of expertise or resources. We propose SemiFL to address the problem of combining communication-efficient FL such as FedAvg with Semi-Supervised Learning (SSL). In SemiFL, clients have completely unlabeled data and can train multiple local epochs to reduce communication costs, while the server has a small amount of labeled data. We provide a theoretical understanding of the success of data augmentation-based SSL methods to illustrate the bottleneck of a vanilla combination of communication-efficient FL with SSL. To address this issue, we propose alternate training to 'fine-tune the global model with labeled data' and 'generate pseudo-labels with the global model.' We conduct extensive experiments and demonstrate that our approach significantly improves the performance of a labeled server with unlabeled clients training with multiple local epochs. Moreover, our method outperforms many existing SSFL baselines and performs competitively with the state-of-the-art FL and SSL results.", "keywords": "Federated Learning;Semi-Supervised Learning", "primary_area": "", "supplementary_material": "/attachment/2829dfe8ed4b4d2785df7e49f44c699db7eef0a4.zip", "author": "Enmao Diao;Jie Ding;Vahid Tarokh", "authorids": "~Enmao_Diao1;~Jie_Ding2;~Vahid_Tarokh1", "gender": "M;M;", "homepage": "https://diaoenmao.com/;http://jding.org;", "dblp": "226/5549;94/1825-2;", "google_scholar": "jhVVjF4AAAAJ;ZyqvoqcAAAAJ;", "orcid": "0000-0002-9151-7990;;", "linkedin": "enmaodiao/;;", "or_profile": "~Enmao_Diao1;~Jie_Ding2;~Vahid_Tarokh1", "aff": "Duke University;University of Minnesota, Minneapolis;", "aff_domain": "duke.edu;umn.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\ndiao2022semifl,\ntitle={Semi{FL}: Semi-Supervised Federated Learning for Unlabeled Clients with Alternate Training},\nauthor={Enmao Diao and Jie Ding and Vahid Tarokh},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1GAjC_FauE}\n}", "github": "", "project": "", "reviewers": "Y1ff;rcxY;sMTb;zPN3", "pdf_size": 2172906, "rating": "3;3;5;5", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "2;2;2;3", "contribution": "2;2;2;3", "wc_summary": "50;150;71;141", "wc_strengths_and_weaknesses": "58;266;130;139", "wc_questions": "103;202;134;116", "wc_limitations": "55;12;6;1", "wc_review": "266;630;341;397", "wc_reply_reviewers": "0;0;15;172", "wc_reply_authors": "984;1152;526;1142", "reply_reviewers": "0;0;1;1", "reply_authors": "4;4;3;4", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 103.0, 43.260836792646536 ], "wc_strengths_and_weaknesses_avg": [ 148.25, 74.88115584043825 ], "wc_questions_avg": [ 138.75, 38.140365755980895 ], "wc_limitations_avg": [ 18.5, 21.43011899173684 ], "wc_review_avg": [ 408.5, 136.06707904559428 ], "wc_reply_reviewers_avg": [ 46.75, 72.57194705945267 ], "wc_reply_authors_avg": [ 951.0, 254.26167623139747 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18076014455082772882&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 9, "email": "duke.edu;umn.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Duke University;University of Minnesota", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.minnesota.edu", "aff_unique_abbr": "Duke;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Minneapolis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Better Evaluation for Dynamic Link Prediction", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55630", "id": "1GVpwr2Tfdg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d49042a5d49818711c401d34172f9900-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1GVpwr2Tfdg", "openreview": "https://openreview.net/forum?id=1GVpwr2Tfdg", "poster": "/media/PosterPDFs/NeurIPS%202022/55630.png?t=1669588728.2531986", "slides": "https://nips.cc/virtual/2022/poster/55630", "video": "https://nips.cc/virtual/2022/poster/55630", "author_site": "Farimah Poursafaei, Shenyang Huang, Kellin Pelrine, Reihaneh Rabbany", "tldr": "In this paper we proposed tools to improve evaluation of dynamic link prediction including new datasets, new negative sampling strategies, and a strong baseline.", "abstract": "Despite the prevalence of recent success in learning from static graphs, learning from time-evolving graphs remains an open challenge. In this work, we design new, more stringent evaluation procedures for link prediction specific to dynamic graphs, which reflect real-world considerations, to better compare the strengths and weaknesses of methods. First, we create two visualization techniques to understand the reoccurring patterns of edges over time and show that many edges reoccur at later time steps. Based on this observation, we propose a pure memorization-based baseline called EdgeBank. 
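The memorization idea is simple enough to sketch in a few lines. This is a minimal reading of the description given here; the released code at the linked repository is the authoritative version, including its time-window variants.

```python
class EdgeBank:
    """Minimal sketch of a pure memorization baseline for dynamic link
    prediction: remember every edge observed so far and score a query edge
    positively iff it has been seen before."""
    def __init__(self):
        self.memory = set()

    def update(self, src, dst):
        self.memory.add((src, dst))

    def predict(self, src, dst):
        return 1.0 if (src, dst) in self.memory else 0.0

bank = EdgeBank()
bank.update("u1", "v3")
print(bank.predict("u1", "v3"), bank.predict("u1", "v9"))  # 1.0 0.0
```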
EdgeBank achieves surprisingly strong performance across multiple settings which highlights that the negative edges used in the current evaluation are easy. To sample more challenging negative edges, we introduce two novel negative sampling strategies that improve robustness and better match real-world applications. Lastly, we introduce six new dynamic graph datasets from a diverse set of domains missing from current benchmarks, providing new challenges and opportunities for future research. Our code repository is accessible at https://github.com/fpour/DGB.git.", "keywords": "dynamic link prediction;evaluation;dynamic graph representation learning", "primary_area": "", "supplementary_material": "/attachment/f3e9b2fa12a5021672b440fb03dda39d9d551f59.pdf", "author": "Farimah Poursafaei;Andy Huang;Kellin Pelrine;Reihaneh Rabbany", "authorids": "~Farimah_Poursafaei1;~Andy_Huang1;~Kellin_Pelrine1;~Reihaneh_Rabbany1", "gender": "F;M;;F", "homepage": ";https://shenyanghuang.github.io/;https://kellinpelrine.github.io/;http://www.reirab.com/", "dblp": "277/0215;249/2209;281/0602;94/9024", "google_scholar": "https://scholar.google.ca/citations?user=gZ7HEsMAAAAJ;ljIXv6kAAAAJ;_s2HT_0AAAAJ;https://scholar.google.ca/citations?user=Foh_c-QAAAAJ", "orcid": ";;;", "linkedin": "farimah-poursafaei-133195167/?originalSubdomain=ca;;kellin-pelrine/;", "or_profile": "~Farimah_Poursafaei1;~Andy_Huang1;~Kellin_Pelrine1;~Reihaneh_Rabbany1", "aff": "McGill University;McGill University, Mila;McGill University;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "mcgill.ca;mcgill.ca;mcgill.ca;mila.umontreal.ca", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npoursafaei2022towards,\ntitle={Towards Better Evaluation for Dynamic Link Prediction},\nauthor={Farimah Poursafaei and Andy Huang and Kellin Pelrine and Reihaneh Rabbany},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=1GVpwr2Tfdg}\n}", "github": "", "project": "", "reviewers": "jFGW;Gob7;yWmH;3Auu;WjMG", "pdf_size": 840919, "rating": "6;6;6;7;7", "confidence": "3;3;5;4;4", "wc_summary_and_contributions": "100;97;112;129;58", "wc_strengths": "40;68;45;68;59", "wc_weaknesses": "215;350;52;74;95", "wc_correctness": "1;8;10;13;102", "wc_clarity": "1;39;9;29;8", "wc_relation_to_prior_work": "1;4;15;11;6", "wc_documentation": "1;10;25;62;139", "wc_additional_feedback": "37;1;84;112;51", "wc_review": "396;577;352;498;518", "wc_reply_reviewers": "0;0;0;10;45", "wc_reply_authors": "538;464;492;864;811", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 99.2, 23.472537144501445 ], "wc_strengths_avg": [ 56.0, 11.610340218959994 ], "wc_weaknesses_avg": [ 157.2, 111.68777909869996 ], "wc_correctness_avg": [ 26.8, 37.806877681183884 ], "wc_clarity_avg": [ 17.2, 14.344336861632888 ], "wc_relation_to_prior_work_avg": [ 7.4, 5.0039984012787215 ], "wc_documentation_avg": [ 47.4, 50.31341769349405 ], "wc_additional_feedback_avg": [ 57.0, 38.277930978567795 ], "wc_review_avg": [ 468.2, 82.3660124080315 ], "wc_reply_reviewers_avg": [ 11.0, 17.435595774162696 ], "wc_reply_authors_avg": [ 633.8, 168.82464275099179 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": 
[ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2182178902359924, "gs_citation": 127, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2464517726378679836&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "mcgill.ca;mcgill.ca;mcgill.ca;mila.umontreal.ca", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "McGill University;University of Montreal", "aff_unique_dep": ";Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.mcgill.ca;https://www.umontreal.ca", "aff_unique_abbr": "McGill;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "A Spectral Approach to Item Response Theory", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55254", "id": "1ItkxrZP0rg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/fd88ea50ca8c1973db037462f116ff99-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1ItkxrZP0rg", "openreview": "https://openreview.net/forum?id=1ItkxrZP0rg", "poster": "/media/PosterPDFs/NeurIPS%202022/55254.png?t=1669140009.3498464", "slides": "https://nips.cc/virtual/2022/poster/55254", "video": "https://nips.cc/virtual/2022/poster/55254", "author_site": "Duc Nguyen, Anderson Ye Zhang", "tldr": "We propose a new spectral method for the item estimation problem under the Rasch model, one of the most fundamental models in item response theory; our algorithm enjoys favorable theoretical guarantees and achieves competitive numerical performance.", "abstract": "The Rasch model is one of the most fundamental models in item response theory and has wide-ranging applications from education testing to recommendation systems. In a universe with $n$ users and $m$ items, the Rasch model assumes that the binary response $X_{li} \\in \\{0,1\\}$ of a user $l$ with parameter $\\theta^*_l$ to an item $i$ with parameter $\\beta^*_i$ (e.g., a user likes a movie, a student correctly solves a problem) is distributed as $\\mathbb{P}(X_{li}=1) = 1/(1 + \\exp(-(\\theta^*_l - \\beta^*_i)))$. In this paper, we propose a new item estimation algorithm for this celebrated model (i.e., to estimate $\\beta^*$). The core of our algorithm is the computation of the stationary distribution of a Markov chain defined on an item-item graph. We complement our algorithmic contributions with finite-sample error guarantees, the first of their kind in the literature, showing that our algorithm is consistent and enjoys favorable optimality properties. We discuss practical modifications to accelerate and robustify the algorithm that practitioners can adopt. 
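Since the response model is fully specified by the formula in the abstract, simulating data for the item-estimation problem is straightforward. A sketch with assumed Gaussian parameter draws; the spectral estimator itself, which computes the stationary distribution of an item-item Markov chain, is not reproduced here.

```python
import numpy as np

rng = np.random.default_rng(0)

def rasch_prob(theta, beta):
    """P(X_{li} = 1) = 1 / (1 + exp(-(theta_l - beta_i))), users l in rows,
    items i in columns -- the Rasch model exactly as stated in the abstract."""
    return 1.0 / (1.0 + np.exp(-(theta[:, None] - beta[None, :])))

n_users, n_items = 1000, 20
theta = rng.normal(size=n_users)   # user parameters (assumed Gaussian here)
beta = rng.normal(size=n_items)    # item parameters: the estimation target
X = (rng.random((n_users, n_items)) < rasch_prob(theta, beta)).astype(int)
print(X.mean(axis=0))              # easier items (lower beta) collect more 1s
```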
Experiments on synthetic and real-life datasets, ranging from small education testing datasets to large recommendation systems datasets show that our algorithm is scalable, accurate, and competitive with the most commonly used methods in the literature.", "keywords": "item response theory;education testing;recommendation systems;Rasch model;spectral method", "primary_area": "", "supplementary_material": "/attachment/57b2e07c7d8226ddba925d06fa0c9651704122d3.zip", "author": "Duc Nguyen;Anderson Ye Zhang", "authorids": "~Duc_Nguyen3;~Anderson_Ye_Zhang1", "gender": "M;", "homepage": "https://dnguyen1196.github.io/;", "dblp": ";", "google_scholar": "ELbDvOsAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Duc_Nguyen3;~Anderson_Ye_Zhang1", "aff": "University of Pennsylvania;", "aff_domain": "seas.upenn.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nnguyen2022a,\ntitle={A Spectral Approach to Item Response Theory},\nauthor={Duc Nguyen and Anderson Ye Zhang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1ItkxrZP0rg}\n}", "github": "", "project": "", "reviewers": "L9zk;d3YA;evfB;LyYF", "pdf_size": 409479, "rating": "6;6;7;8", "confidence": "4;2;4;4", "soundness": "3;3;4;3", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "contribution": "3;3;3;4", "wc_summary": "66;65;126;65", "wc_strengths_and_weaknesses": "161;59;638;508", "wc_questions": "355;22;126;50", "wc_limitations": "9;120;102;4", "wc_review": "591;266;992;627", "wc_reply_reviewers": "0;0;68;38", "wc_reply_authors": "643;172;517;519", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 26.272609310839304 ], "wc_strengths_and_weaknesses_avg": [ 341.5, 238.75772238819837 ], "wc_questions_avg": [ 138.25, 130.79827024850138 ], "wc_limitations_avg": [ 58.75, 52.66580959218229 ], "wc_review_avg": [ 619.0, 257.18961876405507 ], "wc_reply_reviewers_avg": [ 26.5, 28.543825952384168 ], "wc_reply_authors_avg": [ 462.75, 175.45138215471545 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14601294678619289268&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "seas.upenn.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Learning to Sample and Aggregate: Few-shot Reasoning over Temporal Knowledge Graphs", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53963", "id": "1LmgISIDZJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6b295b08549c0441914e391651423477-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1LmgISIDZJ", "openreview": "https://openreview.net/forum?id=1LmgISIDZJ", "poster": "/media/PosterPDFs/NeurIPS%202022/53963.png?t=1669738157.0250657", "slides": 
"https://nips.cc/virtual/2022/poster/53963", "video": "https://nips.cc/virtual/2022/poster/53963", "author_site": "Ruijie Wang, Zheng Li, Dachun Sun, Shengzhong Liu, Jinning Li, Bing Yin, Tarek Abdelzaher", "tldr": "In this paper, we propose a temporal meta-learning framework MetaTKGR to tackle a practical but still less investigated few-shot temporal knowledge graph reasoning task.", "abstract": "In this paper, we investigate a realistic but underexplored problem, called few-shot temporal knowledge graph reasoning, that aims to predict future facts for newly emerging entities based on extremely limited observations in evolving graphs. It offers practical value in applications that need to derive instant new knowledge about new entities in temporal knowledge graphs (TKGs) with minimal supervision. The challenges mainly come from the few-shot and time shift properties of new entities. First, the limited observations associated with them are insufficient for training a model from scratch. Second, the potentially dynamic distributions from the initially observable facts to the future facts ask for explicitly modeling the evolving characteristics of new entities. We correspondingly propose a novel Meta Temporal Knowledge Graph Reasoning (MetaTKGR) framework. Unlike prior work that relies on rigid neighborhood aggregation schemes to enhance low-data entity representation, MetaTKGR dynamically adjusts the strategies of sampling and aggregating neighbors from recent facts for new entities, through temporally supervised signals on future facts as instant feedback. Besides, such a meta temporal reasoning procedure goes beyond existing meta-learning paradigms on static knowledge graphs that fail to handle temporal adaptation with large entity variance. We further provide a theoretical analysis and propose a temporal adaptation regularizer to stabilize the meta temporal reasoning over time. 
Empirically, extensive experiments on three real-world TKGs demonstrate the superiority of MetaTKGR over eight state-of-the-art baselines by a large margin.", "keywords": "Few-shot knowledge graph reasoning;Temporal knowledge graph;Meta learning", "primary_area": "", "supplementary_material": "/attachment/99edc4018c36c91d67837f96fad75a25c81eb104.pdf", "author": "Ruijie Wang;zheng li;Dachun Sun;Shengzhong Liu;Jinning Li;Bing Yin;Tarek Abdelzaher", "authorids": "~Ruijie_Wang2;~zheng_li4;~Dachun_Sun1;~Shengzhong_Liu1;~Jinning_Li2;~Bing_Yin1;~Tarek_Abdelzaher1", "gender": "M;M;M;M;M;M;M", "homepage": "https://wjerry5.github.io;https://hsqmlzno1.github.io/;https://dsun9.github.io/;https://liushengzhong1023.github.io/;https://jinningli.cn;;http://abdelzaher.cs.illinois.edu/", "dblp": "57/5759-4;10/1143-18;262/6139.html;166/5424;211/7889-1;;a/TarekFAbdelzaher", "google_scholar": "S1TuNNIAAAAJ;https://scholar.google.com.hk/citations?user=P6fwn4AAAAAJ;2Rl25vkAAAAJ;REzrIucAAAAJ;ED8QSJwAAAAJ;qSOxydEAAAAJ;https://scholar.google.com.tw/citations?user=cA28Zs0AAAAJ", "orcid": ";;0000-0003-4000-2783;;0000-0003-1927-9999;0000-0002-5890-0031;0000-0003-3883-7220", "linkedin": ";;dachun-sun-3b3ba9126/;;jinning-li-343168162/;bingyin;tarek-abdelzaher-0216071/", "or_profile": "~Ruijie_Wang2;~zheng_li4;~Dachun_Sun1;~Shengzhong_Liu1;~Jinning_Li2;~Bing_Yin1;~Tarek_Abdelzaher1", "aff": "University of Illinois, Urbana-Champaign;Amazon;University of Illinois Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Amazon;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;amazon.com;cs.illinois.edu;illinois.edu;illinois.edu;amazon.com;illinois.edu", "position": "PhD student;Researcher;PhD student;Postdoc;PhD student;Senior Science Manager;Full Professor", "bibtex": "@inproceedings{\nwang2022learning,\ntitle={Learning to Sample and Aggregate: Few-shot Reasoning over Temporal Knowledge Graphs},\nauthor={Ruijie Wang and zheng li and Dachun Sun and Shengzhong Liu and Jinning Li and Bing Yin and Tarek Abdelzaher},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1LmgISIDZJ}\n}", "github": "", "project": "", "reviewers": "KWLf;NqGR;oR3L", "pdf_size": 901424, "rating": "5;6;7", "confidence": "3;2;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;4;3", "contribution": "3;3;3", "wc_summary": "74;112;188", "wc_strengths_and_weaknesses": "55;70;156", "wc_questions": "13;1;207", "wc_limitations": "1;38;1", "wc_review": "143;221;552", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "578;1345;1381", "reply_reviewers": "0;0;0", "reply_authors": "2;3;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 124.66666666666667, 47.39432689913659 ], "wc_strengths_and_weaknesses_avg": [ 93.66666666666667, 44.4996878890428 ], "wc_questions_avg": [ 73.66666666666667, 94.40809758113383 ], "wc_limitations_avg": [ 13.333333333333334, 17.441967269268172 ], "wc_review_avg": [ 305.3333333333333, 177.3026289207868 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1101.3333333333333, 370.34428426653074 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4938605183361197569&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "uiuc.edu;amazon.com;cs.illinois.edu;illinois.edu;illinois.edu;amazon.com;illinois.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;2;1;2", "aff_unique_norm": "University of Illinois;Amazon;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://illinois.edu;https://www.amazon.com;https://illinois.edu", "aff_unique_abbr": "UIUC;Amazon;UIUC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A gradient estimator via L1-randomization for online zero-order optimization with two point feedback", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53449", "id": "1PRnYiuJkQx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/329ef22fd8cb68223d5df09a037f7dd9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1PRnYiuJkQx", "openreview": "https://openreview.net/forum?id=1PRnYiuJkQx", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53449", "video": "https://nips.cc/virtual/2022/poster/53449", "author_site": "Arya Akhavan, Evgenii Chzhen, Massimiliano Pontil, Alexandre Tsybakov", "tldr": "We propose a new gradient estimator for zero-order optimisation and study its theoretical and practical aspects", "abstract": "This work studies online zero-order optimization of convex and Lipschitz functions. We present a novel gradient estimator based on two function evaluations and randomization on the $\ell_1$-sphere. Considering different geometries of feasible sets and Lipschitz assumptions, we analyse the online dual averaging algorithm with our estimator in place of the usual gradient. 
We consider two types of assumptions on the noise of the zero-order oracle: canceling noise and adversarial noise. We provide an anytime and completely data-driven algorithm, which is adaptive to all parameters of the problem. In the case of canceling noise that was previously studied in the literature, our guarantees are either comparable to or better than state-of-the-art bounds obtained by~\citet{duchi2015} and \citet{Shamir17} for non-adaptive algorithms. Our analysis is based on deriving a new weighted Poincar\u00e9 type inequality for the uniform measure on the $\ell_1$-sphere with explicit constants, which may be of independent interest.", "keywords": "zero-order optimization;online learning", "primary_area": "", "supplementary_material": "/attachment/e74a8312caa503922026abd30d355983f10288f6.pdf", "author": "Arya Akhavan;Evgenii E Chzhen;Massimiliano Pontil;Alexandre Tsybakov", "authorids": "~Arya_Akhavan1;~Evgenii_E_Chzhen1;~Massimiliano_Pontil4;~Alexandre_Tsybakov1", "gender": "M;M;Not Specified;M", "homepage": "https://aryaakhavan.github.io;https://echzhen.com;https://www.iit.it/web/computational-statistics-and-machine-learning;http://www.crest.fr/ses.php?user=2891", "dblp": "267/5626.html;198/1158;;", "google_scholar": "lopam2wAAAAJ;;lcOacs8AAAAJ;", "orcid": ";;0000-0001-9415-098X;", "linkedin": ";;;", "or_profile": "~Arya_Akhavan1;~Evgenii_E_Chzhen1;~Massimiliano_Pontil4;~Alexandre_Tsybakov1", "aff": "IIT, Istituto Italiano di Tecnologia;CNRS/University Paris-Saclay;University College London, University of London;", "aff_domain": "iit.it;universite-paris-saclay.fr;ucl.ac.uk;", "position": "PhD student;Researcher;Full Professor;", "bibtex": "@inproceedings{\nakhavan2022a,\ntitle={A gradient estimator via L1-randomization for online zero-order optimization with two point feedback},\nauthor={Arya Akhavan and Evgenii E Chzhen and Massimiliano Pontil and Alexandre Tsybakov},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1PRnYiuJkQx}\n}", "github": "", "project": "", "reviewers": "Kfmr;ewZu;Evju", "pdf_size": 1161979, "rating": "6;7;7", "confidence": "4;3;3", "soundness": "4;3;3", "novelty": "2;3;3", "presentation": "4;3;4", "contribution": "2;3;3", "wc_summary": "123;54;132", "wc_strengths_and_weaknesses": "78;88;353", "wc_questions": "93;47;104", "wc_limitations": "1;13;43", "wc_review": "295;202;632", "wc_reply_reviewers": "0;0;211", "wc_reply_authors": "197;141;537", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 103.0, 34.84250278036869 ], "wc_strengths_and_weaknesses_avg": [ 173.0, 127.34467663262043 ], "wc_questions_avg": [ 81.33333333333333, 24.689178916188272 ], "wc_limitations_avg": [ 19.0, 17.663521732655695 ], "wc_review_avg": [ 376.3333333333333, 184.7274268272641 ], "wc_reply_reviewers_avg": [ 70.33333333333333, 99.46635388690768 ], "wc_reply_authors_avg": [ 291.6666666666667, 174.97682386215865 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11605972809917948955&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 11, "email": "iit.it;universite-paris-saclay.fr;ucl.ac.uk;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Istituto Italiano di Tecnologia;University Paris-Saclay;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iit.it;https://www.universite-paris-saclay.fr;https://www.ucl.ac.uk", "aff_unique_abbr": "IIT;Paris-Saclay;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Italy;France;United Kingdom" }, { "title": "AVLEN: Audio-Visual-Language Embodied Navigation in 3D Environments", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54012", "id": "1Re5RKwpieG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/28f699175783a2c828ae74d53dd3da20-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1Re5RKwpieG", "openreview": "https://openreview.net/forum?id=1Re5RKwpieG", "poster": "/media/PosterPDFs/NeurIPS%202022/54012.png?t=1669658382.4302342", "slides": "https://nips.cc/virtual/2022/poster/54012", "video": "https://nips.cc/virtual/2022/poster/54012", "author_site": "Sudipta Paul, Amit Roy-Chowdhury, Anoop Cherian", "tldr": "What if an audio-visual navigation agent can interact with a human for guidance when it is confused? ", "abstract": "Recent years have seen embodied visual navigation advance in two distinct directions: (i) in equipping the AI agent to follow natural language instructions, and (ii) in making the navigable world multimodal, e.g., audio-visual navigation. 
However, the real world is not only multimodal, but also often complex, and thus in spite of these advances, agents still need to understand the uncertainty in their actions and seek instructions to navigate. To this end, we present AVLEN -- an interactive agent for Audio-Visual-Language Embodied Navigation. Similar to audio-visual navigation tasks, the goal of our embodied agent is to localize an audio event via navigating the 3D visual world; however, the agent may also seek help from a human (oracle), where the assistance is provided in free-form natural language. To realize these abilities, AVLEN uses a multimodal hierarchical reinforcement learning backbone that learns: (a) high-level policies to choose either audio cues for navigation or queries to the oracle, and (b) lower-level policies to select navigation actions based on its audio-visual and language inputs. The policies are trained by rewarding success on the navigation task while minimizing the number of queries to the oracle. To empirically evaluate AVLEN, we present experiments on the SoundSpaces framework for semantic audio-visual navigation tasks. Our results show that equipping the agent to ask for help leads to a clear improvement in performance, especially in challenging cases, e.g., when the sound is unheard during training or in the presence of distractor sounds.", "keywords": "audio-visual navigation;hierarchical policy learning;vision and language navigation", "primary_area": "", "supplementary_material": "/attachment/5a12e0fe0b274fafc3ecd981a44f25e403e04a34.zip", "author": "Sudipta Paul;Amit Roy-Chowdhury;Anoop Cherian", "authorids": "~Sudipta_Paul1;~Amit_Roy-Chowdhury2;~Anoop_Cherian1", "gender": "M;M;M", "homepage": ";https://vlg.engr.ucr.edu/amit;http://users.cecs.anu.edu.au/~cherian/", "dblp": "170/4205-7;c/AmitKRoyChowdhury;44/7734", "google_scholar": "https://scholar.google.com/citations?hl=en;hfgwx0oAAAAJ;https://scholar.google.com.au/citations?hl=en", "orcid": ";0000-0001-6690-9725;0000-0002-5566-0351", "linkedin": "sudipta-paul-80761b86/;;anoop-cherian-4678a04/", "or_profile": "~Sudipta_Paul1;~Amit_Roy-chowdhury1;~Anoop_Cherian2", "aff": "University of California, Riverside;University of California, Riverside;Mitsubishi Electric Research Labs", "aff_domain": "ucr.edu;ucr.edu;merl.com", "position": "PhD student;Professor;Researcher", "bibtex": "@inproceedings{\npaul2022avlen,\ntitle={{AVLEN}: Audio-Visual-Language Embodied Navigation in 3D Environments},\nauthor={Sudipta Paul and Amit Roy-Chowdhury and Anoop Cherian},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1Re5RKwpieG}\n}", "github": "", "project": "", "reviewers": "FS8x;zxx9;kLxQ;ob8D", "pdf_size": 2364309, "rating": "4;5;6;7", "confidence": "5;3;3;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;4;2;3", "contribution": "2;3;3;3", "wc_summary": "34;164;95;95", "wc_strengths_and_weaknesses": "178;258;78;126", "wc_questions": "55;131;147;75", "wc_limitations": "11;159;25;11", "wc_review": "278;712;345;307", "wc_reply_reviewers": "0;0;0;16", "wc_reply_authors": "835;753;802;272", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.0, 46.00543446159377 ], "wc_strengths_and_weaknesses_avg": [ 160.0, 66.7233092704491 ], "wc_questions_avg": [ 102.0, 38.09199390948182 ], "wc_limitations_avg": [ 51.5, 62.327762674429444 ], "wc_review_avg": [ 410.5, 175.685087585714 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 665.5, 229.05294147860226 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.40451991747794525, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8292304965849883824&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 10, "email": "ucr.edu;ucr.edu;merl.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Riverside;Mitsubishi Electric Research Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucr.edu;https://www.merl.com", "aff_unique_abbr": "UCR;MERL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Riverside;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Agent Reinforcement Learning is a Sequence Modeling Problem", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54268", "id": "1W8UwXAQubL", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/69413f87e5a34897cd010ca698097d0a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1W8UwXAQubL", "openreview": "https://openreview.net/forum?id=1W8UwXAQubL", "poster": "/media/PosterPDFs/NeurIPS%202022/54268.png?t=1668867257.8939795", "slides": "https://nips.cc/virtual/2022/poster/54268", "video": "https://nips.cc/virtual/2022/poster/54268", "author_site": "Muning Wen, Jakub Kuba, Runji Lin, Weinan Zhang, Ying Wen, Jun Wang, Yaodong Yang", "tldr": "", "abstract": "Large sequence models (SM) such as GPT series and BERT have displayed outstanding performance and generalization capabilities in natural language processing, vision and recently reinforcement learning. A natural follow-up question is how to abstract multi-agent decision making also as a sequence modeling problem and benefit from the prosperous development of the SMs. In this paper, we introduce a novel architecture named Multi-Agent Transformer (MAT) that effectively casts cooperative multi-agent reinforcement learning (MARL) into SM problems wherein the objective is to map agents' observation sequences to agents' optimal action sequences. 
Our goal is to build the bridge between MARL and SMs so that the modeling power of modern sequence models can be unleashed for MARL. Central to our MAT is an encoder-decoder architecture which leverages the multi-agent advantage decomposition theorem to transform the joint policy search problem into a sequential decision making process; this renders only linear time complexity for multi-agent problems and, most importantly, endows MAT with a monotonic performance improvement guarantee. Unlike prior arts such as Decision Transformer, which fit only pre-collected offline data, MAT is trained by online trial and error from the environment in an on-policy fashion. To validate MAT, we conduct extensive experiments on StarCraftII, Multi-Agent MuJoCo, Dexterous Hands Manipulation, and Google Research Football benchmarks. Results demonstrate that MAT achieves superior performance and data efficiency compared to strong baselines including MAPPO and HAPPO. Furthermore, we demonstrate that MAT is an excellent few-shot learner on unseen tasks regardless of changes in the number of agents.\nSee our project page at https://sites.google.com/view/multi-agent-transformer.", "keywords": "Multi-Agent Reinforcement Learning;Sequence Modeling;Transformer", "primary_area": "", "supplementary_material": "/attachment/b94c14d82cbbfeccfd89e3a9b871f154ede48d96.pdf", "author": "Muning Wen;Jakub Grudzien Kuba;Runji Lin;Weinan Zhang;Ying Wen;Jun Wang;Yaodong Yang", "authorids": "~Muning_Wen2;~Jakub_Grudzien_Kuba1;~Runji_Lin1;~Weinan_Zhang1;~Ying_Wen1;~Jun_Wang2;~Yaodong_Yang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/morning9393;http://wnzhang.net;https://yingwen.io;http://www0.cs.ucl.ac.uk/staff/jun.wang/;https://www.yangyaodong.com;;https://linprophet.github.io/", "dblp": "295/0261;28/10261-1;41/4203-1;w/JunWang12;170/1496-1;;", "google_scholar": "Zt1WFtQAAAAJ;Qzss0GEAAAAJ;_A1CxG8AAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;;", "orcid": "0009-0000-7868-1262;0000-0002-0127-2425;0000-0003-1247-2382;;0000-0001-8132-5613;;", "linkedin": ";;wenying45;;yaodong-yang;kuba-grudzie%C5%84-58039114b/;", "or_profile": "~Muning_Wen2;~Weinan_Zhang1;~Ying_Wen1;~Jun_Wang2;~Yaodong_Yang1;~Jakub_Grudzien1;~Lin_Runji1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;University College London;King's College London;University of Oxford;School of Artificial Intelligence, University of Chinese Academy of Sciences", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;kcl.ac.uk;ox.ac.uk;ia.ac.cn", "position": "PhD student;Associate Professor;Assistant Professor;Professor;Assistant Professor;MS student;MS student", "bibtex": "@inproceedings{\nwen2022multiagent,\ntitle={Multi-Agent Reinforcement Learning is a Sequence Modeling Problem},\nauthor={Muning Wen and Jakub Grudzien Kuba and Runji Lin and Weinan Zhang and Ying Wen and Jun Wang and Yaodong Yang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1W8UwXAQubL}\n}", "github": "", "project": "", "reviewers": "rnUZ;YpeW;MEiB", "pdf_size": 3118143, "rating": "6;6;7", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "3;3;2", "presentation": "4;3;3", "contribution": "3;3;2", "wc_summary": "59;91;161", "wc_strengths_and_weaknesses": "243;186;287", "wc_questions": "24;58;124", "wc_limitations": "13;12;13", "wc_review": "339;347;585", "wc_reply_reviewers": "0;48;36", "wc_reply_authors": "221;842;994", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 103.66666666666667, 42.59368758656876 ], "wc_strengths_and_weaknesses_avg": [ 238.66666666666666, 41.34677200889515 ], "wc_questions_avg": [ 68.66666666666667, 41.51572660517404 ], "wc_limitations_avg": [ 12.666666666666666, 0.4714045207910317 ], "wc_review_avg": [ 423.6666666666667, 114.1266353174792 ], "wc_reply_reviewers_avg": [ 28.0, 20.396078054371138 ], "wc_reply_authors_avg": [ 685.6666666666666, 334.3773649968284 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 238, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14170076594522259195&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;kcl.ac.uk;ox.ac.uk;ia.ac.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3;4", "aff_unique_norm": "Shanghai Jiao Tong University;University College London;King's College London;University of Oxford;University of Chinese Academy of Sciences", "aff_unique_dep": ";;;;School of Artificial Intelligence", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ucl.ac.uk;https://www.kcl.ac.uk;https://www.ox.ac.uk;http://www.ucas.ac.cn", "aff_unique_abbr": "SJTU;UCL;KCL;Oxford;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Split-kl and PAC-Bayes-split-kl Inequalities for Ternary Random Variables", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54997", "id": "1WZyphXPLwC", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/49ffa271264808cf500ea528ed8ec9b3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1WZyphXPLwC", "openreview": "https://openreview.net/forum?id=1WZyphXPLwC", "poster": "/media/PosterPDFs/NeurIPS%202022/81b073de9370ea873f548e31b8adc081.png?t=1667463039.080555", "slides": "https://nips.cc/virtual/2022/poster/54997", "video": "https://nips.cc/virtual/2022/poster/54997", "author_site": "Yi-Shan Wu, Yevgeny Seldin", "tldr": "", "abstract": "We present a new concentration of measure inequality for sums of independent bounded random variables, which we name a split-kl inequality. The inequality combines the combinatorial power of the kl inequality with the ability to exploit low variance. 
While for Bernoulli random variables the kl inequality is tighter than the Empirical Bernstein, for random variables taking values inside a bounded interval and having low variance the Empirical Bernstein inequality is tighter than the kl. The proposed split-kl inequality yields the best of both worlds. We discuss an application of the split-kl inequality to bounding excess losses. We also derive a PAC-Bayes-split-kl inequality and use a synthetic example and several UCI datasets to compare it with the PAC-Bayes-kl, PAC-Bayes Empirical Bernstein, PAC-Bayes Unexpected Bernstein, and PAC-Bayes Empirical Bennett inequalities.", "keywords": "Concentration Inequalities;Ternary Random Variables;PAC-Bayes Analysis;Learning Theory", "primary_area": "", "supplementary_material": "/attachment/80ade26053c7820665a9cc0b8ac518c17969a2f8.pdf", "author": "Yi-Shan Wu;Yevgeny Seldin", "authorids": "~Yi-Shan_Wu1;~Yevgeny_Seldin2", "gender": "Non-Binary;M", "homepage": "https://scholar.google.com/citations?user=IrW8ytQAAAAJ&hl=en;https://sites.google.com/site/yevgenyseldin/", "dblp": "138/4357-3;34/39", "google_scholar": "IrW8ytQAAAAJ;fpWsD9oAAAAJ", "orcid": "0000-0002-7949-0115;", "linkedin": ";", "or_profile": "~Yi-Shan_Wu1;~Yevgeny_Seldin2", "aff": "University of Copenhagen;University of Copenhagen", "aff_domain": "diku.dk;di.ku.dk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2022splitkl,\ntitle={Split-kl and {PAC}-Bayes-split-kl Inequalities for Ternary Random Variables},\nauthor={Yi-Shan Wu and Yevgeny Seldin},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1WZyphXPLwC}\n}", "github": "", "project": "", "reviewers": "nL9t;eeea;5fcJ;vwKX", "pdf_size": 741242, "rating": "4;5;6;8", "confidence": "3;4;5;4", "soundness": "3;3;3;4", "novelty": "2;2;2;4", "presentation": "3;3;4;4", "contribution": "2;2;2;4", "wc_summary": "93;120;365;153", "wc_strengths_and_weaknesses": "190;171;361;110", "wc_questions": "155;34;82;3", "wc_limitations": "88;5;9;3", "wc_review": "526;330;817;269", "wc_reply_reviewers": "387;218;0;0", "wc_reply_authors": "918;913;355;21", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 182.75, 107.34611078190025 ], "wc_strengths_and_weaknesses_avg": [ 208.0, 93.1477321248349 ], "wc_questions_avg": [ 68.5, 57.325823151525704 ], "wc_limitations_avg": [ 26.25, 35.716767770894386 ], "wc_review_avg": [ 485.5, 213.64983032991157 ], "wc_reply_reviewers_avg": [ 151.25, 162.62437547920052 ], "wc_reply_authors_avg": [ 551.75, 382.4417439297128 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.47809144373375745, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12394829187156343702&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "diku.dk;di.ku.dk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "title": "Nearly-Tight Bounds for Testing Histogram Distributions", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53628", "id": "1X5zpwWoHwu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/cd51b67dcb19db4e9f0022f500076b00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1X5zpwWoHwu", "openreview": "https://openreview.net/forum?id=1X5zpwWoHwu", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53628", "video": "https://nips.cc/virtual/2022/poster/53628", "author_site": "Cl\u00e9ment L Canonne, Ilias Diakonikolas, Daniel Kane, Sihan Liu", "tldr": "We provide nearly optimal sample complexity upper and lower bounds for testing whether a probability distribution is a histogram.", "abstract": "We investigate the problem of testing whether a discrete probability distribution over an ordered domain is a histogram on a specified number of bins. One of the most common tools for the succinct approximation of data, $k$-histograms over $[n]$, are probability distributions that are piecewise constant over a set of $k$ intervals. Given samples from an unknown distribution $\\mathbf p$ on $[n]$, we want to distinguish between the cases that $\\mathbf p$ is a $k$-histogram versus far from any $k$-histogram, in total variation distance. Our main result is a sample near-optimal and computationally efficient algorithm for this testing problem, and a nearly-matching (within logarithmic factors) sample complexity lower bound, showing that the testing problem has sample complexity $\\widetilde \\Theta (\\sqrt{nk} / \\epsilon + k / \\epsilon^2 + \\sqrt{n} / \\epsilon^2)$.", "keywords": "distribution testing;histograms;binning;probability distributions;lower bounds;sub-linear algorithms", "primary_area": "", "supplementary_material": "/attachment/a70d7f963dd615d6dd91349a658bc4cebc3d57d6.pdf", "author": "Clement Louis Canonne;Ilias Diakonikolas;Daniel Kane;Sihan Liu", "authorids": "~Clement_Louis_Canonne1;~Ilias_Diakonikolas1;~Daniel_Kane1;~Sihan_Liu2", "gender": "M;M;M;M", "homepage": "https://ccanonne.github.io/;http://www.iliasdiakonikolas.org/;http://cseweb.ucsd.edu/~dakane/;https://lteins.github.io/", "dblp": "28/9840L;d/IliasDiakonikolas;52/6817;", "google_scholar": "u_OXsBIAAAAJ;Vb3FLmkAAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;eq7JPDgAAAAJ", "orcid": "0000-0001-7153-5211;;;", "linkedin": ";;;", "or_profile": "~Clement_Louis_Canonne1;~Ilias_Diakonikolas1;~Daniel_Kane1;~Sihan_Liu2", "aff": "University of Sydney;University of Wisconsin, Madison;University of California, San Diego;Computer Science and Engineering Department, University of California, San Diego", "aff_domain": "sydney.edu.au;wisc.edu;ucsd.edu;cse.ucsd.edu", "position": "Lecturer;Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ncanonne2022nearlytight,\ntitle={Nearly-Tight Bounds for Testing Histogram Distributions},\nauthor={Clement Louis Canonne and Ilias Diakonikolas and Daniel Kane and Sihan Liu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1X5zpwWoHwu}\n}", "github": "", "project": "", "reviewers": "YQdn;oQwe;VmGT;ua2e", "pdf_size": 445612, "rating": "6;7;8;8", "confidence": "3;3;4;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "contribution": "3;3;3;4", "wc_summary": "38;179;268;188", "wc_strengths_and_weaknesses": "238;116;55;61", "wc_questions": "136;44;1;123", "wc_limitations": "1;26;1;10", "wc_review": "413;365;325;382", "wc_reply_reviewers": "0;0;0;9", "wc_reply_authors": "173;126;152;183", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 168.25, 82.79605968885234 ], "wc_strengths_and_weaknesses_avg": [ 117.5, 73.52040533076514 ], "wc_questions_avg": [ 76.0, 55.80770556114989 ], "wc_limitations_avg": [ 9.5, 10.21028892833107 ], "wc_review_avg": [ 371.25, 31.767711595266032 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 158.5, 21.84605227495348 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2561525479920594895&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "sydney.edu.au;wisc.edu;ucsd.edu;cse.ucsd.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Sydney;University of Wisconsin;University of California, San Diego", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sydney.edu.au;https://www.wisc.edu;https://www.ucsd.edu", "aff_unique_abbr": "USYD;UW;UCSD", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Madison;San Diego", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Australia;United States" }, { "title": "GAGA: Deciphering Age-path of Generalized Self-paced Regularizer", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54400", "id": "1Xb3eVZdWp7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/cf62e560e900f38362990b24f1c3e706-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1Xb3eVZdWp7", "openreview": "https://openreview.net/forum?id=1Xb3eVZdWp7", "poster": "/media/PosterPDFs/NeurIPS%202022/54400.png?t=1668449981.8017557", "slides": "https://nips.cc/virtual/2022/poster/54400", "video": "https://nips.cc/virtual/2022/poster/54400", "author_site": "Xingyu Qu, Diyang Li, Xiaohan Zhao, Bin Gu", "tldr": "Tracking the solution path of the age parameter in self-paced learning for a generalized self-paced regularizer.", "abstract": "Nowadays, self-paced learning (SPL) is an important machine learning paradigm that mimics the cognitive process of humans and animals. The SPL regime involves a self-paced regularizer and a gradually increasing age parameter, which plays a key role in SPL, but it is still non-trivial to determine where to optimally terminate this process. A natural idea is to compute the solution path w.r.t. the age parameter (i.e., age-path). 
However, current age-path algorithms are either limited to the simplest regularizer, or lack solid theoretical understanding as well as computational efficiency. To address this challenge, we propose a novel Generalized Age-path Algorithm (GAGA) for SPL with various self-paced regularizers based on ordinary differential equations (ODEs) and sets control, which can learn the entire solution spectrum w.r.t. a range of age parameters. To the best of our knowledge, GAGA is the first exact path-following algorithm tackling the age-path for a general self-paced regularizer. Finally, the algorithmic steps of classic SVM and Lasso are described in detail. We demonstrate the performance of GAGA on real-world datasets, and find that our algorithm achieves considerable speedups over competing baselines. ", "keywords": "Self-paced Learning;Solution Path;Biconvex Optimization;Partial Optimum", "primary_area": "", "supplementary_material": "/attachment/ea5d4faa21829cac6f4a2519305567cb94f2a2ad.pdf", "author": "Xingyu Qu;Diyang Li;Xiaohan Zhao;Bin Gu", "authorids": "~Xingyu_Qu1;~Diyang_Li1;~Xiaohan_Zhao3;~Bin_Gu1", "gender": "M;M;M;M", "homepage": "https://xingyu-qu.notion.site/;;https://mbzuai.ac.ae/study/faculty/bin-gu/;https://github.com/XiaohanZhao123", "dblp": "80/8549;127/2830;29/1758-1;75/781", "google_scholar": "mFmoJAMAAAAJ;_EhwwgMAAAAJ;Vo8OgCgAAAAJ;PliLuD4AAAAJ", "orcid": ";;0000-0001-6049-1815;0009-0005-2793-3526", "linkedin": "xingyu-qu-b31278215/;;;", "or_profile": "~Xingyu_Qu1;~Diyang_Li1;~Bin_Gu1;~Zhao_Xiaohan1", "aff": "Sichuan University;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Nanjing University of Information Science and Technology", "aff_domain": "scu.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;nuist.edu.cn", "position": "Undergrad student;Intern;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nqu2022gaga,\ntitle={{GAGA}: Deciphering Age-path of Generalized Self-paced Regularizer},\nauthor={Xingyu Qu and Diyang Li and Xiaohan Zhao and Bin Gu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1Xb3eVZdWp7}\n}", "github": "", "project": "", "reviewers": "sP8f;Qt26;oJry;ZV4b", "pdf_size": 613561, "rating": "6;6;6;8", "confidence": "2;3;3;3", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "contribution": "3;3;2;3", "wc_summary": "45;42;108;127", "wc_strengths_and_weaknesses": "104;133;150;291", "wc_questions": "478;53;90;226", "wc_limitations": "17;1;2;23", "wc_review": "644;229;350;667", "wc_reply_reviewers": "28;23;0;0", "wc_reply_authors": "1249;1321;1517;775", "reply_reviewers": "1;1;0;0", "reply_authors": "3;3;4;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 37.61980861195336 ], "wc_strengths_and_weaknesses_avg": [ 169.5, 72.05032963144582 ], "wc_questions_avg": [ 211.75, 166.67089577967715 ], "wc_limitations_avg": [ 10.75, 9.496709956611289 ], "wc_review_avg": [ 472.5, 188.10967545557034 ], "wc_reply_reviewers_avg": [ 12.75, 12.871965661856 ], "wc_reply_authors_avg": [ 1215.5, 272.57797049651685 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14470576664606509488&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "scu.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;nuist.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Sichuan University;Mohamed bin Zayed University of Artificial Intelligence;Nanjing University of Information Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scu.edu.cn;https://mbzuai.ac.ae;http://www.nuist.edu.cn", "aff_unique_abbr": "SCU;MBZUAI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Teacher Forcing Recovers Reward Functions for Text Generation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53950", "id": "1_gypPuWUC3", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/51ae7d9db3423ae96cd6afeb01529819-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1_gypPuWUC3", "openreview": "https://openreview.net/forum?id=1_gypPuWUC3", "poster": "/media/PosterPDFs/NeurIPS%202022/53950.png?t=1668733368.9817529", "slides": "https://nips.cc/virtual/2022/poster/53950", "video": "https://nips.cc/virtual/2022/poster/53950", "author_site": "Yongchang Hao, Yuxin Liu, Lili Mou", "tldr": "We derive a reward function for text generation via the lens of inverse reinforcement learning.", "abstract": "Reinforcement learning (RL) has been widely used in text generation to alleviate the exposure bias issue or to utilize non-parallel datasets. The reward function plays an important role in making RL training successful. However, previous reward functions are typically task-specific and sparse, restricting the use of RL. In our work, we propose a task-agnostic approach that derives a step-wise reward function directly from a model trained with teacher forcing. 
We additionally propose a simple modification to stabilize the RL training on non-parallel datasets with our induced reward function. Empirical results show that our method outperforms self-training and reward regression methods on several text generation tasks, confirming the effectiveness of our reward function.\n", "keywords": "Text Generation;Natural Language Processing;Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/03fe3ce537b8dfdb0dce4f503af260d28882fd18.pdf", "author": "Yongchang Hao;Yuxin Liu;Lili Mou", "authorids": "~Yongchang_Hao1;~Yuxin_Liu6;~Lili_Mou1", "gender": "M;F;M", "homepage": "https://yongchanghao.github.io;;https://lili-mou.github.io/", "dblp": "277/4987;;", "google_scholar": "sRqHvoYAAAAJ;ZQoOjaIAAAAJ;https://scholar.google.com.hk/schhp?hl=en", "orcid": ";;", "linkedin": "yongchang-hao/;yuxin-claire-liu-ab0927192/;", "or_profile": "~Yongchang_Hao1;~Yuxin_Liu6;~Lili_Mou1", "aff": "University of Alberta;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca", "position": "MS student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhao2022teacher,\ntitle={Teacher Forcing Recovers Reward Functions for Text Generation},\nauthor={Yongchang Hao and Yuxin Liu and Lili Mou},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1_gypPuWUC3}\n}", "github": "", "project": "", "reviewers": "bSdL;iPjS;s2sH;BSmj", "pdf_size": 822812, "rating": "6;6;7;7", "confidence": "3;2;4;3", "soundness": "3;3;2;3", "novelty": "3;3;2;4", "presentation": "3;3;3;3", "contribution": "3;3;2;4", "wc_summary": "100;66;120;65", "wc_strengths_and_weaknesses": "190;67;514;82", "wc_questions": "20;272;187;115", "wc_limitations": "1;6;76;10", "wc_review": "311;411;897;272", "wc_reply_reviewers": "0;259;13;0", "wc_reply_authors": "576;1036;2406;369", "reply_reviewers": "0;2;1;0", "reply_authors": "2;3;6;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.75, 23.34925052330374 ], "wc_strengths_and_weaknesses_avg": [ 213.25, 180.00468743896644 ], "wc_questions_avg": [ 148.5, 92.69439033727986 ], "wc_limitations_avg": [ 23.25, 30.621683493890405 ], "wc_review_avg": [ 472.75, 250.13233997226348 ], "wc_reply_reviewers_avg": [ 68.0, 110.4015398443337 ], "wc_reply_authors_avg": [ 1096.75, 793.5090973013479 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8015164160931191027&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "ualberta.ca;ualberta.ca;ualberta.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Biologically Inspired Dynamic Thresholds for Spiking Neural Networks", "status": "Accept", "track": "main", "site": 
"https://nips.cc/virtual/2022/poster/55073", "id": "1bE24ZURBqm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2858f8c8683aaa8c12d487354cf328dc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1bE24ZURBqm", "openreview": "https://openreview.net/forum?id=1bE24ZURBqm", "poster": "/media/PosterPDFs/NeurIPS%202022/55073.png?t=1669356739.938124", "slides": "https://nips.cc/virtual/2022/poster/55073", "video": "https://nips.cc/virtual/2022/poster/55073", "author_site": "Jianchuan Ding, Bo Dong, Felix Heide, Yufei Ding, Yunduo Zhou, Baocai Yin, Xin Yang", "tldr": "We propose a bioinspired dynamic threshold scheme to endow spiking neural networks (SNNs) with the bioplausible homeostasis, dramatically enhancing SNNs' generalizability in real-world robot tasks.", "abstract": "The dynamic membrane potential threshold, as one of the essential properties of a biological neuron, is a spontaneous regulation mechanism that maintains neuronal homeostasis, i.e., the constant overall spiking firing rate of a neuron. As such, the neuron firing rate is regulated by a dynamic spiking threshold, which has been extensively studied in biology. Existing work in the machine learning community does not employ bioinspired spiking threshold schemes. This work aims at bridging this gap by introducing a novel bioinspired dynamic energy-temporal threshold (BDETT) scheme for spiking neural networks (SNNs). The proposed BDETT scheme mirrors two bioplausible observations: a dynamic threshold has 1) a positive correlation with the average membrane potential and 2) a negative correlation with the preceding rate of depolarization. We validate the effectiveness of the proposed BDETT on robot obstacle avoidance and continuous control tasks under both normal conditions and various degraded conditions, including noisy observations, weights, and dynamic environments. 
We find that the BDETT outperforms existing static and heuristic threshold approaches by significant margins in all tested conditions, and we confirm that the proposed bioinspired dynamic threshold scheme offers homeostasis to SNNs in complex real-world tasks.", "keywords": "Spiking Neural Networks;dynamic threshold;robot obstacle avoidance;robot continuous control", "primary_area": "", "supplementary_material": "/attachment/d90f4c9749855e6a76bfd483b60fbf0457d5b591.zip", "author": "Jianchuan Ding;Bo Dong;Felix Heide;Yufei Ding;Yunduo Zhou;Baocai Yin;Xin Yang", "authorids": "~Jianchuan_Ding1;~Bo_Dong7;~Felix_Heide2;~Yufei_Ding1;~Yunduo_Zhou1;~Baocai_Yin1;~Xin_Yang8", "gender": ";M;;M;M;M;F", "homepage": ";https://dongshuhao.github.io/;https://www.cs.princeton.edu/~fheide/;;https://www.bjut.edu.cn/info/1059/1568.htm;https://xinyangdut.github.io/;https://cse.ucsd.edu/~yufeiding", "dblp": ";45/5631-4;01/9396;322/0940;;44/1152-11;127/9591", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;gRqzSHsAAAAJ;;;exfFfaAAAAAJ;MiPxo9UAAAAJ", "orcid": "0000-0003-1890-6903;0000-0001-9189-9506;;0000-0002-7817-3724;0000-0003-3121-1823;0000-0002-8046-722X;", "linkedin": ";bodong2015/;;;;;", "or_profile": "~Jianchuan_Ding1;~Bo_Dong7;~Felix_Heide2;~Yunduo_Zhou1;~Baocai_Yin1;~Xin_Yang8;~Yufei_Ding2", "aff": "Dalian University of Technology;Princeton University;Algolux;Dalian University of Technology;Beijing University of Technology;Dalian University of Technology;UC Santa Barbara", "aff_domain": "dlut.edu.cn;princeton.edu;algolux.com;dlut.edu.cn;bjut.edu.cn;dlut.edu.cn;ucsb.edu", "position": "MS student;Researcher;CTO;MS student;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nding2022biologically,\ntitle={Biologically Inspired Dynamic Thresholds for Spiking Neural Networks},\nauthor={Jianchuan Ding and Bo Dong and Felix Heide and Yufei Ding and Yunduo Zhou and Baocai Yin and Xin Yang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1bE24ZURBqm}\n}", "github": "", "project": "", "reviewers": "r3Sy;qTH8;T4kg;wR8D", "pdf_size": 6224559, "rating": "5;5;7;8", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "contribution": "2;2;3;3", "wc_summary": "139;78;58;75", "wc_strengths_and_weaknesses": "237;336;155;61", "wc_questions": "90;90;4;34", "wc_limitations": "34;1;1;14", "wc_review": "500;505;218;184", "wc_reply_reviewers": "16;38;0;0", "wc_reply_authors": "1518;793;743;576", "reply_reviewers": "1;1;0;0", "reply_authors": "4;3;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.5, 30.696090956341656 ], "wc_strengths_and_weaknesses_avg": [ 197.25, 101.46520339505558 ], "wc_questions_avg": [ 54.5, 37.050641020095725 ], "wc_limitations_avg": [ 12.5, 13.5 ], "wc_review_avg": [ 351.75, 151.23884256367475 ], "wc_reply_reviewers_avg": [ 13.5, 15.580436450882884 ], "wc_reply_authors_avg": [ 907.5, 361.51521406435995 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11286381370071225235&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 7, "email": "dlut.edu.cn;princeton.edu;algolux.com;dlut.edu.cn;bjut.edu.cn;dlut.edu.cn;ucsb.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0;4", "aff_unique_norm": "Dalian University of Technology;Princeton University;Algolux;Beijing University of Technology;University of California, Santa Barbara", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.princeton.edu;https://www.algolux.com;http://www.bjut.edu.cn;https://www.ucsb.edu", "aff_unique_abbr": "DUT;Princeton;;BJUT;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1;2;0;0;0;1", "aff_country_unique": "China;United States;Sweden" }, { "title": "Jump Self-attention: Capturing High-order Statistics in Transformers", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53207", "id": "1beC9_dmOQ0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/71ec377d5df1fc61ee7770857820519b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1beC9_dmOQ0", "openreview": "https://openreview.net/forum?id=1beC9_dmOQ0", "poster": "/media/PosterPDFs/NeurIPS%202022/53207.png?t=1669479510.54701", "slides": "https://nips.cc/virtual/2022/poster/53207", "video": "https://nips.cc/virtual/2022/poster/53207", "author_site": "Haoyi Zhou, Siyang Xiao, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li", "tldr": "Jump Self-attention", "abstract": "The recent success of Transformer has benefited many real-world applications, with its capability of building long-range dependencies through pairwise dot-products. However, the strong assumption that elements are directly attentive to each other limits the performance of tasks with high-order dependencies such as natural language understanding and image captioning. To solve such problems, we are the first to define the Jump Self-attention (JAT) to build Transformers. 
Inspired by the piece movements in English Draughts, we introduce the spectral convolutional technique to calculate JAT on the dot-product feature map. This technique allows JAT's propagation in each self-attention head and is interchangeable with the canonical self-attention. We further develop the higher-order variants under the multi-hop assumption to increase the generality. Moreover, the proposed architecture is compatible with the pre-trained models. With extensive experiments, we empirically show that our methods significantly increase the performance on ten different tasks.", "keywords": "Neural Network;Transformer;Self-attention", "primary_area": "", "supplementary_material": "/attachment/7302fac70165c88d9d559345a41e4c0f5472dc4b.zip", "author": "Haoyi Zhou;Siyang Xiao;Shanghang Zhang;Jieqi Peng;Shuai Zhang;Jianxin Li", "authorids": "~Haoyi_Zhou1;xiaosy@act.buaa.edu.cn;~Shanghang_Zhang4;~Jieqi_Peng1;~Shuai_Zhang8;~Jianxin_Li3", "gender": "M;;;;M;M", "homepage": "https://www.zhouhaoyi.com/;;;https://github.com/cookieminions;https://scholar.google.com.sg/citations?user=VpCt3hMAAAAJ&hl=en;http://myjianxin.github.io", "dblp": "162/1287;;;;71/208-26;l/JianxinLi-2.html", "google_scholar": "mbrFlN0AAAAJ;;;;https://scholar.google.com.sg/citations?user=VpCt3hMAAAAJ;EY2lqD0AAAAJ", "orcid": "0000-0002-2393-3634;;;;0000-0001-8502-2927;0000-0001-5152-0055", "linkedin": "haoyi-zhou-54a7a69a/;;;;;", "or_profile": "~Haoyi_Zhou1;xiaosy@act.buaa.edu.cn;~Shanghang_Zhang4;~Jieqi_Peng1;~Shuai_Zhang8;~Jianxin_Li3", "aff": "Beihang University;;;Beihang University;Beihang University;Beihang University ", "aff_domain": "buaa.edu.cn;;;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "Assistant Professor;;;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhou2022jump,\ntitle={Jump Self-attention: Capturing High-order Statistics in Transformers},\nauthor={Haoyi Zhou and Siyang Xiao and Shanghang Zhang and Jieqi Peng and Shuai Zhang and Jianxin Li},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1beC9_dmOQ0}\n}", "github": "", "project": "", "reviewers": "mSLD;5zAj;MTPd;YzKV", "pdf_size": 541367, "rating": "6;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "contribution": "3;3;3;3", "wc_summary": "176;229;59;81", "wc_strengths_and_weaknesses": "191;64;137;76", "wc_questions": "14;273;75;167", "wc_limitations": "10;88;10;0", "wc_review": "391;654;281;324", "wc_reply_reviewers": "0;7;19;31", "wc_reply_authors": "712;1575;1193;662", "reply_reviewers": "0;1;1;1", "reply_authors": "1;4;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 136.25, 69.28699372898207 ], "wc_strengths_and_weaknesses_avg": [ 117.0, 50.90677754484171 ], "wc_questions_avg": [ 132.25, 97.8247795806359 ], "wc_limitations_avg": [ 27.0, 35.45419580247167 ], "wc_review_avg": [ 412.5, 144.83525123394512 ], "wc_reply_reviewers_avg": [ 14.25, 11.818946653572814 ], "wc_reply_authors_avg": [ 1035.5, 374.17275421922426 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8770771896010157409&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;;;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Brain Network Transformer", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54219", "id": "1cJ1cbA6NLN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a408234a9b80604a9cf6ca518e474550-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1cJ1cbA6NLN", "openreview": "https://openreview.net/forum?id=1cJ1cbA6NLN", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54219", "video": "https://nips.cc/virtual/2022/poster/54219", "author_site": "Xuan Kan, Wei Dai, Hejie Cui, Zilong Zhang, Ying Guo, Carl Yang", "tldr": "We study graph transformer models for brain network analysis", "abstract": "Human brains are commonly modeled as networks of Regions of Interest (ROIs) and their connections for the understanding of brain functions and mental disorders. Recently, Transformer-based models have been studied over different types of data, including graphs, shown to bring performance gains widely. In this work, we study Transformer-based models for brain network analysis. Driven by the unique properties of data, we model brain networks as graphs with nodes of fixed size and order, which allows us to (1) use connection profiles as node features to provide natural and low-cost positional information and (2) learn pair-wise connection strengths among ROIs with efficient attention weights across individuals that are predictive towards downstream analysis tasks. 
Moreover, we propose an Orthonormal Clustering Readout operation based on self-supervised soft clustering and orthonormal projection. This design accounts for the underlying functional modules that determine similar behaviors among groups of ROIs, leading to distinguishable cluster-aware node embeddings and informative graph embeddings. Finally, we re-standardize the evaluation pipeline on the only publicly available large-scale brain network dataset of ABIDE, to enable meaningful comparison of different models. Experimental results show clear improvements of our proposed Brain Network Transformer on both the public ABIDE and our restricted ABCD datasets. The implementation is available at https://github.com/Wayfear/BrainNetworkTransformer.", "keywords": "Brain Network;Graph Transformer;Graph Neural Network", "primary_area": "", "supplementary_material": "/attachment/f5b0c3298f7b5b3bb17ad756fb24214d16e606a3.pdf", "author": "Xuan Kan;Wei Dai;Hejie Cui;Zilong Zhang;Ying Guo;Carl Yang", "authorids": "~Xuan_Kan1;~Wei_Dai11;~Hejie_Cui1;~Zilong_Zhang1;yguo2@emory.edu;~Carl_Yang1", "gender": ";M;F;M;;M", "homepage": "http://kanxuan.live;https://dd.works/;https://hejiecui.com/;;;https://cs.emory.edu/~jyang71/", "dblp": "211/5244;;221/7865;;;305/0254", "google_scholar": "https://scholar.google.com/citations?hl=en;N1x7v90AAAAJ;r0Vh6GEAAAAJ;;;mOINlwcAAAAJ", "orcid": ";0000-0002-1936-0407;0000-0001-6388-2619; 0000-0002-0855-656X;;0000-0001-9145-4531", "linkedin": "xuan-kan-90077782/;;hejie-cui-b1071b13b/;;;", "or_profile": "~Xuan_Kan1;~Wei_Dai11;~Hejie_Cui1;~Zilong_Zhang1;yguo2@emory.edu;~Carl_Yang1", "aff": "Emory University;Emory University;Emory University;University of International Business and Economics;;Emory University", "aff_domain": "emory.edu;emory.edu;emory.edu;uibe.edu.cn;;emory.edu", "position": "PhD student;Undergrad student;PhD student;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\nkan2022brain,\ntitle={Brain Network Transformer},\nauthor={Xuan Kan and Wei Dai and Hejie Cui and Zilong Zhang and Ying Guo and Carl Yang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1cJ1cbA6NLN}\n}", "github": "", "project": "", "reviewers": "CYN4;wuUg;baE1;xfsP", "pdf_size": 2604588, "rating": "3;6;6;7", "confidence": "5;4;2;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;3;3;3", "contribution": "1;3;3;3", "wc_summary": "59;66;50;116", "wc_strengths_and_weaknesses": "476;101;113;1504", "wc_questions": "95;122;9;35", "wc_limitations": "56;1;1;112", "wc_review": "686;290;173;1767", "wc_reply_reviewers": "0;0;28;382", "wc_reply_authors": "2530;965;611;3305", "reply_reviewers": "0;0;1;2", "reply_authors": "5;3;4;7", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 72.75, 25.606395685453272 ], "wc_strengths_and_weaknesses_avg": [ 548.5, 571.8725819621011 ], "wc_questions_avg": [ 65.25, 45.234804078275836 ], "wc_limitations_avg": [ 42.5, 45.98097432634502 ], "wc_review_avg": [ 729.0, 628.7189356143173 ], "wc_reply_reviewers_avg": [ 102.5, 161.7737617786024 ], "wc_reply_authors_avg": [ 1852.75, 1106.541995362128 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.75, 1.479019945774904 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5353033790313108, "gs_citation": 168, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10818376030441199053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "emory.edu;emory.edu;emory.edu;uibe.edu.cn;;emory.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Emory University;University of International Business and Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.emory.edu;http://www.uibe.edu.cn", "aff_unique_abbr": "Emory;UIBE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "SCL-WC: Cross-Slide Contrastive Learning for Weakly-Supervised Whole-Slide Image Classification", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54569", "id": "1fKJLRTUdo", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/726204cea3ec27790a644e5b379175e3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1fKJLRTUdo", "openreview": "https://openreview.net/forum?id=1fKJLRTUdo", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54569", "video": "https://nips.cc/virtual/2022/poster/54569", "author_site": "Xiyue Wang, Jinxi Xiang, Jun Zhang, Sen Yang, Zhongyi Yang, Ming-Hui Wang, Jing Zhang, Wei Yang, Junzhou Huang, Xiao Han", "tldr": "", "abstract": "Weakly-supervised whole-slide image (WSI) classification (WSWC) is a challenging task where a large number of unlabeled patches (instances) exist within each WSI (bag) while only a slide label is given. Despite recent progress in multiple instance learning (MIL)-based WSI analysis, the major limitation is that it usually focuses on the easy-to-distinguish diagnosis-positive regions while ignoring positives that occupy only a small fraction of the entire WSI.
To obtain more discriminative features, we propose a novel weakly-supervised classification method based on cross-slide contrastive learning (called SCL-WC), which depends on task-agnostic self-supervised feature pre-extraction and task-specific weakly-supervised feature refinement and aggregation for WSI-level prediction. To enable both intra-WSI and inter-WSI information interaction, we propose a positive-negative-aware module (PNM) and a weakly-supervised cross-slide contrastive learning (WSCL) module, respectively. The WSCL aims to pull WSIs with the same disease types closer and push different WSIs away. The PNM aims to facilitate the separation of tumor-like patches and normal ones within each WSI. Extensive experiments demonstrate state-of-the-art performance of our method on three different classification tasks (e.g., gains of over 2% in AUC on Camelyon16, 5% in F1 score on BRACS, and 3% in AUC on DiagSet). Our method also shows superior flexibility and scalability in weakly-supervised localization and semi-supervised classification experiments (e.g., first place in the BRIGHT challenge). Our code will be available at https://github.com/Xiyue-Wang/SCL-WC.", "keywords": "Histopathology;Whole slide image;Multiple instance learning;Contrastive Learning", "primary_area": "", "supplementary_material": "/attachment/b8a067300c923ac84289426ebd550858a1bff5e7.pdf", "author": "Xiyue Wang;Jinxi Xiang;Jun Zhang;Sen Yang;Zhongyi Yang;Ming-Hui Wang;Jing Zhang;Yang Wei;Junzhou Huang;Xiao Han", "authorids": "~Xiyue_Wang1;~Jinxi_Xiang1;~Jun_Zhang17;~Sen_Yang5;~Zhongyi_Yang1;~Ming-Hui_Wang1;~Jing_Zhang28;~Yang_Wei2;~Junzhou_Huang2;~Xiao_Han2", "gender": "F;M;M;;M;M;M;M;M;M", "homepage": ";https://jinxixiang.netlify.app/;https://junzhang.org;;https://github.com/YangZyyyy;https://rsmd.scu.edu.cn/info/1063/1151.htm;https://bme.scu.edu.cn/info/1090/1455.htm;;http://ranger.uta.edu/~huang/;", "dblp": ";227/4249;29/4190-18.html;;246/6510;;;03/1094-32.html;22/1170.html;01/2095-7", "google_scholar": "OxfZXwwAAAAJ;Zn-0LioAAAAJ;;I9y7C2UAAAAJ;;;;;https://scholar.google.com.tw/citations?user=X7KrguAAAAAJ;XGVV3gEAAAAJ", "orcid": ";;0000-0001-5579-7094;;;;;;0000-0002-9548-1227;", "linkedin": ";;;;;;;;;xiaohan2009", "or_profile": "~Xiyue_Wang1;~Jinxi_Xiang1;~Jun_Zhang17;~Sen_Yang5;~Zhongyi_Yang1;~Ming-Hui_Wang1;~Jing_Zhang28;~Yang_Wei2;~Junzhou_Huang2;~Xiao_Han2", "aff": "Sichuan University;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Xi'an Jiaotong University;Santa Clara University;;Tencent AI Lab;University of Texas, Arlington;Tencent AI Lab", "aff_domain": "scu.edu.cn;tencent.com;tencent.com;tencent.com;xjtu.edu.cn;scu.edu;;tencent.com;uta.edu;tencent.com", "position": "PhD student;Researcher;Principal Researcher;Researcher;MS student;Full Professor;;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nwang2022sclwc,\ntitle={{SCL}-{WC}: Cross-Slide Contrastive Learning for Weakly-Supervised Whole-Slide Image Classification},\nauthor={Xiyue Wang and Jinxi Xiang and Jun Zhang and Sen Yang and Zhongyi Yang and Ming-Hui Wang and Jing Zhang and Yang Wei and Junzhou Huang and Xiao Han},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1fKJLRTUdo}\n}", "github": "", "project": "", "reviewers": "s5WU;oepB;nwVH;B23m", "pdf_size": 1187286, "rating": "3;5;7;7", "confidence": "5;5;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;4;3", "contribution": "2;3;3;3", "wc_summary": "73;49;139;69", "wc_strengths_and_weaknesses": "346;89;116;73", "wc_questions": "90;32;90;30", "wc_limitations": "311;2;16;15", "wc_review": "820;172;361;187", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "3098;1231;563;363", "reply_reviewers": "0;0;0;0", "reply_authors": "6;2;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 33.86369737639409 ], "wc_strengths_and_weaknesses_avg": [ 156.0, 110.76777509727276 ], "wc_questions_avg": [ 60.5, 29.508473359359 ], "wc_limitations_avg": [ 86.0, 130.0211521253369 ], "wc_review_avg": [ 385.0, 261.90360822256724 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1313.75, 1079.113843623554 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8181818181818182, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13480559653074432181&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "scu.edu.cn;tencent.com;tencent.com;tencent.com;xjtu.edu.cn;scu.edu;;tencent.com;uta.edu;tencent.com", "author_num": 10, "aff_unique_index": "0;1;1;1;2;3;1;4;1", "aff_unique_norm": "Sichuan University;Tencent;Xi'an Jiao Tong University;Santa Clara University;University of Texas at Arlington", "aff_unique_dep": ";Tencent AI Lab;;;", "aff_unique_url": "https://www.scu.edu.cn;https://ai.tencent.com;https://www.xjtu.edu.cn;https://www.scu.edu;https://www.uta.edu", "aff_unique_abbr": "SCU;Tencent AI Lab;XJTU;SCU;UTA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Arlington", "aff_country_unique_index": "0;0;0;0;0;1;0;1;0", "aff_country_unique": "China;United States" }, { "title": "FLAIR: Federated Learning Annotated Image Repository", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55624", "id": "1kIZiRelqFt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f64e55d03e2fe61aa4114e49cb654acb-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=1kIZiRelqFt", "openreview": "https://openreview.net/forum?id=1kIZiRelqFt", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55624", "video": "https://nips.cc/virtual/2022/poster/55624", "author_site": "Congzheng Song, Filip Granqvist, Kunal Talwar", "tldr": "This paper describes the FLAIR dataset that we are releasing later this month to accelerate research in Federated Learning. 
This is a large image dataset that is heterogeneous, with images grouped by Flickr users and annotated by humans.", "abstract": "Cross-device federated learning is an emerging machine learning (ML) paradigm where a large population of devices collectively train an ML model while the data remains on the devices.\nThis research field has a unique set of practical challenges, and to systematically make advances, new datasets curated to be compatible with this paradigm are needed.\nExisting federated learning benchmarks in the image domain do not accurately capture the scale and heterogeneity of many real-world use cases. \nWe introduce FLAIR, a challenging large-scale annotated image dataset for multi-label classification suitable for federated learning.\nFLAIR has 429,078 images from 51,414 Flickr users and captures many of the intricacies typically encountered in federated learning, such as heterogeneous user data and a long-tailed label distribution.\nWe implement multiple baselines in different learning setups for different tasks on this dataset. \nWe believe FLAIR can serve as a challenging benchmark for advancing the state of the art in federated learning.\nDataset access and the code for the benchmark are available at https://github.com/apple/ml-flair.\n", "keywords": "Federated Learning;Differential Privacy;Image Classification", "primary_area": "", "supplementary_material": "/attachment/2c15e926e1f7fbd0096f8c1544ff941c2a5569eb.pdf", "author": "Congzheng Song;Filip Granqvist;Kunal Talwar", "authorids": "~Congzheng_Song2;~Filip_Granqvist1;~Kunal_Talwar1", "gender": "M;;M", "homepage": "https://csong27.github.io/;;http://www.kunaltalwar.org", "dblp": ";;06/3696", "google_scholar": "lkPKfjgAAAAJ;;XD_01h8AAAAJ", "orcid": ";;", "linkedin": ";filip-granqvist-112017149/;kunal-talwar-128a6159", "or_profile": "~Congzheng_Song2;~Filip_Granqvist1;~Kunal_Talwar1", "aff": "Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com", "position": "Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nsong2022flair,\ntitle={{FLAIR}: Federated Learning Annotated Image Repository},\nauthor={Congzheng Song and Filip Granqvist and Kunal Talwar},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=1kIZiRelqFt}\n}", "github": "", "project": "", "reviewers": "2BNV;eA2n;eJgq;XSNs;s4gW", "pdf_size": 2526770, "rating": "6;7;8;8;9", "confidence": "5;4;3;3;5", "wc_summary_and_contributions": "14;89;71;153;249", "wc_strengths": "64;46;78;81;81", "wc_weaknesses": "83;47;47;77;446", "wc_correctness": "10;17;10;104;25", "wc_clarity": "5;5;12;18;10", "wc_relation_to_prior_work": "85;56;20;9;65", "wc_documentation": "30;1;14;39;19", "wc_additional_feedback": "43;19;1;196;76", "wc_review": "334;280;253;677;971", "wc_reply_reviewers": "0;0;0;37;95", "wc_reply_authors": "404;122;282;263;476", "reply_reviewers": "0;0;0;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 7.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "wc_summary_and_contributions_avg": [ 115.2, 80.2655592393151 ], "wc_strengths_avg": [ 70.0, 13.549907748763458 ], "wc_weaknesses_avg": [ 140.0, 153.72182668703883 ], "wc_correctness_avg": [ 33.2, 35.82959670440068 ], "wc_clarity_avg": [ 10.0, 4.857983120596447 ], "wc_relation_to_prior_work_avg": [ 47.0, 28.36194633659686 ], "wc_documentation_avg": [ 20.6, 13.093509842666327 ], "wc_additional_feedback_avg": [ 67.0, 69.22138397923 ], "wc_review_avg": [ 
503.0, 279.3170241857807 ], "wc_reply_reviewers_avg": [ 26.4, 37.173108559817805 ], "wc_reply_authors_avg": [ 309.4, 122.25154395753046 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.21926450482675733, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3690272585566553585&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "apple.com;apple.com;apple.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Finite-Sample Maximum Likelihood Estimation of Location", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52851", "id": "1l5hEEK_j13", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c27cfb05a2e9eb579698419b25234ffb-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1l5hEEK_j13", "openreview": "https://openreview.net/forum?id=1l5hEEK_j13", "poster": "/media/PosterPDFs/NeurIPS%202022/52851.png?t=1669355742.7378929", "slides": "https://nips.cc/virtual/2022/poster/52851", "video": "https://nips.cc/virtual/2022/poster/52851", "author_site": "Shivam Gupta, Jasper Lee, Eric Price, Paul Valiant", "tldr": "", "abstract": "We consider 1-dimensional location estimation, where we estimate a parameter $\\lambda$ from $n$ samples $\\lambda + \\eta_i$, with each $\\eta_i$ drawn i.i.d. from a known distribution $f$. For fixed $f$ the maximum-likelihood estimate (MLE) is well-known to be optimal in the limit as $n \\to \\infty$: it is asymptotically normal with variance matching the Cramer-Rao lower bound of $\\frac{1}{n\\mathcal{I}}$, where $\\mathcal{I}$ is the Fisher information of $f$. However, this bound does not hold for finite $n$, or when $f$ varies with $n$. We show for arbitrary $f$ and $n$ that one can recover a similar theory based on the Fisher information of a smoothed version of $f$, where the smoothing radius decays with $n$.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/779f75d16b883692213158a25778c4f91ca11526.pdf", "author": "Shivam Gupta;Jasper C.H. Lee;Eric Price;Paul Valiant", "authorids": "~Shivam_Gupta1;~Jasper_C.H._Lee1;~Eric_Price1;~Paul_Valiant1", "gender": "M;M;;M", "homepage": "https://shivamgupta2.github.io/;https://jasperchlee.github.io/;;https://www.cs.purdue.edu/homes/pvaliant/", "dblp": "29/8830-2;150/4950;;", "google_scholar": "HsbPV-EAAAAJ;z0Y4snAAAAAJ;;abUcBIkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shivam_Gupta1;~Jasper_C.H._Lee1;~Eric_Price1;~Paul_Valiant1", "aff": "University of Texas, Austin;University of Wisconsin - Madison;;Purdue University", "aff_domain": "utexas.edu;wisc.edu;;purdue.edu", "position": "PhD student;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\ngupta2022finitesample,\ntitle={Finite-Sample Maximum Likelihood Estimation of Location},\nauthor={Shivam Gupta and Jasper C.H. Lee and Eric Price and Paul Valiant},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1l5hEEK_j13}\n}", "github": "", "project": "", "reviewers": "6bYV;64zQ;hCp9;dJ3A", "pdf_size": 828012, "rating": "6;6;6;7", "confidence": "3;4;3;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;2;2", "contribution": "3;3;2;3", "wc_summary": "217;94;505;129", "wc_strengths_and_weaknesses": "360;68;3;468", "wc_questions": "212;289;3;12", "wc_limitations": "1;21;1;12", "wc_review": "790;472;512;621", "wc_reply_reviewers": "22;0;0;0", "wc_reply_authors": "550;651;395;290", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 236.25, 161.50445040307713 ], "wc_strengths_and_weaknesses_avg": [ 224.75, 194.42656068551952 ], "wc_questions_avg": [ 129.0, 124.55320148434564 ], "wc_limitations_avg": [ 8.75, 8.37779804005802 ], "wc_review_avg": [ 598.75, 123.14904587531322 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 471.5, 138.9037436500543 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14599269915688701650&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "utexas.edu;wisc.edu;;purdue.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;University of Wisconsin-Madison;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.wisc.edu;https://www.purdue.edu", "aff_unique_abbr": "UT Austin;UW-Madison;Purdue", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Austin;Madison;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Minimax Optimal Online Imitation Learning via Replay Estimation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53956", "id": "1mFfKXYMg5a", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2e809adc337594e0fee330a64acbb982-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1mFfKXYMg5a", "openreview": "https://openreview.net/forum?id=1mFfKXYMg5a", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53956", "video": "https://nips.cc/virtual/2022/poster/53956", "author_site": "Gokul Swamy, Nived Rajaraman, Matt Peng, Sanjiban Choudhury, J. Bagnell, Steven Wu, Jiantao Jiao, Kannan Ramchandran", "tldr": "We develop a minimax-optimal extension of moment matching algorithms for imitation learning and validate it empirically.", "abstract": "Online imitation learning is the problem of how best to mimic expert demonstrations, given access to the environment or an accurate simulator. Prior work has shown that in the \\textit{infinite} sample regime, exact moment matching achieves value equivalence to the expert policy. 
However, in the \\textit{finite} sample regime, even if one has no optimization error, empirical variance can lead to a performance gap that scales with $H^2 / N_{\\text{exp}}$ for behavioral cloning and $H / N_{\\text{exp}}$ for online moment matching, where $H$ is the horizon and $N_{\\text{exp}}$ is the size of the expert dataset. We introduce the technique of ``replay estimation'' to reduce this empirical variance: by repeatedly executing cached expert actions in a stochastic simulator, we compute a smoother expert visitation distribution estimate to match. In the presence of general function approximation, we prove a meta theorem reducing the performance gap of our approach to the \\textit{parameter estimation error} for offline classification (i.e. learning the expert policy). In the tabular setting or with linear function approximation, our meta theorem shows that the performance gap incurred by our approach achieves the optimal $\\widetilde{O} \\left( \\min \\left( H^{3/2} / N_{\\text{exp}}, H / \\sqrt{N_{\\text{exp}}} \\right) \\right)$ dependency, under significantly weaker assumptions compared to prior work. We implement multiple instantiations of our approach on several continuous control tasks and find that we are able to significantly improve policy performance across a variety of dataset sizes.", "keywords": "imitation learning", "primary_area": "", "supplementary_material": "/attachment/6924201a6ff8c6cf1689722ce3433baa9862b013.pdf", "author": "Gokul Swamy;Nived Rajaraman;Matt Peng;Sanjiban Choudhury;Drew Bagnell;Steven Wu;Jiantao Jiao;Kannan Ramchandran", "authorids": "~Gokul_Swamy1;~Nived_Rajaraman1;~Matt_Peng1;~Sanjiban_Choudhury2;~Drew_Bagnell2;~Steven_Wu1;~Jiantao_Jiao1;~Kannan_Ramchandran1", "gender": ";M;;M;;M;M;M", "homepage": "https://gokul.dev/;https://people.eecs.berkeley.edu/~nived.rajaraman/;;http://www.sanjibanchoudhury.com/;https://robotwhisperer.org/;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en;https://www.eecs.berkeley.edu/~kannanr/;https://zstevenwu.com/", "dblp": "31/11509;229/4215;;;;43/8919;53/5765;137/8350", "google_scholar": "Sbpra_AAAAAJ;7hb2BM8AAAAJ;8op46U4AAAAJ;;7t4jbPQAAAAJ;aO8KpGcAAAAJ;https://scholar.google.com.tw/citations?user=DcV-5RAAAAAJ;MbF6rTEAAAAJ", "orcid": ";;;;;;0000-0002-4567-328X;", "linkedin": ";;;;;;;zstevenwu/", "or_profile": "~Gokul_Swamy1;~Nived_Rajaraman1;~Matt_Peng1;~Sanjiban_Choudhury2;~Drew_Bagnell2;~Jiantao_Jiao1;~Kannan_Ramchandran1;~Zhiwei_Steven_Wu1", "aff": "Carnegie Mellon University;University of California, Berkeley;University of California, Berkeley;;Carnegie Mellon University;University of California, Berkeley;University of California, Berkeley;Carnegie Mellon University", "aff_domain": "cmu.edu;berkeley.edu;berkeley.edu;;cmu.edu;berkeley.edu;berkeley.edu;cmu.edu", "position": "PhD student;PhD student;Undergrad student;;Associate Professor;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nswamy2022minimax,\ntitle={Minimax Optimal Online Imitation Learning via Replay Estimation},\nauthor={Gokul Swamy and Nived Rajaraman and Matt Peng and Sanjiban Choudhury and Drew Bagnell and Steven Wu and Jiantao Jiao and Kannan Ramchandran},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1mFfKXYMg5a}\n}", "github": "", "project": "", "reviewers": "hKQj;Jnmr;kTcU", "pdf_size": 2153375, "rating": "7;7;8", "confidence": "4;4;2", "soundness": "2;2;3", "novelty": "3;2;4", "presentation": "3;3;2", "contribution": "3;2;4", "wc_summary": "237;122;38", "wc_strengths_and_weaknesses": "296;740;452", "wc_questions": "288;130;6", "wc_limitations": "693;13;2", "wc_review": "1514;1005;498", "wc_reply_reviewers": "1584;183;0", "wc_reply_authors": "1500;769;569", "reply_reviewers": "4;2;0", "reply_authors": "4;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 132.33333333333334, 81.56932974810786 ], "wc_strengths_and_weaknesses_avg": [ 496.0, 183.9130229211624 ], "wc_questions_avg": [ 141.33333333333334, 115.40460225754528 ], "wc_limitations_avg": [ 236.0, 323.1790009679878 ], "wc_review_avg": [ 1005.6666666666666, 414.780530990654 ], "wc_reply_reviewers_avg": [ 589.0, 707.5266779422526 ], "wc_reply_authors_avg": [ 946.0, 400.1558029901187 ], "reply_reviewers_avg": [ 2.0, 1.632993161855452 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17967164041276198597&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cmu.edu;berkeley.edu;berkeley.edu;;cmu.edu;berkeley.edu;berkeley.edu;cmu.edu", "author_num": 8, "aff_unique_index": "0;1;1;0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu", "aff_unique_abbr": "CMU;UC Berkeley", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Regret Bounds for Information-Directed Reinforcement Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55255", "id": "1pHC-yZfaTK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b733cdd80ed2ae7e3156d8c33108c5d5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1pHC-yZfaTK", "openreview": "https://openreview.net/forum?id=1pHC-yZfaTK", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55255", "video": "https://nips.cc/virtual/2022/poster/55255", "author_site": "Botao Hao, Tor Lattimore", "tldr": "We derived the first Bayesian regret bounds for information-directed sampling in RL.", "abstract": "Information-directed sampling (IDS) has revealed its potential as a data-efficient algorithm for reinforcement learning (RL). However, theoretical understanding of IDS for Markov Decision Processes (MDPs) is still limited. We develop novel information-theoretic tools to bound the information ratio and cumulative information gain about the learning target. Our theoretical results shed light on the importance of choosing the learning target such that the practitioners can balance the computation and regret bounds. 
As a consequence, we derive prior-free Bayesian regret bounds for vanilla-IDS which learns the whole environment under tabular finite-horizon MDPs. In addition, we propose a computationally-efficient regularized-IDS that maximizes an additive form rather than the ratio form and show that it enjoys the same regret bound as vanilla-IDS. With the aid of rate-distortion theory, we improve the regret bound by learning a surrogate, less informative environment. Furthermore, we extend our analysis to linear MDPs and prove similar regret bounds for Thompson sampling as a by-product.", "keywords": "information-directed sampling;regret bound", "primary_area": "", "supplementary_material": "/attachment/0b5270cebac65adcc61d2b80d726c9480c3118de.pdf", "author": "Botao Hao;Tor Lattimore", "authorids": "~Botao_Hao1;~Tor_Lattimore1", "gender": ";M", "homepage": "https://haobotao000.github.io/;http://tor-lattimore.com", "dblp": "222/2211;44/9886", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Botao_Hao1;~Tor_Lattimore1", "aff": "Google Deepmind;Google DeepMind", "aff_domain": "google.com;google.com", "position": "Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nhao2022regret,\ntitle={Regret Bounds for Information-Directed Reinforcement Learning},\nauthor={Botao Hao and Tor Lattimore},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1pHC-yZfaTK}\n}", "github": "", "project": "", "reviewers": "hin3;Dwor;dRgt;uxAT", "pdf_size": 281539, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "4;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "contribution": "2;2;3;3", "wc_summary": "49;29;67;96", "wc_strengths_and_weaknesses": "265;65;188;397", "wc_questions": "52;29;6;95", "wc_limitations": "12;62;1;14", "wc_review": "378;185;262;602", "wc_reply_reviewers": "124;41;9;6", "wc_reply_authors": "547;559;91;237", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.25, 24.631027181179434 ], "wc_strengths_and_weaknesses_avg": [ 228.75, 120.516337066806 ], "wc_questions_avg": [ 45.5, 32.882366094914765 ], "wc_limitations_avg": [ 22.25, 23.47738273317535 ], "wc_review_avg": [ 356.75, 157.3807087924057 ], "wc_reply_reviewers_avg": [ 45.0, 47.62877281643944 ], "wc_reply_authors_avg": [ 358.5, 201.2777931119079 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3319478714948847406&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "DeepMind;Google", "aff_unique_dep": "DeepMind;Google DeepMind", "aff_unique_url": "https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "DeepMind;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Let Images Give You More: Point Cloud Cross-Modal Training for Shape Analysis", "status": "Accept", "track": 
"main", "site": "https://nips.cc/virtual/2022/poster/55376", "id": "1qXIyIxLbEu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d0d82e8f202648128e912c959b2b9968-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1qXIyIxLbEu", "openreview": "https://openreview.net/forum?id=1qXIyIxLbEu", "poster": "/media/PosterPDFs/NeurIPS%202022/9461cce28ebe3e76fb4b931c35a169b0.png?t=1666413399.3135955", "slides": "https://nips.cc/virtual/2022/poster/55376", "video": "https://nips.cc/virtual/2022/poster/55376", "author_site": "Xu Yan, Heshen Zhan, Chaoda Zheng, Jiantao Gao, Ruimao Zhang, Shuguang Cui, Zhen Li", "tldr": "", "abstract": "Although recent point cloud analysis achieves impressive progress, the paradigm of representation learning from single modality gradually meets its bottleneck. In this work, we take a step towards more discriminative 3D point cloud representation using 2D images, which inherently contain richer appearance information, e.g., texture, color, and shade. Specifically, this paper introduces a simple but effective point cloud cross-modality training (PointCMT) strategy, which utilizes view-images, i.e., rendered or projected 2D images of the 3D object, to boost point cloud classification. In practice, to effectively acquire auxiliary knowledge from view-images, we develop a teacher-student framework and formulate the cross-modal learning as a knowledge distillation problem. Through novel feature and classifier enhancement criteria, PointCMT eliminates the distribution discrepancy between different modalities and avoid potential negative transfer effectively. Note that PointCMT efficiently improves the point-only representation without any architecture modification. Sufficient experiments verify significant gains on various datasets based on several backbones, i.e., equipped with PointCMT, PointNet++ and PointMLP achieve state-of-the-art performance on two benchmarks, i.e., 94.4% and 86.7% accuracy on ModelNet40 and ScanObjectNN, respectively.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/e45bcf704b7a4accdd10081b8fd85a56a85c036d.pdf", "author": "Xu Yan;Heshen Zhan;Chaoda Zheng;Jiantao Gao;Ruimao Zhang;Shuguang Cui;Zhen Li", "authorids": "~Xu_Yan3;~Heshen_Zhan1;~Chaoda_Zheng1;~Jiantao_Gao1;~Ruimao_Zhang1;~Shuguang_Cui1;~Zhen_Li6", "gender": "M;;M;M;M;M;M", "homepage": "https://yanx27.github.io/;;;;http://zhangruimao.site/#;https://sse.cuhk.edu.cn/en/content/1415;https://mypage.cuhk.edu.cn/academics/lizhen/", "dblp": "03/4702-14;;247/8254;265/1310;54/10697;48/4914;74/2397-26", "google_scholar": ";;3YuWG1QAAAAJ;;ZJwZdtgAAAAJ;https://scholar.google.com.hk/citations?user=1o_qvR0AAAAJ;https://scholar.google.com.hk/citations?user=0TTt3QsAAAAJ", "orcid": ";;;0000-0001-5057-0229;;0000-0003-2608-775X;0000-0002-7669-2686", "linkedin": ";%E8%B4%BA%E6%B7%B1-%E5%8D%A0-a614321b2/;;;;;", "or_profile": "~Xu_Yan3;~Heshen_Zhan1;~Chaoda_Zheng1;~Jiantao_Gao1;~Ruimao_Zhang1;~Shuguang_Cui1;~Zhen_LI_Jason1", "aff": "The Chinese University of Hong Kong;The Chinese University of HongKong, ShenZhen;The Chinese University of Hong Kong, Shenzhen;shanghai university;The Chinese University of Hong Kong (Shenzhen);Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "link.cuhk.edu.hk;link.cuhk.edu.cn;cuhk.edu.cn;shu.edu.cn;cuhk.edu.cn;sribd.cn;edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Vice Executive Director;Assistant Professor", "bibtex": 
"@inproceedings{\nyan2022let,\ntitle={Let Images Give You More: Point Cloud Cross-Modal Training for Shape Analysis},\nauthor={Xu Yan and Heshen Zhan and Chaoda Zheng and Jiantao Gao and Ruimao Zhang and Shuguang Cui and Zhen Li},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1qXIyIxLbEu}\n}", "github": "", "project": "", "reviewers": "vTJ8;J8fY;N54s;R3qG", "pdf_size": 961210, "rating": "5;5;6;7", "confidence": "4;4;3;5", "soundness": "3;1;3;3", "novelty": "2;1;3;3", "presentation": "3;3;2;3", "contribution": "2;1;3;3", "wc_summary": "76;55;71;63", "wc_strengths_and_weaknesses": "231;12;179;221", "wc_questions": "32;549;24;50", "wc_limitations": "8;19;1;40", "wc_review": "347;635;275;374", "wc_reply_reviewers": "21;442;0;11", "wc_reply_authors": "234;1353;298;307", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 66.25, 7.980444849756184 ], "wc_strengths_and_weaknesses_avg": [ 160.75, 88.06921993523049 ], "wc_questions_avg": [ 163.75, 222.62342082539294 ], "wc_limitations_avg": [ 17.0, 14.747881203752625 ], "wc_review_avg": [ 407.75, 136.10175421352952 ], "wc_reply_reviewers_avg": [ 118.5, 186.92043761986008 ], "wc_reply_authors_avg": [ 548.0, 465.61840599357754 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6492952168831138382&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "link.cuhk.edu.hk;link.cuhk.edu.cn;cuhk.edu.cn;shu.edu.cn;cuhk.edu.cn;sribd.cn;edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai University;Shenzhen Research Institute of Big Data", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.shu.edu.cn;http://www.sribd.cn", "aff_unique_abbr": "CUHK;SHU;", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Hong Kong SAR;Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1r1GDXPtuWz", "title": "Detecting danger in gridworlds using Gromov's Link Condition", "track": "main", "status": "Reject", "tldr": "By representing all possible configurations of multi-agent gridworlds as a single geometric space, we show positive curvature detects potential collisions.", "abstract": "Gridworlds have been long-utilised in AI research, particularly in reinforcement learning, as they provide simple yet scalable models for many real-world applications such as robot navigation, emergent behaviour, and operations research. We initiate a study of gridworlds using the mathematical framework of reconfigurable systems and state complexes due to Abrams, Ghrist & Peterson. State complexes represent all possible configurations of a system as a single geometric space, thus making them conducive to study using geometric, topological, or combinatorial methods. 
The main contribution of this work is a modification to the original Abrams, Ghrist & Peterson setup which we introduce to capture agent braiding and thereby more naturally represent the topology of gridworlds. With this modification, the state complexes may exhibit geometric defects (failure of Gromov's Link Condition). Serendipitously, we discover these failures occur exactly where undesirable or dangerous states appear in the gridworld. Our results therefore provide a novel method for seeking guaranteed safety limitations in discrete task environments with single or multiple agents, and offer useful safety information (in geometric and topological forms) for incorporation in or analysis of machine learning systems. More broadly, our work introduces tools from geometric group theory and combinatorics to the AI community and demonstrates a proof-of-concept for this geometric viewpoint of the task domain through the example of simple gridworld environments.", "keywords": "safety;multi-agent navigation;geometry;topology;braiding;collision-avoidance;curvature;cube complex;gridworld;configuration space", "primary_area": "", "supplementary_material": "/attachment/fe48bf096341712ff6931678d72d0e0299e372c6.zip", "author": "Thomas F Burns;Robert Tang", "authorids": "~Thomas_F_Burns1;robert.tang@xjtlu.edu.cn", "gender": "M;", "homepage": "https://tfburns.com/;", "dblp": "311/5096;", "google_scholar": "xifCmHAAAAAJ;", "orcid": "0000-0002-1123-2929;", "linkedin": "tfburns/;", "or_profile": "~Thomas_F_Burns1;robert.tang@xjtlu.edu.cn", "aff": "Araya Inc.;", "aff_domain": "araya.org;", "position": "Research Intern;", "bibtex": "@misc{\nburns2022detecting,\ntitle={Detecting danger in gridworlds using Gromov's Link Condition},\nauthor={Thomas F Burns and Robert Tang},\nyear={2022},\nurl={https://openreview.net/forum?id=1r1GDXPtuWz}\n}", "github": "", "project": "", "reviewers": "2SSW;5vs3;txoD", "site": "https://openreview.net/forum?id=1r1GDXPtuWz", "pdf_size": 5161091, "rating": "2;3;4", "confidence": "3;4;2", "soundness": "2;2;3", "novelty": "1;2;2", "presentation": "1;2;3", "contribution": "1;2;2", "wc_summary": "168;26;120", "wc_strengths_and_weaknesses": "111;102;368", "wc_questions": "49;2;116", "wc_limitations": "534;2;18", "wc_review": "862;132;622", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "698;421;781", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 3.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 104.66666666666667, 58.97645481225726 ], "wc_strengths_and_weaknesses_avg": [ 193.66666666666666, 123.32702686579108 ], "wc_questions_avg": [ 55.666666666666664, 46.77843757782235 ], "wc_limitations_avg": [ 184.66666666666666, 247.10231798903783 ], "wc_review_avg": [ 538.6666666666666, 303.7908637350519 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 633.3333333333334, 153.91844882563262 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2864782641276401843&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Araya Inc.", "aff_unique_dep": "", 
"aff_unique_url": "", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Riemannian Neural SDE: Learning Stochastic Representations on Manifolds", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55178", "id": "1ryTomA0iKa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/098491b37deebbe6c007e69815729e09-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1ryTomA0iKa", "openreview": "https://openreview.net/forum?id=1ryTomA0iKa", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55178", "video": "https://nips.cc/virtual/2022/poster/55178", "author_site": "Sung Woo Park, Hyomin Kim, Kyungjae Lee, Junseok Kwon", "tldr": "We express the stochastic representation with the Riemannian neural SDE (RNSDE), which extends the conventional Euclidean NSDE.", "abstract": "In recent years, the neural stochastic differential equation (NSDE) has gained attention for modeling stochastic representations with great success in various types of applications. However, it typically loses expressivity when the data representation is manifold-valued. To address this issue, we suggest a principled method for expressing the stochastic representation with the Riemannian neural SDE (RNSDE), which extends the conventional Euclidean NSDE. Empirical results for various tasks demonstrate that the proposed method significantly outperforms baseline methods.", "keywords": "Stochastic representation on Manifolds;Riemannian neural stochastic differential equation", "primary_area": "", "supplementary_material": "/attachment/74604d9a6fde1184edaf14a3913956f0237caf44.pdf", "author": "Sung Woo Park;Hyomin Kim;Kyungjae Lee;Junseok Kwon", "authorids": "~Sung_Woo_Park2;~Hyomin_Kim3;~Kyungjae_Lee1;~Junseok_Kwon5", "gender": "M;F;M;M", "homepage": ";;https://sites.google.com/view/kyungjaelee;https://sites.google.com/view/cau-cvml/", "dblp": "92/6585;;13/7265-1;04/425", "google_scholar": "B1xpjO8AAAAJ;;https://scholar.google.co.kr/citations?user=OZZJagIAAAAJ;lwsaTnEAAAAJ", "orcid": ";;0000-0003-0147-2715;", "linkedin": ";hyomin-kim-27a004179/;;", "or_profile": "~Sung_Woo_Park2;~Hyomin_Kim3;~Kyungjae_Lee1;~Junseok_Kwon5", "aff": "ChungAng University;Chung-Ang University;ChungAng University;Chung-Ang University", "aff_domain": "cau.ac.kr;cau.ac.kr;cau.ac.kr;cau.ac.kr", "position": "PhD student;Undergrad student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npark2022riemannian,\ntitle={Riemannian Neural {SDE}: Learning Stochastic Representations on Manifolds},\nauthor={Sung Woo Park and Hyomin Kim and Kyungjae Lee and Junseok Kwon},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1ryTomA0iKa}\n}", "github": "", "project": "", "reviewers": "hLQB;mk19;rDVp", "pdf_size": 11414010, "rating": "6;6;6", "confidence": "2;4;3", "soundness": "3;4;3", "novelty": "3;3;2", "presentation": "4;4;3", "contribution": "3;3;2", "wc_summary": "90;30;39", "wc_strengths_and_weaknesses": "281;177;229", "wc_questions": "182;30;101", "wc_limitations": "34;1;25", "wc_review": "587;238;394", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "363;245;668", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 53.0, 26.419689627245813 ], "wc_strengths_and_weaknesses_avg": [ 229.0, 42.45782220824175 ], "wc_questions_avg": [ 104.33333333333333, 62.098488083223266 ], "wc_limitations_avg": [ 20.0, 13.92838827718412 ], "wc_review_avg": [ 406.3333333333333, 142.7453054297136 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 425.3333333333333, 178.2252008617811 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5715103887041254942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cau.ac.kr;cau.ac.kr;cau.ac.kr;cau.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Chungang University;Chung-Ang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.cau.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr": "CAU;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Do Residual Neural Networks discretize Neural Ordinary Differential Equations?", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53298", "id": "1tCuRbPts3J", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ecc38927fe5148c66bee64ee8fed1e76-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1tCuRbPts3J", "openreview": "https://openreview.net/forum?id=1tCuRbPts3J", "poster": "/media/PosterPDFs/NeurIPS%202022/53298.png?t=1669698865.0004997", "slides": "https://nips.cc/virtual/2022/poster/53298", "video": "https://nips.cc/virtual/2022/poster/53298", "author_site": "Michael Sander, Pierre Ablin, Gabriel Peyr\u00e9", "tldr": "We investigate whether the discrete dynamics defined by a ResNet are close to the continuous one of a Neural ODE.", "abstract": "Neural Ordinary Differential Equations (Neural ODEs) are the continuous analog of Residual Neural Networks (ResNets). We investigate whether the discrete dynamics defined by a ResNet are close to the continuous one of a Neural ODE. We first quantify the distance between the ResNet's hidden state trajectory and the solution of its corresponding Neural ODE. Our bound is tight and, on the negative side, does not go to $0$ with depth $N$ if the residual functions are not smooth with depth. On the positive side, we show that this smoothness is preserved by gradient descent for a ResNet with linear residual functions and small enough initial loss. 
It ensures an implicit regularization towards a limit Neural ODE at rate $\\frac1N$, uniformly with depth and optimization time. As a byproduct of our analysis, we consider the use of a memory-free discrete adjoint method to train a ResNet by recovering the activations on the fly through a backward pass of the network, and show that this method theoretically succeeds at large depth if the residual functions are Lipschitz with the input. We then show that Heun's method, a second order ODE integration scheme, allows for better gradient estimation with the adjoint method when the residual functions are smooth with depth. We experimentally validate that our adjoint method succeeds at large depth, and that Heun\u2019s method needs fewer layers to succeed. We finally use the adjoint method successfully for fine-tuning very deep ResNets without memory consumption in the residual layers.", "keywords": "Deep Learning theory;ResNets;Neural ODEs", "primary_area": "", "supplementary_material": "/attachment/633a4ead8f48e36318313ca199eab96dd339d049.pdf", "author": "Michael Eli Sander;Pierre Ablin;Gabriel Peyr\u00e9", "authorids": "~Michael_Eli_Sander1;~Pierre_Ablin2;~Gabriel_Peyr\u00e92", "gender": "M;M;M", "homepage": "https://michaelsdr.github.io/;https://pierreablin.com/;http://gpeyre.com/", "dblp": "285/5131;174/0980.html;65/1759", "google_scholar": "COqAqcMAAAAJ;1ZsunaYAAAAJ;https://scholar.google.fr/citations?user=KqA1dYcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Michael_Eli_Sander1;~Pierre_Ablin2;~Gabriel_Peyr\u00e92", "aff": "Google;Universit\u00e9 Paris-Dauphine (Paris IX);CNRS", "aff_domain": "google.com;lamsade.dauphine.fr;cnrs.fr", "position": "Intern;Researcher;Researcher", "bibtex": "@inproceedings{\nsander2022do,\ntitle={Do Residual Neural Networks discretize Neural Ordinary Differential Equations?},\nauthor={Michael Eli Sander and Pierre Ablin and Gabriel Peyr{\\'e}},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1tCuRbPts3J}\n}", "github": "", "project": "", "reviewers": "rbYF;9X5i;hARH", "pdf_size": 1112162, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;2;3", "contribution": "2;2;3", "wc_summary": "135;149;208", "wc_strengths_and_weaknesses": "211;120;908", "wc_questions": "77;52;37", "wc_limitations": "7;29;1", "wc_review": "430;350;1154", "wc_reply_reviewers": "0;0;508", "wc_reply_authors": "1393;345;1009", "reply_reviewers": "0;0;2", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 164.0, 31.63331577098213 ], "wc_strengths_and_weaknesses_avg": [ 413.0, 351.98390114700794 ], "wc_questions_avg": [ 55.333333333333336, 16.49915822768611 ], "wc_limitations_avg": [ 12.333333333333334, 12.036980056845191 ], "wc_review_avg": [ 644.6666666666666, 361.6308738048908 ], "wc_reply_reviewers_avg": [ 169.33333333333334, 239.47349656184406 ], "wc_reply_authors_avg": [ 915.6666666666666, 432.90440309867745 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17421555297762106570&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;lamsade.dauphine.fr;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Universit\u00e9 Paris-Dauphine;Centre National de la Recherche Scientifique", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.univ-paris-dauphine.fr;https://www.cnrs.fr", "aff_unique_abbr": "Google;UPD;CNRS", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Paris;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;France" }, { "title": "Dynamic Graph Neural Networks Under Spatio-Temporal Distribution Shift", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55422", "id": "1tIUqrUuJxx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2857242c9e97de339ce642e75b15ff24-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1tIUqrUuJxx", "openreview": "https://openreview.net/forum?id=1tIUqrUuJxx", "poster": "/media/PosterPDFs/NeurIPS%202022/55422.png?t=1669520268.5241423", "slides": "https://nips.cc/virtual/2022/poster/55422", "video": "https://nips.cc/virtual/2022/poster/55422", "author_site": "Zeyang Zhang, Xin Wang, Ziwei Zhang, Haoyang Li, Zhou Qin, Wenwu Zhu", "tldr": "", "abstract": "Dynamic graph neural networks (DyGNNs) have demonstrated powerful predictive abilities by exploiting graph structural and temporal dynamics. However, the existing DyGNNs fail to handle distribution shifts, which naturally exist in dynamic graphs, mainly because the patterns exploited by DyGNNs may be variant with respect to labels under distribution shifts. 
In this paper, we propose to handle spatio-temporal distribution shifts in dynamic graphs by discovering and utilizing {\\it invariant patterns}, i.e., structures and features whose predictive abilities are stable across distribution shifts, which faces two key challenges: 1) How to discover the complex variant and invariant spatio-temporal patterns in dynamic graphs, which involve both time-varying graph structures and node features. 2) How to handle spatio-temporal distribution shifts with the discovered variant and invariant patterns. To tackle these challenges, we propose the Disentangled Intervention-based Dynamic graph Attention networks (DIDA). Our proposed method can effectively handle spatio-temporal distribution shifts in dynamic graphs by discovering and fully utilizing invariant spatio-temporal patterns. Specifically, we first propose a disentangled spatio-temporal attention network to capture the variant and invariant patterns. Then, we design a spatio-temporal intervention mechanism to create multiple interventional distributions by sampling and reassembling variant patterns across neighborhoods and time stamps to eliminate the spurious impacts of variant patterns. Lastly, we propose an invariance regularization term to minimize the variance of predictions in intervened distributions so that our model can make predictions based on invariant patterns with stable predictive abilities and therefore handle distribution shifts. Experiments on three real-world datasets and one synthetic dataset demonstrate the superiority of our method over state-of-the-art baselines under distribution shifts. Our work is the first study of spatio-temporal distribution shifts in dynamic graphs, to the best of our knowledge.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/6a2bc73b31c47aab9f3e0900208fb774a430d38a.pdf", "author": "Zeyang Zhang;Xin Wang;Ziwei Zhang;Haoyang Li;Zhou Qin;Wenwu Zhu", "authorids": "~Zeyang_Zhang1;~Xin_Wang17;~Ziwei_Zhang1;~Haoyang_Li1;~Zhou_Qin2;~Wenwu_Zhu1", "gender": ";M;;M;M;M", "homepage": "https://zzythu.com;http://mn.cs.tsinghua.edu.cn/xinwang/;;https://haoyang.li;https://github.com/archwalker;http://media.cs.tsinghua.edu.cn/en/zww", "dblp": "236/0242;10/5630-19;;118/0004-1.html;;97/6308-1.html", "google_scholar": "w_njVcAAAAAJ;YPOBHYUAAAAJ;;86RE16gAAAAJ;;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ", "orcid": "0000-0003-1329-1313;0000-0002-0351-2939;;0000-0003-3544-5563;;0000-0003-2236-9290", "linkedin": "zeyang-zhang-a7a039159;;;;;", "or_profile": "~Zeyang_Zhang1;~Xin_Wang17;~Ziwei_Zhang1;~Haoyang_Li1;~Zhou_Qin2;~Wenwu_Zhu1", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;;tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;;PhD student;;Full Professor", "bibtex": "@inproceedings{\nzhang2022dynamic,\ntitle={Dynamic Graph Neural Networks Under Spatio-Temporal Distribution Shift},\nauthor={Zeyang Zhang and Xin Wang and Ziwei Zhang and Haoyang Li and Zhou Qin and Wenwu Zhu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1tIUqrUuJxx}\n}", "github": "", "project": "", "reviewers": "8AMq;vUM5;nS4E;zQjJ", "pdf_size": 1484018, "rating": "5;6;7;7", "confidence": "2;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "contribution": "2;3;3;3", "wc_summary": "16;36;75;74", "wc_strengths_and_weaknesses": "44;99;112;104", "wc_questions": "12;56;33;4", "wc_limitations": "35;1;21;1", "wc_review": "107;192;241;183", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "647;289;487;105", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 50.25, 25.262373206015305 ], "wc_strengths_and_weaknesses_avg": [ 89.75, 26.81767141270845 ], "wc_questions_avg": [ 26.25, 20.17888748172208 ], "wc_limitations_avg": [ 14.5, 14.378803844548406 ], "wc_review_avg": [ 180.75, 47.96027001592047 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 382.0, 204.10046545757803 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3224162194839003211&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tsinghua.edu.cn;cs.tsinghua.edu.cn;;tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Efficient 3D Object Detection with Knowledge Distillation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55367", "id": "1tnVNogPUz9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/8625a8c2be8ba5197b7a14833dbea8ac-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1tnVNogPUz9", "openreview": "https://openreview.net/forum?id=1tnVNogPUz9", "poster": "/media/PosterPDFs/NeurIPS%202022/97e8527feaf77a97fc38f34216141515.png?t=1665987805.2237492", "slides": "https://nips.cc/virtual/2022/poster/55367", "video": "https://nips.cc/virtual/2022/poster/55367", "author_site": "Jihan Yang, Shaoshuai Shi, Runyu Ding, Zhe Wang, Xiaojuan Qi", "tldr": "In this paper, we conduct the first systematic study on knowledge distillation for developing high-performance and efficient 3D LiDAR-based detectors.", "abstract": "Despite substantial progress in 3D object detection, advanced 3D detectors often suffer from heavy computation overheads. To this end, we explore the potential of knowledge distillation (KD) for developing efficient 3D object detectors, focusing on popular pillar- and voxel-based detectors. In the absence of well-developed teacher-student pairs, we first study how to obtain student models with good trade-offs between accuracy and efficiency from the perspectives of model compression and input resolution reduction.
Then, we build a benchmark to assess existing KD methods developed in the 2D domain for 3D object detection upon six well-constructed teacher-student pairs. Further, we propose an improved KD pipeline incorporating an enhanced logit KD method, which performs KD on only a few pivotal positions determined by the teacher's classification response, and a teacher-guided student model initialization, which facilitates transferring the teacher model's feature extraction ability to students through weight inheritance. Finally, we conduct extensive experiments on the Waymo dataset. Our best performing model achieves $65.75\\%$ LEVEL 2 mAPH, surpassing its teacher model while requiring only $44\\%$ of the teacher's flops. Our most efficient model runs at 51 FPS on an NVIDIA A100, which is $2.2\\times$ faster than PointPillar with even higher accuracy. Code will be available.", "keywords": "3D object detection;knowledge distillation", "primary_area": "", "supplementary_material": "/attachment/01fe2231eece209902e989760b7a4bf049cdc4e9.pdf", "author": "Jihan Yang;Shaoshuai Shi;Runyu Ding;Zhe Wang;XIAOJUAN QI", "authorids": "~Jihan_Yang1;~Shaoshuai_Shi1;~Runyu_Ding1;~Zhe_Wang2;~XIAOJUAN_QI2", "gender": "M;M;F;M;F", "homepage": "https://jihanyang.github.io/;https://shishaoshuai.com/;https://dingry.github.io/;https://wang-zhe.me;https://xjqi.github.io/", "dblp": "230/4254;202/5922;289/1652;75/3158-6;176/1445-1.html", "google_scholar": "zWfNZnIAAAAJ;DC9wzBgAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;https://scholar.google.com.hk/citations?hl=en;bGn0uacAAAAJ", "orcid": ";;;;", "linkedin": ";;;wang-zhe-2ab56761/;", "or_profile": "~Jihan_Yang1;~Shaoshuai_Shi1;~Runyu_Ding1;~Zhe_Wang2;~XIAOJUAN_QI2", "aff": "University of Hong Kong;Saarland Informatics Campus, Max-Planck Institute;Electrical and Electronic Engineering, University of Hong Kong;Sensetime;University of Hong Kong", "aff_domain": "eee.hku.hk;mpi-inf.mpg.de;eee.hku.hk;sensetime.com;hku.hk", "position": "PhD student;Postdoc;PhD student;Director;Assistant Professor", "bibtex": "@inproceedings{\nyang2022towards,\ntitle={Towards Efficient 3D Object Detection with Knowledge Distillation},\nauthor={Jihan Yang and Shaoshuai Shi and Runyu Ding and Zhe Wang and XIAOJUAN QI},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H.
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1tnVNogPUz9}\n}", "github": "", "project": "", "reviewers": "Sm5s;hUXR;Qi4p;nWAU", "pdf_size": 474158, "rating": "6;6;7;8", "confidence": "5;4;5;2", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "contribution": "3;3;3;3", "wc_summary": "126;150;71;96", "wc_strengths_and_weaknesses": "185;376;139;171", "wc_questions": "174;62;5;42", "wc_limitations": "1;10;10;11", "wc_review": "486;598;225;320", "wc_reply_reviewers": "0;61;0;19", "wc_reply_authors": "1854;1321;732;650", "reply_reviewers": "0;1;0;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 110.75, 29.877876430563134 ], "wc_strengths_and_weaknesses_avg": [ 217.75, 92.87457940685384 ], "wc_questions_avg": [ 70.75, 63.02132575565195 ], "wc_limitations_avg": [ 8.0, 4.06201920231798 ], "wc_review_avg": [ 407.25, 144.4080589856397 ], "wc_reply_reviewers_avg": [ 20.0, 24.9098374141623 ], "wc_reply_authors_avg": [ 1139.25, 487.1136289409279 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4669452180689530857&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "eee.hku.hk;mpi-inf.mpg.de;eee.hku.hk;sensetime.com;hku.hk", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of Hong Kong;Max-Planck Institute;SenseTime", "aff_unique_dep": ";Informatics;", "aff_unique_url": "https://www.hku.hk;https://www.mpi-sws.org;https://www.sensetime.com", "aff_unique_abbr": "HKU;MPI-SWS;SenseTime", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Hong Kong SAR;Saarland;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Germany" }, { "title": "Benchopt: Reproducible, efficient and collaborative optimization benchmarks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53105", "id": "1uSzacpyWLH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a30769d9b62c9b94b72e21e0ca73f338-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1uSzacpyWLH", "openreview": "https://openreview.net/forum?id=1uSzacpyWLH", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53105", "video": "https://nips.cc/virtual/2022/poster/53105", "author_site": "Thomas Moreau, Mathurin Massias, Alexandre Gramfort, Pierre Ablin, Pierre-Antoine Bannier, Benjamin Charlier, Mathieu Dagr\u00e9ou, Tom Dupre la Tour, Ghislain DURIF, Cassio F. Dantas, Quentin Klopfenstein, Johan Larsson, En Lai, Tanguy Lefort, Beno\u00eet Mal\u00e9zieux, Badr MOUFAD, Binh T. Nguyen, Alain Rakotomamonjy, Zaccharie Ramzi, Joseph Salmon, Samuel Vaiter", "tldr": "Collaborative framework to automate, publish and reproduce optimization benchmarks in machine learning across programming languages and hardware architectures.", "abstract": "Numerical validation is at the core of machine learning research as it allows us to assess the actual impact of new methods, and to confirm the agreement between theory and practice. 
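For the knowledge-distillation record above, the "pivotal positions" idea can be sketched as logit KD restricted to the locations where the teacher's classification response is strongest. A hedged PyTorch sketch: the shapes, `k`, and `tau` are illustrative assumptions, not the paper's exact detector head or hyperparameters.

```python
import torch
import torch.nn.functional as F

def pivotal_logit_kd(student_logits, teacher_logits, k=128, tau=1.0):
    """KL distillation on only the k positions with the highest teacher
    classification response (max class probability).

    student_logits, teacher_logits: (N, C) logits, one row per candidate
    spatial position; a simplification of a dense detection head.
    """
    with torch.no_grad():
        confidence = teacher_logits.softmax(dim=-1).amax(dim=-1)  # (N,)
        idx = confidence.topk(k).indices                          # pivotal positions
    s = F.log_softmax(student_logits[idx] / tau, dim=-1)
    t = F.softmax(teacher_logits[idx] / tau, dim=-1)
    return F.kl_div(s, t, reduction="batchmean") * tau ** 2

# Hypothetical shapes: 10_000 candidate positions, 3 classes.
s_logits, t_logits = torch.randn(10_000, 3), torch.randn(10_000, 3)
loss = pivotal_logit_kd(s_logits, t_logits)
```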
Yet, the rapid development of the field poses several challenges: researchers are confronted with a profusion of methods to compare, limited transparency and consensus on best practices, as well as tedious re-implementation work. As a result, validation is often very partial, which can lead to wrong conclusions that slow down the progress of research. We propose Benchopt, a collaborative framework to automatize, publish and reproduce optimization benchmarks in machine learning across programming languages and hardware architectures. Benchopt simplifies benchmarking for the community by providing an off-the-shelf tool for running, sharing and extending experiments. To demonstrate its broad usability, we showcase benchmarks on three standard ML tasks: $\\ell_2$-regularized logistic regression, Lasso and ResNet18 training for image classification. These benchmarks highlight key practical findings that give a more nuanced view of state-of-the-art for these problems, showing that for practical evaluation, the devil is in the details.", "keywords": "reproducibility;optimization;lasso;resnet;logistic regression;open source software;benchmark", "primary_area": "", "supplementary_material": "/attachment/d8708e912f165f4be2efe3c7669804a6b89f886e.zip", "author": "Thomas Moreau;Mathurin Massias;Alexandre Gramfort;Pierre Ablin;Pierre-Antoine Bannier;Benjamin Charlier;Mathieu Dagr\u00e9ou;Tom Dupre la Tour;Ghislain Durif;C\u00e1ssio Fraga Dantas;Quentin Klopfenstein;Johan Larsson;En Lai;Tanguy Lefort;Beno\u00eet Mal\u00e9zieux;Badr Moufad;Binh Nguyen;Alain Rakotomamonjy;Zaccharie Ramzi;Joseph Salmon;Samuel Vaiter", "authorids": "~Thomas_Moreau2;~Mathurin_Massias1;~Alexandre_Gramfort1;~Pierre_Ablin2;pierreantoine.bannier@gmail.com;~Benjamin_Charlier1;~Mathieu_Dagr\u00e9ou1;~Tom_Dupre_la_Tour1;ghislain.durif@umontpellier.fr;cassio.fraga-dantas@umontpellier.fr;quentin.klopfenstein@uni.lu;~Johan_Larsson2;~En_Lai1;~Tanguy_Lefort1;~Beno\u00eet_Mal\u00e9zieux1;badr.moufad@inria.fr;~Binh_Nguyen2;~Alain_Rakotomamonjy1;~Zaccharie_Ramzi1;~Joseph_Salmon2;~Samuel_Vaiter1", "gender": ";;M;M;;M;M;M;;;;M;F;M;;;M;;M;Unspecified;M", "homepage": ";https://mathurinm.github.io;http://alexandre.gramfort.net;https://pierreablin.com/;;https://imag.umontpellier.fr/~charlier/;https://matdag.github.io;http://tomdlt.github.io/;;;;https://jolars.co;;https://tanglef.github.io;;;https://tbng.github.io/;;https://zaccharieramzi.fr/;http://josephsalmon.eu/;https://samuelvaiter.com", "dblp": ";198/0455;15/7980;174/0980.html;;144/7428;312/6626.html;https://dblp.uni-trier.de/pid/201/7222.html;;;;54/1760-2;;;;;241/2542;;266/7212;72/8107.html;51/10261.html", "google_scholar": ";https://scholar.google.fr/citations?user=kaTDZS0AAAAJ;fhxshS0AAAAJ;1ZsunaYAAAAJ;;zFoo9xAAAAAJ;_AYpVTMAAAAJ;https://scholar.google.fr/citations?user=LuzAM-4AAAAJ;;;;DCJvywYAAAAJ;;BWIOpfEAAAAJ;;;6rpHj_YAAAAJ;;rTgYLN8AAAAJ;https://scholar.google.fr/citations?user=m7OEDmoAAAAJ;HkXkm7IAAAAJ", "orcid": ";;0000-0001-9791-4404;;;;0000-0002-6578-2213;0000-0002-2674-1670;;;;0000-0002-4029-5945;;;;;;;0000-0002-5888-8749;0000-0002-3181-0634;0000-0002-4077-708X", "linkedin": ";;alexandregramfort/;;;;;tomdlt/;;;;;linkedin.com/in/en-lai-3199781b6;;benoit-malezieux-203283148/;;;;zaccharie-ramzi-043476a5/;;", "or_profile": 
"~Thomas_Moreau2;~Mathurin_Massias1;~Alexandre_Gramfort1;~Pierre_Ablin2;pierreantoine.bannier@gmail.com;~Benjamin_Charlier1;~Mathieu_Dagr\u00e9ou1;~Tom_Dupre_la_Tour1;ghislain.durif@umontpellier.fr;cassio.fraga-dantas@umontpellier.fr;quentin.klopfenstein@uni.lu;~Johan_Larsson2;~En_Lai1;~Tanguy_Lefort1;~Beno\u00eet_Mal\u00e9zieux1;badr.moufad@inria.fr;~Binh_Nguyen2;~Alain_Rakotomamonjy1;~Zaccharie_Ramzi1;~Joseph_Salmon2;~Samuel_Vaiter1", "aff": ";INRIA;INRIA;Universit\u00e9 Paris-Dauphine (Paris IX);;Univ Montpellier;Inria;University of California, Berkeley;;;;Lund University;\u00c9cole Polytechnique;University of Montpellier France;INRIA;;T\u00e9l\u00e9com ParisTech;;CEA;Univ. Montpellier;CNRS", "aff_domain": ";inria.fr;inria.fr;lamsade.dauphine.fr;;umontpellier.fr;inria.fr;berkeley.edu;;;;stat.lu.se;polytechnique.edu;umontpellier.fr;inria.fr;;telecom-paristech.fr;;cea.fr;umontpellier.fr;cnrs.fr", "position": ";Researcher;Full Professor;Researcher;;Associate Professor;PhD student;Postdoc;;;;PhD student;Undergrad student;PhD student;PhD student;;Postdoc;;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nmoreau2022benchopt,\ntitle={Benchopt: Reproducible, efficient and collaborative optimization benchmarks},\nauthor={Thomas Moreau and Mathurin Massias and Alexandre Gramfort and Pierre Ablin and Pierre-Antoine Bannier and Benjamin Charlier and Mathieu Dagr{\\'e}ou and Tom Dupre la Tour and Ghislain Durif and C{\\'a}ssio Fraga Dantas and Quentin Klopfenstein and Johan Larsson and En Lai and Tanguy Lefort and Beno{\\^\\i}t Mal{\\'e}zieux and Badr Moufad and Binh Nguyen and Alain Rakotomamonjy and Zaccharie Ramzi and Joseph Salmon and Samuel Vaiter},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1uSzacpyWLH}\n}", "github": "", "project": "", "reviewers": "Fv41;4Y29;nYXq", "pdf_size": 1447201, "rating": "4;7;7", "confidence": "3;4;4", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "3;3;3", "contribution": "2;3;3", "wc_summary": "22;120;38", "wc_strengths_and_weaknesses": "245;262;559", "wc_questions": "9;161;127", "wc_limitations": "51;40;55", "wc_review": "327;583;779", "wc_reply_reviewers": "73;0;0", "wc_reply_authors": "748;400;617", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.0, 42.926293418680665 ], "wc_strengths_and_weaknesses_avg": [ 355.3333333333333, 144.18121313896003 ], "wc_questions_avg": [ 99.0, 65.13575566972925 ], "wc_limitations_avg": [ 48.666666666666664, 6.342099196813483 ], "wc_review_avg": [ 563.0, 185.0693563685427 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 34.41253001774532 ], "wc_reply_authors_avg": [ 588.3333333333334, 143.50919444024794 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 21, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3504541958783431314&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 24, "email": 
";inria.fr;inria.fr;lamsade.dauphine.fr;;umontpellier.fr;inria.fr;berkeley.edu;;;;stat.lu.se;polytechnique.edu;umontpellier.fr;inria.fr;;telecom-paristech.fr;;cea.fr;umontpellier.fr;cnrs.fr", "author_num": 21, "aff_unique_index": "0;0;1;2;0;3;4;5;2;0;6;7;2;8", "aff_unique_norm": "INRIA;Universit\u00e9 Paris-Dauphine;University of Montpellier;University of California, Berkeley;Lund University;Ecole Polytechnique;T\u00e9l\u00e9com ParisTech;Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;;;;;;;", "aff_unique_url": "https://www.inria.fr;https://www.univ-paris-dauphine.fr;https://www.univ-montp1.fr;https://www.berkeley.edu;https://www.lunduniversity.lu.se;https://www.polytechnique.edu;https://www.telecom-paristech.fr;https://www.cea.fr;https://www.cnrs.fr", "aff_unique_abbr": "INRIA;UPD;UM;UC Berkeley;LU;X;TP;CEA;CNRS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Paris;Berkeley", "aff_country_unique_index": "0;0;0;0;0;1;2;0;0;0;0;0;0;0", "aff_country_unique": "France;United States;Sweden" }, { "title": "Autoregressive Perturbations for Data Poisoning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52801", "id": "1vusesyN7E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/af66ac99716a64476c07ae8b089d59f8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1vusesyN7E", "openreview": "https://openreview.net/forum?id=1vusesyN7E", "poster": "/media/PosterPDFs/NeurIPS%202022/5a1106fcb6c23317695f2f619988ef41.png?t=1667845510.4506493", "slides": "https://nips.cc/virtual/2022/poster/52801", "video": "https://nips.cc/virtual/2022/poster/52801", "author_site": "Pedro Sandoval-Segura, Vasu Singla, Jonas Geiping, Micah Goldblum, Tom Goldstein, David Jacobs", "tldr": "", "abstract": "The prevalence of data scraping from social media as a means to obtain datasets has led to growing concerns regarding unauthorized use of data. Data poisoning attacks have been proposed as a bulwark against scraping, as they make data ``unlearnable'' by adding small, imperceptible perturbations. Unfortunately, existing methods require knowledge of both the target architecture and the complete dataset so that a surrogate network can be trained, the parameters of which are used to generate the attack. In this work, we introduce autoregressive (AR) poisoning, a method that can generate poisoned data without access to the broader dataset. The proposed AR perturbations are generic, can be applied across different datasets, and can poison different architectures. Compared to existing unlearnable methods, our AR poisons are more resistant against common defenses such as adversarial training and strong data augmentations. Our analysis further provides insight into what makes an effective data poison. ", "keywords": "autoregressive processes;poisons;data poisoning;data protection;imperceptible perturbations;adversarial machine learning", "primary_area": "", "supplementary_material": "/attachment/48e028ca357be59b14adfc7e8d331451c7a5e451.zip", "author": "Pedro Sandoval-Segura;Vasu Singla;Jonas Geiping;Micah Goldblum;Tom Goldstein;David W.
Jacobs", "authorids": "~Pedro_Sandoval-Segura1;~Vasu_Singla1;~Jonas_Geiping1;~Micah_Goldblum1;~Tom_Goldstein1;~David_W._Jacobs1", "gender": "M;M;;M;M;M", "homepage": "https://www.cs.umd.edu/people/vsingla;https://jonasgeiping.github.io/;;https://www.cs.umd.edu/~tomg/;http://www.cs.umd.edu/~djacobs;http://cs.umd.edu/~psando", "dblp": "270/9234;190/7229;241/7231;25/8184;j/DavidWJacobs.html;242/4604", "google_scholar": "geHpT2IAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;pGDKzuUAAAAJ;KmSuVtgAAAAJ;WH2KmRgAAAAJ;x-0RKroAAAAJ", "orcid": ";;;;;0000-0003-1932-8092", "linkedin": ";;;;;", "or_profile": "~Vasu_Singla1;~Jonas_Geiping1;~Micah_Goldblum1;~Tom_Goldstein1;~David_W._Jacobs1;~Pedro_Sandoval_Segura2", "aff": "Mitsubishi Electric Research Labs;University of Maryland, College Park;New York University;University of Maryland, College Park;University of Maryland, College Park;University of Maryland", "aff_domain": "merl.com;umd.edu;nyu.edu;umd.edu;umd.edu;umd.edu", "position": "Intern;Postdoc;Postdoc;Associate Professor;Professor;PhD student", "bibtex": "@inproceedings{\nsandoval-segura2022autoregressive,\ntitle={Autoregressive Perturbations for Data Poisoning},\nauthor={Pedro Sandoval-Segura and Vasu Singla and Jonas Geiping and Micah Goldblum and Tom Goldstein and David W. Jacobs},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1vusesyN7E}\n}", "github": "", "project": "", "reviewers": "wQLZ;FXVx;Jf4X;GeFk", "pdf_size": 1667652, "rating": "5;6;6;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "contribution": "2;3;3;3", "wc_summary": "62;137;116;46", "wc_strengths_and_weaknesses": "263;206;38;289", "wc_questions": "32;79;146;63", "wc_limitations": "48;34;1;11", "wc_review": "405;456;301;409", "wc_reply_reviewers": "156;0;0;10", "wc_reply_authors": "1303;945;979;608", "reply_reviewers": "2;0;0;1", "reply_authors": "4;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.25, 37.43243914040334 ], "wc_strengths_and_weaknesses_avg": [ 199.0, 97.68060196374714 ], "wc_questions_avg": [ 80.0, 41.68333000133266 ], "wc_limitations_avg": [ 23.5, 18.527007313648905 ], "wc_review_avg": [ 392.75, 56.64086422363275 ], "wc_reply_reviewers_avg": [ 41.5, 66.23254487032791 ], "wc_reply_authors_avg": [ 958.75, 246.03493146299368 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17109390722215919135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "merl.com;umd.edu;nyu.edu;umd.edu;umd.edu;umd.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Mitsubishi Electric Research Laboratories;University of Maryland;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.merl.com;https://www/umd.edu;https://www.nyu.edu", "aff_unique_abbr": "MERL;UMD;NYU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0;0", 
"aff_country_unique": "United States" }, { "title": "Policy Optimization with Advantage Regularization for Long-Term Fairness in Decision Systems", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54729", "id": "1wVBLK1Xuc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/36b76e1f69bbba80d3463f7d6c02bc3d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1wVBLK1Xuc", "openreview": "https://openreview.net/forum?id=1wVBLK1Xuc", "poster": "/media/PosterPDFs/NeurIPS%202022/54729.png?t=1669091860.8983514", "slides": "https://nips.cc/virtual/2022/poster/54729", "video": "https://nips.cc/virtual/2022/poster/54729", "author_site": "Eric Yu, Zhizhen Qin, Min Kyung Lee, Sicun Gao", "tldr": "We use policy optimization with advantage regularization to improve long-term fairness of decision-making policies. ", "abstract": "Long-term fairness is an important factor of consideration in designing and deploying learning-based decision systems in high-stake decision-making contexts. Recent work has proposed the use of Markov Decision Processes (MDPs) to formulate decision-making with long-term fairness requirements in dynamically changing environments, and demonstrated major challenges in directly deploying heuristic and rule-based policies that worked well in static environments. We show that policy optimization methods from deep reinforcement learning can be used to find strictly better decision policies that can often achieve both higher overall utility and less violation of the fairness requirements, compared to previously-known strategies. In particular, we propose new methods for imposing fairness requirements in policy optimization by regularizing the advantage evaluation of different actions. Our proposed methods make it easy to impose fairness constraints without reward engineering or sacrificing training efficiency. We perform detailed analyses in three established case studies, including attention allocation in incident monitoring, bank loan approval, and vaccine distribution in population networks. ", "keywords": "fairness;reinforcement learning;policy optimization;algorithmic decision making", "primary_area": "", "supplementary_material": "/attachment/16dc4b068e915f0ef1f818960150c2b1b0295101.pdf", "author": "Eric Yang Yu;Zhizhen Qin;Min Kyung Lee;Sicun Gao", "authorids": "~Eric_Yang_Yu1;~Zhizhen_Qin1;minkyung.lee@austin.utexas.edu;~Sicun_Gao1", "gender": "M;M;;M", "homepage": "https://ericyangyu.github.io/;https://zhizhenqin.github.io;;", "dblp": "331/8331;294/2353;;22/8296", "google_scholar": "6ebcOw8AAAAJ;4OMmbNwAAAAJ;;", "orcid": ";;;", "linkedin": "eric-yu-engineer/;zhizhenqin/;;", "or_profile": "~Eric_Yang_Yu1;~Zhizhen_Qin1;minkyung.lee@austin.utexas.edu;~Sicun_Gao1", "aff": "University of California, San Diego;University of California, San Diego;;", "aff_domain": "ucsd.edu;ucsd.edu;;", "position": "Undergrad student;PhD student;;", "bibtex": "@inproceedings{\nyu2022policy,\ntitle={Policy Optimization with Advantage Regularization for Long-Term Fairness in Decision Systems},\nauthor={Eric Yang Yu and Zhizhen Qin and Min Kyung Lee and Sicun Gao},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1wVBLK1Xuc}\n}", "github": "", "project": "", "reviewers": "sDf6;MEKu;pkLZ;oAdk", "pdf_size": 502070, "rating": "5;5;6;6", "confidence": "4;3;3;3", "soundness": "3;2;4;3", "novelty": "2;3;4;3", "presentation": "3;3;4;3", "contribution": "2;3;4;3", "wc_summary": "65;30;50;28", "wc_strengths_and_weaknesses": "57;126;234;136", "wc_questions": "30;189;79;42", "wc_limitations": "14;31;1;15", "wc_review": "166;376;364;221", "wc_reply_reviewers": "16;44;59;10", "wc_reply_authors": "596;693;1298;483", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 43.25, 15.22128443988877 ], "wc_strengths_and_weaknesses_avg": [ 138.25, 63.0966520506437 ], "wc_questions_avg": [ 85.0, 62.701674618785106 ], "wc_limitations_avg": [ 15.25, 10.638961415476606 ], "wc_review_avg": [ 281.75, 90.46649932433553 ], "wc_reply_reviewers_avg": [ 32.25, 20.07952937695503 ], "wc_reply_authors_avg": [ 767.5, 315.17177855893124 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14223207610228521971&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 8, "email": "ucsd.edu;ucsd.edu;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimal Query Complexities for Dynamic Trace Estimation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53406", "id": "1wz-ksUupt2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/e3abc125ecacb71786cefb9f67b08c5d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1wz-ksUupt2", "openreview": "https://openreview.net/forum?id=1wz-ksUupt2", "poster": "/media/PosterPDFs/NeurIPS%202022/7241bd19bb709da0f46807bde88aed25.png?t=1666656422.5703864", "slides": "https://nips.cc/virtual/2022/poster/53406", "video": "https://nips.cc/virtual/2022/poster/53406", "author_site": "David Woodruff, Fred Zhang, Richard Zhang", "tldr": "We give tight bounds for implicity trace estimation in a dynamic setting. ", "abstract": "We consider the problem of minimizing the number of matrix-vector queries needed for accurate trace estimation in the dynamic setting where our underlying matrix is changing slowly, such as during an optimization process. 
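As background for the trace-estimation record above, before its formal statement continues: the static baseline is Hutchinson's estimator, which touches the matrix only through matrix-vector products. A self-contained numpy sketch; the paper's contribution is the dynamic, binary-tree variant with improved query complexity, which is not reproduced here.

```python
import numpy as np

def hutchinson_trace(matvec, n, num_queries=100, seed=0):
    """Classic Hutchinson estimator: tr(A) ~ (1/m) * sum_i g_i^T A g_i
    with i.i.d. Gaussian probe vectors g_i and m = num_queries."""
    rng = np.random.default_rng(seed)
    est = 0.0
    for _ in range(num_queries):
        g = rng.standard_normal(n)
        est += g @ matvec(g)                # one matrix-vector query per probe
    return est / num_queries

A = np.diag(np.arange(1.0, 101.0))          # tr(A) = 5050
print(hutchinson_trace(lambda v: A @ v, n=100, num_queries=500))
```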
Specifically, for any $m$ matrices $\\mathbf{A}_1,...,\\mathbf{A}_m$ with consecutive differences bounded in Schatten-$1$ norm by $\\alpha$, we provide a novel binary tree summation procedure that simultaneously estimates all $m$ traces up to $\\epsilon$ error with $\\delta$ failure probability with an optimal query complexity of $\\widetilde{O}(m \\alpha\\sqrt{\\log(1/\\delta)}/\\epsilon + m\\log(1/\\delta))$, improving the dependence on both $\\alpha$ and $\\delta$ from Dharangutte and Musco (NeurIPS, 2021). Our procedure works without additional norm bounds on $\\mathbf{A}_i$ and can be generalized to a bound for the $p$-th Schatten norm for $p \\in [1,2]$, giving a complexity of $\\widetilde{O}(m \\alpha(\\sqrt{\\log(1/\\delta)}/\\epsilon)^p +m \\log(1/\\delta))$. By using novel reductions to communication complexity and information-theoretic analyses of Gaussian matrices, we provide matching lower bounds for static and dynamic trace estimation in all relevant parameters, including the failure probability. Our lower bounds (1) give the first tight bounds for Hutchinson's estimator in the matrix-vector product model with Frobenius norm error {\\it even in the static setting}, and (2) are the first unconditional lower bounds for dynamic trace estimation, resolving open questions of prior work.", "keywords": "trace estimation;numerical linear algebra;query complexity lower bound", "primary_area": "", "supplementary_material": "/attachment/a8d527e8c7587809334593f1bb354d791b63c542.pdf", "author": "David Woodruff;Fred Zhang;Qiuyi Zhang", "authorids": "~David_Woodruff1;~Fred_Zhang1;~Qiuyi_Zhang1", "gender": "M;M;M", "homepage": "http://www.cs.cmu.edu/~dwoodruf/;http://fredzhang.me/;https://qiuyiz.github.io", "dblp": "w/DPWoodruff;232/9071;133/8559", "google_scholar": "https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;guJ_kBQAAAAJ;mE11hO8AAAAJ", "orcid": ";;", "linkedin": ";fred-zhang-0/;", "or_profile": "~David_Woodruff1;~Fred_Zhang1;~Qiuyi_Zhang1", "aff": "Carnegie Mellon University;University of California, Berkeley;Google", "aff_domain": "cmu.edu;berkeley.edu;google.com", "position": "Associate Professor;PhD student;Researcher", "bibtex": "@inproceedings{\nwoodruff2022optimal,\ntitle={Optimal Query Complexities for Dynamic Trace Estimation},\nauthor={David Woodruff and Fred Zhang and Qiuyi Zhang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1wz-ksUupt2}\n}", "github": "", "project": "", "reviewers": "5gFw;2eEg;dsKZ", "pdf_size": 628827, "rating": "7;7;7", "confidence": "4;4;3", "soundness": "4;4;3", "novelty": "3;3;4", "presentation": "3;2;3", "contribution": "3;3;4", "wc_summary": "112;349;211", "wc_strengths_and_weaknesses": "487;402;254", "wc_questions": "12;151;1", "wc_limitations": "30;1;19", "wc_review": "641;903;485", "wc_reply_reviewers": "66;57;55", "wc_reply_authors": "515;434;36", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 224.0, 97.19053451854249 ], "wc_strengths_and_weaknesses_avg": [ 381.0, 96.2739147779224 ], "wc_questions_avg": [ 54.666666666666664, 68.26582030725349 ], "wc_limitations_avg": [ 16.666666666666668, 11.953614051360738 ], "wc_review_avg": [ 676.3333333333334, 172.46706609926687 ], "wc_reply_reviewers_avg": [ 59.333333333333336, 4.784233364802441 ], "wc_reply_authors_avg": [ 328.3333333333333, 209.33917189310006 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4329621469764637956&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": "cmu.edu;berkeley.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "CMU;UC Berkeley;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Local Spatiotemporal Representation Learning for Longitudinally-consistent Neuroimage Analysis", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54733", "id": "1xqE9fRZch5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/57da66da25d0ce77e0129b246f358851-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=1xqE9fRZch5", "openreview": "https://openreview.net/forum?id=1xqE9fRZch5", "poster": "/media/PosterPDFs/NeurIPS%202022/54733.png?t=1669519073.2957382", "slides": "https://nips.cc/virtual/2022/poster/54733", "video": "https://nips.cc/virtual/2022/poster/54733", "author_site": "Mengwei Ren, Neel Dey, Martin Styner, Kelly Botteron, Guido Gerig", "tldr": "Given longitudinal neuroimages with scarce annotation, this paper develops a self-supervised spatiotemporal representation learning method and a consistency-regularization term for image-to-image networks.", "abstract": "Recent self-supervised advances in medical computer vision exploit the global and local anatomical self-similarity for pretraining prior to downstream tasks such as segmentation. However, current methods assume i.i.d. 
image acquisition, which is invalid in clinical study designs where follow-up longitudinal scans track subject-specific temporal changes. Further, existing self-supervised methods for medically-relevant image-to-image architectures exploit only spatial or temporal self-similarity and do so via a loss applied only at a single image-scale, with naive multi-scale spatiotemporal extensions collapsing to degenerate solutions. To these ends, this paper makes two contributions: (1) It presents a local and multi-scale spatiotemporal representation learning method for image-to-image architectures trained on longitudinal images. It exploits the spatiotemporal self-similarity of learned multi-scale intra-subject image features for pretraining and develops several feature-wise regularizations that avoid degenerate representations; (2) During finetuning, it proposes a surprisingly simple self-supervised segmentation consistency regularization to exploit intra-subject correlation. Benchmarked across various segmentation tasks, the proposed framework outperforms both well-tuned randomly-initialized baselines and current self-supervised techniques designed for both i.i.d. and longitudinal datasets. These improvements are demonstrated across both longitudinal neurodegenerative adult MRI and developing infant brain MRI and yield both higher performance and longitudinal consistency.", "keywords": "neuroimaging;medical image analysis;spatiotemporal representation learning;longitudinal learning;medical image segmentation", "primary_area": "", "supplementary_material": "/attachment/bc2332a5b7ce0c1933c293b6a5849040d6f0358b.pdf", "author": "Mengwei Ren;Neel Dey;Martin Andreas Styner;Kelly Botteron;Guido Gerig", "authorids": "~Mengwei_Ren1;~Neel_Dey1;~Martin_Andreas_Styner1;~Kelly_Botteron1;~Guido_Gerig1", "gender": "F;M;M;;M", "homepage": "https://www.mengweiren.com/;https://www.neeldey.com/;http://www.cs.unc.edu/~styner/;;http://engineering.nyu.edu/people/guido-gerig/", "dblp": "210/2614;239/1845;s/MAStyner;;https://dblp.uni-trier.de/pid/g/GuidoGerig", "google_scholar": "https://scholar.google.com/citations?hl=en;yEmcuHcAAAAJ;waEzpjgAAAAJ;;https://scholar.google.com.tw/citations?user=P5CovF0AAAAJ", "orcid": ";0000-0003-1427-6406;0000-0002-8747-5118;;", "linkedin": ";neel-dey/;martinstyner/;;", "or_profile": "~Mengwei_Ren1;~Neel_Dey1;~Martin_Andreas_Styner1;~Kelly_Botteron1;~Guido_Gerig1", "aff": "New York University;New York University;University of North Carolina, Chapel Hill;;New York University", "aff_domain": "nyu.edu;nyu.edu;unc.edu;;nyu.edu", "position": "PhD student;PhD student;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nren2022local,\ntitle={Local Spatiotemporal Representation Learning for Longitudinally-consistent Neuroimage Analysis},\nauthor={Mengwei Ren and Neel Dey and Martin Andreas Styner and Kelly Botteron and Guido Gerig},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=1xqE9fRZch5}\n}", "github": "", "project": "", "reviewers": "de4N;r9Zh;xiZS", "pdf_size": 5269035, "rating": "7;7;7", "confidence": "4;4;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "contribution": "3;3;3", "wc_summary": "138;144;78", "wc_strengths_and_weaknesses": "139;48;91", "wc_questions": "48;53;11", "wc_limitations": "6;7;2", "wc_review": "331;252;182", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1424;600;135", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 120.0, 29.79932885150268 ], "wc_strengths_and_weaknesses_avg": [ 92.66666666666667, 37.16928241916375 ], "wc_questions_avg": [ 37.333333333333336, 18.732028424302822 ], "wc_limitations_avg": [ 5.0, 2.160246899469287 ], "wc_review_avg": [ 255.0, 60.86597297888753 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 719.6666666666666, 532.9917656232808 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8437472979024832790&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "email": "nyu.edu;nyu.edu;unc.edu;;nyu.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "New York University;University of North Carolina", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.unc.edu", "aff_unique_abbr": "NYU;UNC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Finding Correlated Equilibrium of Constrained Markov Game: A Primal-Dual Approach", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53989", "id": "2-CflpDkezH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a3f8f584febcc88ed8cdeb30b096db34-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2-CflpDkezH", "openreview": "https://openreview.net/forum?id=2-CflpDkezH", "poster": "/media/PosterPDFs/NeurIPS%202022/15f99f2165aa8c86c9dface16fefd281.png?t=1666054835.8094454", "slides": "https://nips.cc/virtual/2022/poster/53989", "video": "https://nips.cc/virtual/2022/poster/53989", "author_site": "Ziyi Chen, Shaocong Ma, Yi Zhou", "tldr": "We proposed correlated equilibrium (CE) for constrained Markov game and developed the first primal-dual algorithm with non-asymptotic convergence to CE.", "abstract": "Constrained Markov game is a fundamental problem that covers many applications, where multiple players compete with each other under behavioral constraints. The existing literature has proved the existence of Nash equilibrium for constrained Markov games, which turns out to be PPAD-complete and cannot be computed in polynomial time. In this work, we propose a surrogate notion of correlated equilibrium (CE) for constrained Markov games that can be computed in polynomial time, and study its fundamental properties. We show that the modification structure of CE of constrained Markov games is fundamentally different from that of unconstrained Markov games. 
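On the claim above that the proposed CE can be computed in polynomial time: already in a one-shot normal-form game, a correlated equilibrium is the solution of a linear program over the joint action distribution. A scipy sketch for the game of Chicken, as background only; this is not the paper's constrained-Markov-game algorithm.

```python
import numpy as np
from scipy.optimize import linprog

# Payoffs for the game of Chicken; u[i][a, b] is player i's payoff.
u = [np.array([[6, 2], [7, 0]]), np.array([[6, 7], [2, 0]])]
nA, nB = u[0].shape

A_ub, b_ub = [], []
# Incentive constraints: no player gains by deviating from the recommendation.
for a in range(nA):                  # player 0 told to play a, deviates to a2
    for a2 in range(nA):
        row = np.zeros((nA, nB))
        row[a, :] = u[0][a2, :] - u[0][a, :]
        A_ub.append(row.ravel()); b_ub.append(0.0)
for b in range(nB):                  # player 1 told to play b, deviates to b2
    for b2 in range(nB):
        row = np.zeros((nA, nB))
        row[:, b] = u[1][:, b2] - u[1][:, b]
        A_ub.append(row.ravel()); b_ub.append(0.0)

# Maximize total payoff subject to CE constraints and the probability simplex.
c = -(u[0] + u[1]).ravel()
res = linprog(c, A_ub=np.array(A_ub), b_ub=np.array(b_ub),
              A_eq=np.ones((1, nA * nB)), b_eq=[1.0], bounds=(0, 1))
print(res.x.reshape(nA, nB))         # a correlated equilibrium distribution
```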
Moreover, we prove that the corresponding Lagrangian function has zero duality gap. Based on this result, we develop the first primal-dual algorithm that provably converges to CE of constrained Markov games. In particular, we prove that both the duality gap and the constraint violation of the output policy converge at the rate $\\mathcal{O}(\\frac{1}{\\sqrt{T}})$. Moreover, when adopting the V-learning algorithm as the subroutine in the primal update, our algorithm achieves an approximate CE with $\\epsilon$ duality gap with the sample complexity $\\mathcal{O}(H^9|\\mathcal{S}||\\mathcal{A}|^{2} \\epsilon^{-4})$.", "keywords": "constrained Markov game;correlated equilibrium;strong duality;reinforcement learning;primal-dual algorithm", "primary_area": "", "supplementary_material": "/attachment/4fe890242f74c4ccb2e6950c944b10230c50bc77.pdf", "author": "Ziyi Chen;Shaocong Ma;Yi Zhou", "authorids": "~Ziyi_Chen2;~Shaocong_Ma1;~Yi_Zhou2", "gender": "M;M;M", "homepage": ";https://mshaocong.github.io/;https://sites.google.com/site/yizhouhomepage/home", "dblp": "37/1439-2;270/3742;", "google_scholar": "zjSBVOIAAAAJ;;4fK8bYIAAAAJ", "orcid": ";;", "linkedin": "ziyi-chen-84616184/;;", "or_profile": "~Ziyi_Chen2;~Shaocong_Ma1;~Yi_Zhou2", "aff": "University of Utah;Lawrence Livermore National Labs;University of Utah", "aff_domain": "utah.edu;llnl.gov;utah.edu", "position": "PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nchen2022finding,\ntitle={Finding Correlated Equilibrium of Constrained Markov Game: A Primal-Dual Approach},\nauthor={Ziyi Chen and Shaocong Ma and Yi Zhou},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2-CflpDkezH}\n}", "github": "", "project": "", "reviewers": "jkm3;zKK3;z16W;ybg9;ddwR", "pdf_size": 304304, "rating": "6;6;6;6;7", "confidence": "2;3;4;2;4", "soundness": "3;4;3;3;4", "novelty": "2;2;3;3;3", "presentation": "3;3;4;3;4", "contribution": "2;2;3;3;3", "wc_summary": "72;52;58;69;156", "wc_strengths_and_weaknesses": "145;363;96;108;42", "wc_questions": "236;73;257;21;29", "wc_limitations": "1;30;3;9;4", "wc_review": "454;518;414;207;231", "wc_reply_reviewers": "147;0;195;0;36", "wc_reply_authors": "488;1278;1549;240;309", "reply_reviewers": "1;0;2;0;1", "reply_authors": "2;2;4;1;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "contribution_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 81.4, 37.997894678521334 ], "wc_strengths_and_weaknesses_avg": [ 150.8, 111.11687540603363 ], "wc_questions_avg": [ 123.2, 102.43515021710077 ], "wc_limitations_avg": [ 9.4, 10.63202708800161 ], "wc_review_avg": [ 364.8, 123.81502332108168 ], "wc_reply_reviewers_avg": [ 75.6, 80.44028841320747 ], "wc_reply_authors_avg": [ 772.8, 536.2482261042921 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5590169943749475, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13134081368196028248&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "utah.edu;llnl.gov;utah.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Utah;Lawrence 
Livermore National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.utah.edu;https://www.llnl.gov", "aff_unique_abbr": "Utah;LLNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fully Convolutional One-Stage 3D Object Detection on LiDAR Range Images", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55239", "id": "2-REuflJDT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/e1f418450107c4a0ddc16d008d131573-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2-REuflJDT", "openreview": "https://openreview.net/forum?id=2-REuflJDT", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55239", "video": "https://nips.cc/virtual/2022/poster/55239", "author_site": "Zhi Tian, Xiangxiang Chu, Xiaoming Wang, Xiaolin Wei, Chunhua Shen", "tldr": "A simple yet effective fully convolutional one-stage 3D object detector on LiDAR range images.", "abstract": "We present a simple yet effective fully convolutional one-stage 3D object detector for LiDAR point clouds of autonomous driving scenes, termed FCOS-LiDAR. Unlike the dominant methods that use the bird-eye view (BEV), our proposed detector detects objects from the range view (RV, a.k.a. range image) of the LiDAR points. Due to the range view's compactness and compatibility with the LiDAR sensors' sampling process on self-driving cars, the range view-based object detector can be realized by solely exploiting the vanilla 2D convolutions, departing from the BEV-based methods which often involve complicated voxelization operations and sparse convolutions.\n \nFor the first time, we show that an RV-based 3D detector with standard 2D convolutions alone can achieve comparable performance to state-of-the-art BEV-based detectors while being significantly faster and simpler. More importantly, almost all previous range view-based detectors only focus on single-frame point clouds since it is challenging to fuse multi-frame point clouds into a single range view. In this work, we tackle this challenging issue with a novel range view projection mechanism, and for the first time demonstrate the benefits of fusing multi-frame point clouds for a range-view based detector. Extensive experiments on nuScenes show the superiority of our proposed method and we believe that our work can be strong evidence that an RV-based 3D detector can compare favourably with the current mainstream BEV-based detectors. 
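Returning to the constrained-Markov-game record above: the primal-dual template it describes alternates a policy (primal) update on the Lagrangian with dual ascent on the multiplier. A schematic sketch with placeholder callables; the paper's V-learning subroutine and its convergence rates are not modeled here.

```python
def primal_dual(T, eta, primal_step, constraint_value, limit):
    """Schematic dual ascent for constrained policy optimization.

    primal_step(lmbda) should (approximately) improve the policy on the
    Lagrangian r + lmbda * (limit - c); constraint_value() evaluates the
    current policy's expected constraint cost. All callables are
    placeholders standing in for an RL subroutine and an evaluator.
    """
    lmbda = 0.0
    for _ in range(T):
        primal_step(lmbda)                        # primal: policy update
        violation = constraint_value() - limit    # dual: gradient of the Lagrangian
        lmbda = max(0.0, lmbda + eta * violation) # projected (nonnegative) ascent
    return lmbda
```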
Code will be made publicly available.", "keywords": "3D object detection;LiDAR point clould;autonomous driving", "primary_area": "", "supplementary_material": "/attachment/2e7511ec2149360ed11ac0d9488eb89a9cbe8723.pdf", "author": "Zhi Tian;Xiangxiang Chu;Xiaoming Wang;Xiaolin Wei;Chunhua Shen", "authorids": "~Zhi_Tian2;~Xiangxiang_Chu1;~Xiaoming_Wang4;~Xiaolin_Wei1;~Chunhua_Shen2", "gender": "M;M;F;;", "homepage": ";https://cxxgtxy.github.io/;https://github.com/wangxiaoming1886;;", "dblp": ";207/8002;;;", "google_scholar": "xSF3BBoAAAAJ;jn21pUsAAAAJ;;;", "orcid": ";0000-0003-2548-0605;;;", "linkedin": ";;;;", "or_profile": "~Zhi_Tian2;~Xiangxiang_Chu1;~Xiaoming_Wang4;~Xiaolin_Wei1;~Chunhua_Shen2", "aff": "Meituan Inc.;MeiTuan;Northwest Polytechnical University Xi'an;;", "aff_domain": "meituan.com;meituan.com;nwpu.edu.cn;;", "position": "Researcher;Senior Engineer;PhD student;;", "bibtex": "@inproceedings{\ntian2022fully,\ntitle={Fully Convolutional One-Stage 3D Object Detection on Li{DAR} Range Images},\nauthor={Zhi Tian and Xiangxiang Chu and Xiaoming Wang and Xiaolin Wei and Chunhua Shen},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2-REuflJDT}\n}", "github": "", "project": "", "reviewers": "Uzj9;6wwh;pS8M", "pdf_size": 992643, "rating": "4;6;7", "confidence": "3;4;4", "soundness": "2;3;4", "novelty": "2;2;3", "presentation": "3;3;4", "contribution": "2;2;3", "wc_summary": "68;45;78", "wc_strengths_and_weaknesses": "188;112;157", "wc_questions": "43;89;126", "wc_limitations": "1;16;25", "wc_review": "300;262;386", "wc_reply_reviewers": "0;0;106", "wc_reply_authors": "371;804;584", "reply_reviewers": "0;0;2", "reply_authors": "2;2;3", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 13.816254517375139 ], "wc_strengths_and_weaknesses_avg": [ 152.33333333333334, 31.201851796897497 ], "wc_questions_avg": [ 86.0, 33.95094500402996 ], "wc_limitations_avg": [ 14.0, 9.899494936611665 ], "wc_review_avg": [ 316.0, 51.87163643713842 ], "wc_reply_reviewers_avg": [ 35.333333333333336, 49.968879203849355 ], "wc_reply_authors_avg": [ 586.3333333333334, 176.7792094362029 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1182214878703167432&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "meituan.com;meituan.com;nwpu.edu.cn;;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Meituan Inc.;Meituan;Northwest Polytechnical University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.meituan.com;https://www.meituan.com;http://www.nwpu.edu.cn", "aff_unique_abbr": "Meituan;MeiTuan;NWPU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Parameter-free Dynamic Graph Embedding for Link Prediction", "status": "Accept", "track": "main", 
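For the FCOS-LiDAR record above, the range view is a standard spherical projection of the point cloud onto an image grid. A numpy sketch; the vertical field-of-view limits and image size are guessed sensor parameters, and the paper's multi-frame projection mechanism is more elaborate than this single-frame version.

```python
import numpy as np

def range_view_projection(points, H=32, W=1024, fov_up=10.0, fov_down=-30.0):
    """Project an (N, 3) LiDAR point cloud onto an H x W range image."""
    x, y, z = points.T
    r = np.linalg.norm(points, axis=1)
    yaw = np.arctan2(y, x)                        # azimuth in [-pi, pi]
    pitch = np.arcsin(z / np.maximum(r, 1e-8))    # inclination
    u = ((0.5 * (1.0 - yaw / np.pi)) * W).astype(int) % W
    fov = np.radians(fov_up) - np.radians(fov_down)
    v = ((np.radians(fov_up) - pitch) / fov * H).clip(0, H - 1).astype(int)
    img = np.zeros((H, W))
    img[v, u] = r                                 # range channel; real pipelines add x, y, z, intensity
    return img

img = range_view_projection(np.random.randn(1000, 3) * 10)
```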
"site": "https://nips.cc/virtual/2022/poster/54304", "id": "215KQFiU65l", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b14d7175755b180dc2163e15e3110cb6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=215KQFiU65l", "openreview": "https://openreview.net/forum?id=215KQFiU65l", "poster": "/media/PosterPDFs/NeurIPS%202022/57e4f98889f96942ec0691d6a5995dad.png?t=1666517854.2047794", "slides": "https://nips.cc/virtual/2022/poster/54304", "video": "https://nips.cc/virtual/2022/poster/54304", "author_site": "Jiahao Liu, Dongsheng Li, Hansu Gu, Tun Lu, Peng Zhang, Ning Gu", "tldr": "", "abstract": "Dynamic interaction graphs have been widely adopted to model the evolution of user-item interactions over time. There are two crucial factors when modelling user preferences for link prediction in dynamic interaction graphs: 1) collaborative relationship among users and 2) user personalized interaction patterns. Existing methods often implicitly consider these two factors together, which may lead to noisy user modelling when the two factors diverge. In addition, they usually require time-consuming parameter learning with back-propagation, which is prohibitive for real-time user preference modelling. To this end, this paper proposes FreeGEM, a parameter-free dynamic graph embedding method for link prediction. Firstly, to take advantage of the collaborative relationships, we propose an incremental graph embedding engine to obtain user/item embeddings, which is an Online-Monitor-Offline architecture consisting of an Online module to approximately embed users/items over time, a Monitor module to estimate the approximation error in real time and an Offline module to calibrate the user/item embeddings when the online approximation errors exceed a threshold. Meanwhile, we integrate attribute information into the model, which enables FreeGEM to better model users belonging to some under represented groups. Secondly, we design a personalized dynamic interaction pattern modeller, which combines dynamic time decay with attention mechanism to model user short-term interests. Experimental results on two link prediction tasks show that FreeGEM can outperform the state-of-the-art methods in accuracy while achieving over 36X improvement in efficiency. 
All code and datasets can be found in https://github.com/FudanCISL/FreeGEM.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/443aaed571da79d4c3f117f363ced4cb01a2795d.pdf", "author": "Jiahao Liu;Dongsheng Li;Hansu Gu;Tun Lu;Peng Zhang;Ning Gu", "authorids": "~Jiahao_Liu5;~Dongsheng_Li2;~Hansu_Gu1;~Tun_Lu1;~Peng_Zhang31;~Ning_Gu2", "gender": "M;M;;M;M;M", "homepage": ";http://recmind.cn;;;https://cscw.fudan.edu.cn/pengzhang/list.htm;https://cscw.fudan.edu.cn/", "dblp": ";254/0830-2.html;00/7447;41/2472;;", "google_scholar": ";VNg5rA8AAAAJ;;;;https://scholar.google.com.au/citations?user=AUnPpaUAAAAJ", "orcid": "0000-0002-5654-5902;0000-0003-3103-8442;;0000-0002-6633-4826;;0000-0002-2915-974X", "linkedin": ";;;;;", "or_profile": "~Jiahao_Liu5;~Dongsheng_Li2;~Hansu_Gu1;~Tun_Lu1;~Peng_Zhang31;~Ning_Gu2", "aff": "Fudan University;Microsoft Research Asia;Amazon;Fudan University;;Fudan University", "aff_domain": "fudan.edu.cn;microsoft.com;amazon.com;fudan.edu.cn;;fudan.edu.cn", "position": "PhD student;Principal Researcher;Researcher;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nliu2022parameterfree,\ntitle={Parameter-free Dynamic Graph Embedding for Link Prediction},\nauthor={Jiahao Liu and Dongsheng Li and Hansu Gu and Tun Lu and Peng Zhang and Ning Gu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=215KQFiU65l}\n}", "github": "", "project": "", "reviewers": "uRAL;coS6;vE5r", "pdf_size": 657216, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "contribution": "2;3;3", "wc_summary": "26;151;59", "wc_strengths_and_weaknesses": "131;82;154", "wc_questions": "49;141;11", "wc_limitations": "104;14;1", "wc_review": "310;388;225", "wc_reply_reviewers": "2;11;0", "wc_reply_authors": "1322;1078;1473", "reply_reviewers": "1;1;0", "reply_authors": "3;3;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.66666666666667, 52.89192334899115 ], "wc_strengths_and_weaknesses_avg": [ 122.33333333333333, 30.02591473303612 ], "wc_questions_avg": [ 67.0, 54.57716250105594 ], "wc_limitations_avg": [ 39.666666666666664, 45.79907810814052 ], "wc_review_avg": [ 307.6666666666667, 66.56492236072157 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 4.784233364802441 ], "wc_reply_authors_avg": [ 1291.0, 162.74110318744513 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=737985874382634688&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 7, "email": "fudan.edu.cn;microsoft.com;amazon.com;fudan.edu.cn;;fudan.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Fudan University;Microsoft;Amazon", "aff_unique_dep": ";Research;Amazon.com, Inc.", "aff_unique_url": "https://www.fudan.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.amazon.com", "aff_unique_abbr": "Fudan;MSR Asia;Amazon", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Constrained Update Projection Approach to Safe Policy Optimization", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54057", "id": "22hMrSbQXzt", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3ba7560b4c3e66d760fbdd472cf4a5a9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=22hMrSbQXzt", "openreview": "https://openreview.net/forum?id=22hMrSbQXzt", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54057", "video": "https://nips.cc/virtual/2022/poster/54057", "author_site": "Long Yang, Jiaming Ji, Juntao Dai, Linrui Zhang, Binbin Zhou, Pengfei Li, Yaodong Yang, Gang Pan", "tldr": "We propose the constrained update projection (CUP) algorithm with a theoretical safety guarantee, and provide a practical implementation of CUP that does not depend on any convex approximation to adapt high-dimensional safe reinforcement learning.", "abstract": "Safe reinforcement learning (RL) studies problems where an intelligent agent has to not only maximize reward but also avoid exploring unsafe areas. In this study, we propose CUP, a novel policy optimization method based on Constrained Update Projection framework that enjoys rigorous safety guarantee. Central to our CUP development is the newly proposed surrogate functions along with the performance bound. Compared to previous safe reinforcement learning meth- ods, CUP enjoys the benefits of 1) CUP generalizes the surrogate functions to generalized advantage estimator (GAE), leading to strong empirical performance. 2) CUP unifies performance bounds, providing a better understanding and in- terpretability for some existing algorithms; 3) CUP provides a non-convex im- plementation via only first-order optimizers, which does not require any strong approximation on the convexity of the objectives. To validate our CUP method, we compared CUP against a comprehensive list of safe RL baselines on a wide range of tasks. Experiments show the effectiveness of CUP both in terms of reward and safety constraint satisfaction. 
We have opened the source code of CUP at https://github.com/zmsn-2077/CUP-safe-rl.", "keywords": "Reinforcement Learning;Constrained MDP;Deep Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/8f528c3853cefe8330f713e04d9353ce1cd737aa.pdf", "author": "Long Yang;Jiaming Ji;Juntao Dai;Linrui Zhang;Binbin Zhou;Pengfei Li;Yaodong Yang;Gang Pan", "authorids": "~Long_Yang4;~Jiaming_Ji1;~Juntao_Dai1;~Linrui_Zhang1;~Binbin_Zhou1;~Pengfei_Li4;~Yaodong_Yang1;~Gang_Pan1", "gender": "M;M;M;M;F;M;M;", "homepage": "https://person.zju.edu.cn/longyang;https://github.com/jijiaming-bit;https://person.zju.edu.cn/jtdai;;;https://pfl-cs.github.io;https://www.yangyaodong.com;", "dblp": ";;;;71/8605.html;;170/1496-1;", "google_scholar": ";;;;6dVBWwcAAAAJ;;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;", "orcid": ";;;;0000-0002-9141-8474;;0000-0001-8132-5613;", "linkedin": ";;;%E9%BA%9F%E7%9D%BF-%E5%BC%A0-bb5312222/;;;yaodong-yang;", "or_profile": "~Long_Yang4;~Jiaming_Ji1;~Juntao_Dai1;~Linrui_Zhang1;~Binbin_Zhou1;~Pengfei_Li4;~Yaodong_Yang1;~Gang_Pan1", "aff": "Peking University;Zhejiang University;Zhejiang University;Tsinghua University;Zhejiang University City College;Alibaba Group;King's College London;", "aff_domain": "pku.edu.cn;zju.edu.cn;zju.edu.cn;mails.tsinghua.edu.cn;zucc.edu.cn;alibaba-inc.com;kcl.ac.uk;", "position": "Postdoc;MS student;PhD student;MS student;Associate Professor;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nyang2022constrained,\ntitle={Constrained Update Projection Approach to Safe Policy Optimization},\nauthor={Long Yang and Jiaming Ji and Juntao Dai and Linrui Zhang and Binbin Zhou and Pengfei Li and Yaodong Yang and Gang Pan},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=22hMrSbQXzt}\n}", "github": "", "project": "", "reviewers": "tK1z;iQjH;px3X;8pzD", "pdf_size": 2725556, "rating": "5;5;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;2;4;3", "contribution": "3;2;3;3", "wc_summary": "54;57;33;87", "wc_strengths_and_weaknesses": "294;177;200;270", "wc_questions": "7;20;7;48", "wc_limitations": "18;1;5;32", "wc_review": "373;255;245;437", "wc_reply_reviewers": "95;13;16;47", "wc_reply_authors": "1211;1000;544;452", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;1;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.75, 19.253246479490155 ], "wc_strengths_and_weaknesses_avg": [ 235.25, 48.204641892664235 ], "wc_questions_avg": [ 20.5, 16.740669042783207 ], "wc_limitations_avg": [ 14.0, 12.144957801491119 ], "wc_review_avg": [ 327.5, 80.81305587589173 ], "wc_reply_reviewers_avg": [ 42.75, 32.97252644247935 ], "wc_reply_authors_avg": [ 801.75, 314.4633325206613 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11474083293288692708&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "pku.edu.cn;zju.edu.cn;zju.edu.cn;mails.tsinghua.edu.cn;zucc.edu.cn;alibaba-inc.com;kcl.ac.uk;", "author_num": 8, "aff_unique_index": "0;1;1;2;3;4;5", "aff_unique_norm": "Peking University;Zhejiang University;Tsinghua University;Zhejiang University City College;Alibaba Group;King's College London", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.zju.edu.cn;https://www.tsinghua.edu.cn;http://www.zucc.edu.cn;https://www.alibaba.com;https://www.kcl.ac.uk", "aff_unique_abbr": "Peking U;ZJU;THU;;Alibaba;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Few-Shot Non-Parametric Learning with Deep Latent Variable Model", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53890", "id": "24fiAU_9vT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a92519f525c00085095fa41c5c46cdb5-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=24fiAU_9vT", "openreview": "https://openreview.net/forum?id=24fiAU_9vT", "poster": "/media/PosterPDFs/NeurIPS%202022/53890.png?t=1669279223.2309074", "slides": "https://nips.cc/virtual/2022/poster/53890", "video": "https://nips.cc/virtual/2022/poster/53890", "author_site": "Zhiying Jiang, Yiqin Dai, Ji Xin, Ming Li, Jimmy Lin", "tldr": "neural compressors for few-shot learning", "abstract": "Most real-world problems that machine learning algorithms are expected to solve face the situation with (1) unknown data distribution; (2) little domain-specific knowledge; and (3) datasets with limited annotation. We propose Non-Parametric learning by Compression with Latent Variables (NPC-LV), a learning framework for any dataset with abundant unlabeled data but very few labeled ones. 
By only training a generative model in an unsupervised way, the framework utilizes the data distribution to build a compressor. Using a compressor-based distance metric derived from Kolmogorov complexity, together with a few labeled examples, NPC-LV classifies without further training. We show that NPC-LV outperforms supervised methods on image classification on all three datasets in the low-data regime and even outperforms semi-supervised learning methods on CIFAR-10. We demonstrate how and when the negative evidence lower bound (nELBO) can be used as an approximate compressed length for classification. By revealing the correlation between compression rate and classification accuracy, we illustrate how, under NPC-LV, improvements in generative models can enhance downstream classification accuracy.", "keywords": "Data Compression;Kolmogorov Complexity;Few-Shot Learning;Generative Model", "primary_area": "", "supplementary_material": "/attachment/e146cb002af6e6cd2b2d3e64b0c6af79548a0914.zip", "author": "Zhiying Jiang;Yiqin Dai;Ji Xin;Ming Li;Jimmy Lin", "authorids": "~Zhiying_Jiang2;phinodadai@gmail.com;~Ji_Xin1;mli@uwaterloo.ca;~Jimmy_Lin2", "gender": ";;;;", "homepage": "https://bazingagin.github.io/;;https://ji-xin.gitlab.io;;https://cs.uwaterloo.ca/~jimmylin/", "dblp": ";;218/7227;;00/7739", "google_scholar": "eJ5MnJ8AAAAJ;;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhiying_Jiang2;phinodadai@gmail.com;~Ji_Xin1;mli@uwaterloo.ca;~Jimmy_Lin2", "aff": "University of Waterloo;;University of Waterloo;;University of Waterloo", "aff_domain": "uwaterloo.ca;;uwaterloo.ca;;waterloo.ca", "position": "PhD student;;PhD student;;Full Professor", "bibtex": "@inproceedings{\njiang2022fewshot,\ntitle={Few-Shot Non-Parametric Learning with Deep Latent Variable Model},\nauthor={Zhiying Jiang and Yiqin Dai and Ji Xin and Ming Li and Jimmy Lin},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=24fiAU_9vT}\n}", "github": "", "project": "", "reviewers": "oRFK;679F;PPrq", "pdf_size": 510094, "rating": "6;7;9", "confidence": "4;3;3", "soundness": "3;3;4", "novelty": "3;2;4", "presentation": "3;3;4", "contribution": "3;2;4", "wc_summary": "51;96;76", "wc_strengths_and_weaknesses": "330;94;236", "wc_questions": "136;22;37", "wc_limitations": "1;11;34", "wc_review": "518;223;383", "wc_reply_reviewers": "23;0;7", "wc_reply_authors": "788;644;636", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 74.33333333333333, 18.408935028645434 ], "wc_strengths_and_weaknesses_avg": [ 220.0, 97.0085906848804 ], "wc_questions_avg": [ 65.0, 50.57667446560717 ], "wc_limitations_avg": [ 15.333333333333334, 13.816254517375135 ], "wc_review_avg": [ 374.6666666666667, 120.57731498457282 ], "wc_reply_reviewers_avg": [ 10.0, 9.626352718795768 ], "wc_reply_authors_avg": [ 689.3333333333334, 69.84427121978024 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5747089598767317661&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uwaterloo.ca;;uwaterloo.ca;;waterloo.ca", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "SecureFedYJ: a safe feature Gaussianization protocol for Federated Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54828", "id": "25XIE30VHZE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ed3c686f9cda57e56cc859402c775414-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=25XIE30VHZE", "openreview": "https://openreview.net/forum?id=25XIE30VHZE", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54828", "video": "https://nips.cc/virtual/2022/poster/54828", "author_site": "Tanguy Marchand, Boris Muzellec, Constance B\u00e9guier, Jean Ogier du Terrail, Mathieu Andreux", "tldr": "", "abstract": "The Yeo-Johnson (YJ) transformation is a standard parametrized per-feature unidimensional transformation often used to Gaussianize features in machine learning. In this paper, we investigate the problem of applying the YJ transformation in a cross-silo Federated Learning setting under privacy constraints. For the first time, we prove that the YJ negative log-likelihood is in fact convex, which allows us to optimize it with exponential search. We numerically show that the resulting algorithm is more stable than the state-of-the-art approach based on the Brent minimization method. 
Building on this simple algorithm and Secure Multiparty Computation routines, we propose SECUREFEDYJ, a federated algorithm that performs a pooled-equivalent YJ transformation without leaking more information than the final fitted parameters do. Quantitative experiments on real data demonstrate that, in addition to being secure, our approach reliably normalizes features across silos as well as if data were pooled, making it a viable approach for safe federated feature Gaussianization.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/84dea3158e9b37cdc064dab94ca7d4f1e18f9944.pdf", "author": "Tanguy Marchand;Boris Muzellec;Constance B\u00e9guier;Jean Du Terrail;Mathieu Andreux", "authorids": "~Tanguy_Marchand1;~Boris_Muzellec2;constancebeguier@gmail.com;~Jean_Du_Terrail1;~Mathieu_Andreux1", "gender": ";M;;;", "homepage": ";https://borismuzellec.github.io;;;", "dblp": ";https://dblp.org/pers/hd/m/Muzellec:Boris;;;", "google_scholar": "l7GidmgAAAAJ;https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tanguy_Marchand1;~Boris_Muzellec2;constancebeguier@gmail.com;~Jean_Du_Terrail1;~Mathieu_Andreux1", "aff": ";Owkin;;;", "aff_domain": ";owkin.com;;;", "position": ";Researcher;;;", "bibtex": "@inproceedings{\nmarchand2022securefedyj,\ntitle={SecureFed{YJ}: a safe feature Gaussianization protocol for Federated Learning},\nauthor={Tanguy Marchand and Boris Muzellec and Constance B{\\'e}guier and Jean Du Terrail and Mathieu Andreux},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=25XIE30VHZE}\n}", "github": "", "project": "", "reviewers": "W31E;Kq6t;5G9f;wXmX", "pdf_size": 792132, "rating": "4;5;6;6", "confidence": "4;3;4;2", "soundness": "3;3;4;4", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "contribution": "3;2;3;3", "wc_summary": "75;36;65;23", "wc_strengths_and_weaknesses": "243;288;238;227", "wc_questions": "79;27;42;1", "wc_limitations": "33;58;18;1", "wc_review": "430;409;363;252", "wc_reply_reviewers": "0;38;68;8", "wc_reply_authors": "1480;1291;710;210", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 49.75, 21.063890903629368 ], "wc_strengths_and_weaknesses_avg": [ 249.0, 23.24865587512534 ], "wc_questions_avg": [ 37.25, 28.216794644324857 ], "wc_limitations_avg": [ 27.5, 20.93442141545832 ], "wc_review_avg": [ 363.5, 68.78408246098802 ], "wc_reply_reviewers_avg": [ 28.5, 26.846787517317598 ], "wc_reply_authors_avg": [ 922.75, 499.85266579263134 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4545454545454545, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16800247351361186410&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";owkin.com;;;", "author_num": 5, "aff_unique_index": "0", "aff_unique_norm": "Owkin", "aff_unique_dep": "", "aff_unique_url": "https://www.owkin.io", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Follow-the-Perturbed-Leader for 
Adversarial Markov Decision Processes with Bandit Feedback", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54395", "id": "25XwID3wKsi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/4a5c76c63f83ea45fb136d62db6c7104-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=25XwID3wKsi", "openreview": "https://openreview.net/forum?id=25XwID3wKsi", "poster": "/media/PosterPDFs/NeurIPS%202022/1fd09c5f59a8ff35d499c0ee25a1d47e.png?t=1666854880.242098", "slides": "https://nips.cc/virtual/2022/poster/54395", "video": "https://nips.cc/virtual/2022/poster/54395", "author_site": "Yan Dai, Haipeng Luo, Liyu Chen", "tldr": "We consider Follow-the-Perturbed-Leader (FTPL) algorithms for Adversarial Markov Decision Processes (AMDPs) in episodic settings. We also extend them to delayed AMDPs as well as infinite-horizon communicating AMDPs.", "abstract": "We consider regret minimization for Adversarial Markov Decision Processes (AMDPs), where the loss functions change over time and are adversarially chosen, and the learner only observes the losses for the visited state-action pairs (i.e., bandit feedback). While there has been a surge of studies on this problem using Online-Mirror-Descent (OMD) methods, very little is known about the Follow-the-Perturbed-Leader (FTPL) methods, which are usually computationally more efficient and also easier to implement since they only require solving an offline planning problem. Motivated by this, we take a closer look at FTPL for learning AMDPs, starting from the standard episodic finite-horizon setting. We find some unique and intriguing difficulties in the analysis and propose a workaround to eventually show that FTPL is also able to achieve near-optimal regret bounds in this case. More importantly, we then find two significant applications: First, the analysis of FTPL turns out to be readily generalizable to delayed bandit feedback with order-optimal regret, while OMD methods exhibit extra difficulties (Jin et al., 2022). Second, using FTPL, we also develop the first no-regret algorithm for learning communicating AMDPs in the infinite-horizon setting with bandit feedback and stochastic transitions. 
Our algorithm is efficient assuming access to an offline planning oracle, while even for the easier full-information setting, the only existing algorithm (Chandrasekaran and Tewari, 2021) is computationally inefficient.", "keywords": "Reinforcement Learning;Follow-the-Perturbed-Leader;Adversarial Markov Decision Process;Online Learning", "primary_area": "", "supplementary_material": "/attachment/2779c5ea537293904af79b99b6a4357b254c9e97.pdf", "author": "Yan Dai;Haipeng Luo;Liyu Chen", "authorids": "~Yan_Dai1;~Haipeng_Luo1;~Liyu_Chen1", "gender": "M;M;M", "homepage": "https://yandaichn.github.io/;https://haipeng-luo.net/;https://lchenat.github.io/", "dblp": "132/2047-2;62/2576;", "google_scholar": "gkG4z3IAAAAJ;ct2hw4UAAAAJ;FmNqZAYAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yan_Dai1;~Haipeng_Luo1;~Liyu_Chen1", "aff": "Tsinghua University;University of Southern California;University of Southern California", "aff_domain": "tsinghua.edu.cn;usc.edu;usc.edu", "position": "Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ndai2022followtheperturbedleader,\ntitle={Follow-the-Perturbed-Leader for Adversarial Markov Decision Processes with Bandit Feedback},\nauthor={Yan Dai and Haipeng Luo and Liyu Chen},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=25XwID3wKsi}\n}", "github": "", "project": "", "reviewers": "Pgbo;Lks2;Yxms;xS2b", "pdf_size": 411163, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;4;4;4", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "contribution": "3;3;3;3", "wc_summary": "120;76;65;68", "wc_strengths_and_weaknesses": "581;163;241;103", "wc_questions": "266;51;29;90", "wc_limitations": "2;15;19;56", "wc_review": "969;305;354;317", "wc_reply_reviewers": "19;14;0;14", "wc_reply_authors": "489;134;135;605", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.25, 22.16275028059469 ], "wc_strengths_and_weaknesses_avg": [ 272.0, 184.98918887329606 ], "wc_questions_avg": [ 109.0, 93.23894036291918 ], "wc_limitations_avg": [ 23.0, 20.062402647738878 ], "wc_review_avg": [ 486.25, 279.30035356225386 ], "wc_reply_reviewers_avg": [ 11.75, 7.084313657652377 ], "wc_reply_authors_avg": [ 340.75, 210.28834370929835 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8788785147219415307&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;usc.edu;usc.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.usc.edu", "aff_unique_abbr": "THU;USC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Efficient Training of Low-Curvature Neural Networks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54125", "id": "2B2xIJ299rx", 
"proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a6ec568ede6584b20dccfb6c2e4f2b58-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2B2xIJ299rx", "openreview": "https://openreview.net/forum?id=2B2xIJ299rx", "poster": "/media/PosterPDFs/NeurIPS%202022/aff82e881075d9c1ec306f86ae15c833.png?t=1666999423.827189", "slides": "https://nips.cc/virtual/2022/poster/54125", "video": "https://nips.cc/virtual/2022/poster/54125", "author_site": "Suraj Srinivas, Kyle Matoba, Himabindu Lakkaraju, Fran\u00e7ois Fleuret", "tldr": "We propose a practical method to train neural networks such that they have a low curvature, without losing predictive accuracy.", "abstract": "Standard deep neural networks often have excess non-linearity, making them susceptible to issues \nsuch as low adversarial robustness and gradient instability. Common methods to address these \ndownstream issues, such as adversarial training, are expensive and often sacrifice predictive accuracy. \n\nIn this work, we address the core issue of excess non-linearity via curvature, and \ndemonstrate low-curvature neural networks (LCNNs) that obtain drastically lower curvature \nthan standard models while exhibiting similar predictive performance. This leads to improved \nrobustness and stable gradients, at a fraction of the cost of standard adversarial training. \nTo achieve this, we decompose overall model curvature in terms of curvatures and slopes of \nits constituent layers. To enable efficient curvature minimization of constituent layers, \nwe introduce two novel architectural components: first, a non-linearity called centered-softplus \nthat is a stable variant of the softplus non-linearity, and second, a Lipschitz-constrained \nbatch normalization layer.\n\nOur experiments show that LCNNs have lower curvature, more stable gradients and increased \noff-the-shelf adversarial robustness when compared to standard neural networks, all without \naffecting predictive performance. 
Our approach is easy to use and can be readily incorporated \ninto existing neural network architectures.", "keywords": "Deep neural networks;curvature;spectral norm;Lipschitz constant;robustness", "primary_area": "", "supplementary_material": "/attachment/691c3175a68958811b472dbf8c5f72244775b3bc.pdf", "author": "Suraj Srinivas;Kyle Matoba;Himabindu Lakkaraju;Fran\u00e7ois Fleuret", "authorids": "~Suraj_Srinivas1;~Kyle_Matoba1;~Himabindu_Lakkaraju1;~Fran\u00e7ois_Fleuret2", "gender": "M;;M;F", "homepage": "https://suraj-srinivas.github.io/;;https://fleuret.org/francois/;http://web.stanford.edu/~himalv", "dblp": "144/0584;https://dblp.uni-trier.de/pid/150/1860.html;90/5265;68/9376", "google_scholar": "https://scholar.google.co.in/citations?user=J2JWgKgAAAAJ;;https://scholar.google.ch/citations?user=Bj1tRlsAAAAJ;", "orcid": ";;0000-0001-9457-7393;", "linkedin": ";;francois-fleuret/;", "or_profile": "~Suraj_Srinivas1;~Kyle_Matoba1;~Francois_Fleuret1;~Hima_Lakkaraju1", "aff": "School of Engineering and Applied Sciences, Harvard University;Swiss Federal Institute of Technology Lausanne;University of Geneva;Harvard University", "aff_domain": "seas.harvard.edu;epfl.ch;unige.ch;harvard.edu", "position": "Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsrinivas2022efficient,\ntitle={Efficient Training of Low-Curvature Neural Networks},\nauthor={Suraj Srinivas and Kyle Matoba and Himabindu Lakkaraju and Fran{\\c{c}}ois Fleuret},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2B2xIJ299rx}\n}", "github": "", "project": "", "reviewers": "1uxf;J6Z1;o9LW;b1TK", "pdf_size": 382749, "rating": "4;4;6;7", "confidence": "4;3;5;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "contribution": "2;2;2;3", "wc_summary": "102;109;69;58", "wc_strengths_and_weaknesses": "325;591;390;238", "wc_questions": "256;14;27;739", "wc_limitations": "106;14;1;10", "wc_review": "789;728;487;1045", "wc_reply_reviewers": "121;728;61;377", "wc_reply_authors": "984;1315;411;429", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 21.5 ], "wc_strengths_and_weaknesses_avg": [ 386.0, 130.063446056146 ], "wc_questions_avg": [ 259.0, 293.36751694759937 ], "wc_limitations_avg": [ 32.75, 42.55217385751285 ], "wc_review_avg": [ 762.25, 198.4935452351033 ], "wc_reply_reviewers_avg": [ 321.75, 262.86058567232936 ], "wc_reply_authors_avg": [ 784.75, 383.11641507510484 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5443310539518174, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11440430154472409359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "seas.harvard.edu;epfl.ch;unige.ch;harvard.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Harvard University;Swiss Federal Institute of Technology Lausanne;University of Geneva", "aff_unique_dep": "School of Engineering and Applied Sciences;;", "aff_unique_url": 
"https://www.harvard.edu;https://www.epfl.ch;https://www.unige.ch", "aff_unique_abbr": "Harvard;EPFL;UNIGE", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Lausanne;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Learning and Covering Sums of Independent Random Variables with Unbounded Support", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54037", "id": "2Bus7sfjZh8", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/a10946e1f46e1ffc0daf37cb2abfdcad-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2Bus7sfjZh8", "openreview": "https://openreview.net/forum?id=2Bus7sfjZh8", "poster": "/media/PosterPDFs/NeurIPS%202022/54037.png?t=1669429565.3925054", "slides": "https://nips.cc/virtual/2022/poster/54037", "video": "https://nips.cc/virtual/2022/poster/54037", "author_site": "Alkis Kalavasis, Konstantinos Stavropoulos, Emmanouil Zampetakis", "tldr": "We study the problem of covering and learning sums of independent integer-valued random variables with infinite support.", "abstract": "We study the problem of covering and learning sums $X = X_1 + \\cdots + X_n$ of independent integer-valued random variables $X_i$ (SIIRVs) with infinite support. De et al. at FOCS 2018, showed that even when the collective support of $X_i$'s is of size $4$, the maximum value of the support necessarily appears in the sample complexity of learning $X$. In this work, we address two questions: (i) Are there general families of SIIRVs with infinite support that can be learned with sample complexity independent of both $n$ and the maximal element of the support? (ii) Are there general families of SIIRVs with infinite support that admit proper sparse covers in total variation distance? As for question (i), we provide a set of simple conditions that allow the infinitely supported SIIRV to be learned with complexity $ \\text{poly}(1/\\epsilon)$ bypassing the aforementioned lower bound. We further address question (ii) in the general setting where each variable $X_i$ has unimodal probability mass function and is a different member of some, possibly multi-parameter, exponential family $\\mathcal{E}$ that satisfies some structural properties. These properties allow $\\mathcal{E}$ to contain heavy tailed and non log-concave distributions. Moreover, we show that for every $\\epsilon > 0$, and every $k$-parameter family $\\mathcal{E}$ that satisfies some structural assumptions, there exists an algorithm with $\\widetilde{O}(k) \\cdot \\text{poly}(1/\\epsilon)$ samples that learns a sum of $n$ arbitrary members of $\\mathcal{E}$ within $\\epsilon$ in TV distance. The output of the learning algorithm is also a sum of random variables within the family $\\mathcal{E}$. 
En route, we prove that any discrete unimodal exponential family with bounded constant-degree central moments can be approximated by the family corresponding to a bounded subset of the initial (unbounded) parameter space.", "keywords": "Distribution Learning;Sums of Independent Random Variables;Covering;Density Estimation", "primary_area": "", "supplementary_material": "/attachment/48eacdc257c645ec3dc698f69acaadb96bd79f15.pdf", "author": "Alkis Kalavasis;Konstantinos Stavropoulos;Manolis Zampetakis", "authorids": "~Alkis_Kalavasis1;~Konstantinos_Stavropoulos1;~Manolis_Zampetakis2", "gender": "M;;M", "homepage": "https://alkisk.github.io/;;https://mzampet.com/", "dblp": "269/9425;;", "google_scholar": "NgVIFJwAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alkis_Kalavasis1;~Konstantinos_Stavropoulos1;~Manolis_Zampetakis2", "aff": "National Technical University of Athens;;Yale University", "aff_domain": "ntua.gr;;yale.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nkalavasis2022learning,\ntitle={Learning and Covering Sums of Independent Random Variables with Unbounded Support},\nauthor={Alkis Kalavasis and Konstantinos Stavropoulos and Manolis Zampetakis},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2Bus7sfjZh8}\n}", "github": "", "project": "", "reviewers": "vvMx;HJNQ;9A41", "pdf_size": 417639, "rating": "6;7;8", "confidence": "2;4;2", "soundness": "3;3;4", "novelty": "2;4;4", "presentation": "3;3;4", "contribution": "2;4;4", "wc_summary": "146;247;170", "wc_strengths_and_weaknesses": "169;243;110", "wc_questions": "54;11;111", "wc_limitations": "1;8;1", "wc_review": "370;509;392", "wc_reply_reviewers": "80;24;27", "wc_reply_authors": "670;632;661", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 187.66666666666666, 43.08389748179965 ], "wc_strengths_and_weaknesses_avg": [ 174.0, 54.41200847852123 ], "wc_questions_avg": [ 58.666666666666664, 40.95797304012438 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_review_avg": [ 423.6666666666667, 61.004553564103354 ], "wc_reply_reviewers_avg": [ 43.666666666666664, 25.72072229848057 ], "wc_reply_authors_avg": [ 654.3333333333334, 16.21384867602041 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14678183440085240744&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "ntua.gr;;yale.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National Technical University of Athens;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntua.gr;https://www.yale.edu", "aff_unique_abbr": "NTUA;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Greece;United States" }, { "title": "TVLT: Textless Vision-Language Transformer", "status": "Accept", "track": "main", "site": 
"https://nips.cc/virtual/2022/poster/52962", "id": "2DZ9R7GXLY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3ea3134345f2e6228a29f35b86bce24d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2DZ9R7GXLY", "openreview": "https://openreview.net/forum?id=2DZ9R7GXLY", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/52962", "video": "https://nips.cc/virtual/2022/poster/52962", "author_site": "Zineng Tang, Jaemin Cho, Yixin Nie, Mohit Bansal", "tldr": "vision-and-language modeling without text, by using a minimalist transformer which takes only raw visual and audio inputs", "abstract": "In this work, we present the Textless Vision-Language Transformer (TVLT), where homogeneous transformer blocks take raw visual and audio inputs for vision-and-language representation learning with minimal modality-specific design, and do not use text-specific modules such as tokenization or automatic speech recognition (ASR). TVLT is trained by reconstructing masked patches of continuous video frames and audio spectrograms (masked autoencoding) and contrastive modeling to align video and audio. TVLT attains performance comparable to its text-based counterpart on various multimodal tasks, such as visual question answering, image retrieval, video retrieval, and multimodal sentiment analysis, with 28x faster inference speed and only 1/3 of the parameters. Our findings suggest the possibility of learning compact and efficient visual-linguistic representations from low-level visual and audio signals without assuming the prior existence of text. Our code and checkpoints are available at: https://github.com/zinengtang/TVLT", "keywords": "textless vision-and-language modeling;audiovisual;TVLT", "primary_area": "", "supplementary_material": "/attachment/77a24829a1a54698a41896a41c7b32f1067a3eae.pdf", "author": "Zineng Tang;Jaemin Cho;Yixin Nie;Mohit Bansal", "authorids": "~Zineng_Tang1;~Jaemin_Cho1;~Yixin_Nie2;~Mohit_Bansal2", "gender": "M;M;M;M", "homepage": "https://zinengtang.github.io/;https://j-min.io;https://easonnie.github.io;https://www.cs.unc.edu/~mbansal/", "dblp": "251/9569;130/8348-1;205/2725;32/5243.html", "google_scholar": "bZy4vtwAAAAJ;IbQZoHQAAAAJ;g5QpITUAAAAJ;DN8QtscAAAAJ", "orcid": ";0000-0002-1558-6169;;", "linkedin": ";;;", "or_profile": "~Zineng_Tang1;~Jaemin_Cho1;~Yixin_Nie2;~Mohit_Bansal2", "aff": "University of North Carolina, Chapel Hill;University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "unc.edu;unc.edu;cs.unc.edu;unc.edu", "position": "Undergrad student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntang2022tvlt,\ntitle={{TVLT}: Textless Vision-Language Transformer},\nauthor={Zineng Tang and Jaemin Cho and Yixin Nie and Mohit Bansal},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2DZ9R7GXLY}\n}", "github": "", "project": "", "reviewers": "WYuA;yMpw;JbWb", "pdf_size": 951196, "rating": "6;7;8", "confidence": "4;4;4", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "2;4;4", "contribution": "2;3;3", "wc_summary": "74;82;143", "wc_strengths_and_weaknesses": "80;25;173", "wc_questions": "253;127;87", "wc_limitations": "1;9;14", "wc_review": "408;243;417", "wc_reply_reviewers": "107;0;0", "wc_reply_authors": "939;223;578", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 99.66666666666667, 30.81485933045218 ], "wc_strengths_and_weaknesses_avg": [ 92.66666666666667, 61.081002684049714 ], "wc_questions_avg": [ 155.66666666666666, 70.7358152250722 ], "wc_limitations_avg": [ 8.0, 5.354126134736337 ], "wc_review_avg": [ 356.0, 79.98749902328488 ], "wc_reply_reviewers_avg": [ 35.666666666666664, 50.440283724640395 ], "wc_reply_authors_avg": [ 580.0, 292.3091970271662 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10536590157108404477&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "unc.edu;unc.edu;cs.unc.edu;unc.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "2EBn01PJh17", "title": "Adaptive Cholesky Gaussian Processes", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a method to fit exact Gaussian process models to large datasets by considering only a subset of the data. Our approach is novel in that the size of the subset is selected on the fly during exact inference with little computational overhead. From an empirical observation that the log-marginal likelihood often exhibits a linear trend once a sufficient subset of a dataset has been observed, we conclude that many large datasets contain redundant information that only slightly affects the posterior. Based on this, we provide probabilistic bounds on the full model evidence that can identify such subsets. Remarkably, these bounds are largely composed of terms that appear in intermediate steps of the standard Cholesky decomposition, allowing us to modify the algorithm to adaptively stop the decomposition once enough data have been observed. Empirically, we show that our method can be directly plugged into well-known inference schemes to fit exact Gaussian process models to large datasets. 
", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/8dd683484e6cee4185d05db0ed20e7a037da68dd.zip", "author": "Simon Bartels;Kristoffer Stensbo-Smidt;Pablo Moreno-Mu\u00f1oz;Wouter Boomsma;Jes Frellsen;S\u00f8ren Hauberg", "authorids": "~Simon_Bartels1;~Kristoffer_Stensbo-Smidt1;~Pablo_Moreno-Mu\u00f1oz1;~Wouter_Boomsma1;~Jes_Frellsen1;~S\u00f8ren_Hauberg1", "gender": ";M;M;M;M;M", "homepage": ";;https://pmorenoz.github.io/;;https://frellsen.org;http://www2.compute.dtu.dk/~sohau/", "dblp": "180/5936.html;139/4212;220/5334;06/5945;83/8247;39/7226", "google_scholar": ";https://scholar.google.co.uk/citations?user=urhY48QAAAAJ;8vL8iawAAAAJ;EwqU_jsAAAAJ;Yj2sBWkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-2855-3817;0000-0002-7249-2986;0000-0002-8257-3827;0000-0001-9224-1271;", "linkedin": ";;;;frellsen/;", "or_profile": "~Simon_Bartels1;~Kristoffer_Stensbo-Smidt1;~Pablo_Moreno-Mu\u00f1oz1;~Wouter_Boomsma1;~Jes_Frellsen1;~S\u00f8ren_Hauberg1", "aff": "Copenhagen University;Technical University of Denmark;Technical University of Denmark;University of Copenhagen;Technical University of Denmark;Technical University of Denmark", "aff_domain": "ku.dk;dtu.dk;dtu.dk;ku.dk;dtu.dk;dtu.dk", "position": "Postdoc;Postdoc;Postdoc;Full Professor;Associate Professor;Professor", "bibtex": "@misc{\nbartels2022adaptive,\ntitle={Adaptive Cholesky Gaussian Processes},\nauthor={Simon Bartels and Kristoffer Stensbo-Smidt and Pablo Moreno-Mu{\\~n}oz and Wouter Boomsma and Jes Frellsen and S{\\o}ren Hauberg},\nyear={2022},\nurl={https://openreview.net/forum?id=2EBn01PJh17}\n}", "github": "", "project": "", "reviewers": "shrn;rfLj;xgeQ", "site": "https://openreview.net/forum?id=2EBn01PJh17", "pdf_size": 988230, "rating": "3;4;6", "confidence": "4;3;3", "soundness": "2;2;2", "novelty": "2;1;3", "presentation": "2;2;4", "contribution": "2;1;3", "wc_summary": "79;151;255", "wc_strengths_and_weaknesses": "739;243;253", "wc_questions": "24;5;33", "wc_limitations": "1;1;93", "wc_review": "843;400;634", "wc_reply_reviewers": "332;0;0", "wc_reply_authors": "1419;386;620", "reply_reviewers": "2;0;0", "reply_authors": "3;1;1", "rating_avg": [ 4.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 161.66666666666666, 72.24649164876836 ], "wc_strengths_and_weaknesses_avg": [ 411.6666666666667, 231.49562030894285 ], "wc_questions_avg": [ 20.666666666666668, 11.671427600007732 ], "wc_limitations_avg": [ 31.666666666666668, 43.36921591277491 ], "wc_review_avg": [ 625.6666666666666, 180.9499623898521 ], "wc_reply_reviewers_avg": [ 110.66666666666667, 156.5063009026225 ], "wc_reply_authors_avg": [ 808.3333333333334, 442.247542547032 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=361830342362998686&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "University of Copenhagen;Technical University of Denmark", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.tek.dk", "aff_unique_abbr": "UCPH;DTU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Denmark" }, { "id": "2EQzEE5seF", "title": "Adversarially Perturbed Batch Normalization: A Simple Way to Improve Image Recognition", "track": "main", "status": "Reject", "tldr": "This work proposes an adversarial training method resisting adversarially perturbed statistics of Batch Normalization, to improve recognition on benign images.", "abstract": "Recently, it has been shown that adversarial training (AT) by injecting adversarial samples can improve the quality of recognition. However, the existing AT methods suffer from the performance degradation on the benign samples, leading to a gap between robustness and generalization. We argue that this gap is caused by the inaccurate estimation of the Batch Normalization (BN) layer, due to the distributional discrepancy between the training and test set. To bridge this gap, this paper identifies the adversarial robustness against the indispensable noise in BN statistics. In particular, we proposed a novel strategy that adversarially perturbs the BN layer, termed ARAPT. The ARAPT leverages the gradients to shift BN statistics and helps models resist the shifted statistics to enhance robustness to noise. Then, we introduce ARAPT into a new paradigm of AT called model-based AT, which strengthens models' tolerance to noise in BN. Experiments indicate that the APART can improve model generalization, leading to significant improvements in accuracy on benchmarks like CIFAR-10, CIFAR-100, Tiny-ImageNet, and ImageNet.", "keywords": "Adversarial Training;Image Recognition;Batch Normalization;Robustness;Generalization", "primary_area": "", "supplementary_material": "/attachment/fb03eb69d0af2dc1fb08ce2033056f6ac7653019.pdf", "author": "You Huang;Hong Liu;Xiaoshuai Sun;Xiaopeng Hong;Xianming Lin;YONGJIAN WU;Rongrong Ji", "authorids": "~You_Huang1;~Hong_Liu9;~Xiaoshuai_Sun3;~Xiaopeng_Hong4;~Xianming_Lin1;~YONGJIAN_WU2;~Rongrong_Ji5", "gender": "M;Non-Binary;M;M;M;;M", "homepage": ";https://lynnhongliu.github.io/hliu/;https://sites.google.com/view/xssun;https://hongxiaopeng.com/;;https://open.youtu.qq.com/;http://mac.xmu.edu.cn/rrji-en.html", "dblp": "214/9824;29/5010-9;26/5787.html;06/592.html;146/4014.html;;86/5681", "google_scholar": "WYmFVEMAAAAJ;BC7N2dYAAAAJ;KPMK3B4AAAAJ;x3X-qysAAAAJ;;;", "orcid": ";0000-0001-5318-6388;0000-0003-3912-9306;0000-0002-0611-0636;0000-0003-4739-8936;;", "linkedin": "you-huang-5075251b6/;;;xiaopeng-hong-8b4a9a7a/;;;", "or_profile": "~You_Huang1;~Hong_Liu9;~Xiaoshuai_Sun3;~Xiaopeng_Hong4;~Xianming_Lin1;~YONGJIAN_WU2;~Rongrong_Ji5", "aff": "Xiamen University;National Institute of Informatics;Xiamen University;Harbin Institute of Technology;Xiamen University;;Xiamen University", "aff_domain": "xmu.edu.cn;nii.ac.jp;xmu.edu.cn;hit.edu.cn;xmu.edu.cn;;xmu.edu.cn", "position": "PhD student;Postdoc;Associate Professor;Full Professor;Assistant Professor;;Full Professor", "bibtex": "@misc{\nhuang2022adversarially,\ntitle={Adversarially Perturbed Batch Normalization: A Simple Way to Improve Image Recognition},\nauthor={You Huang and Hong Liu and Xiaoshuai Sun and Xiaopeng Hong and Xianming Lin and YONGJIAN WU and Rongrong Ji},\nyear={2022},\nurl={https://openreview.net/forum?id=2EQzEE5seF}\n}", "github": "", "project": "", "reviewers": "STBH;3xFg;sCRg;fuBX", "site": "https://openreview.net/forum?id=2EQzEE5seF", "pdf_size": 525658, "rating": "3;4;4;5", "confidence": "4;4;3;4", "soundness": "2;2;2;3", "novelty": 
"2;2;3;3", "presentation": "3;3;3;4", "contribution": "2;2;3;3", "wc_summary": "18;36;110;154", "wc_strengths_and_weaknesses": "75;134;232;331", "wc_questions": "3;33;56;5", "wc_limitations": "3;23;44;57", "wc_review": "99;226;442;547", "wc_reply_reviewers": "0;59;0;0", "wc_reply_authors": "746;533;880;406", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;2;1", "rating_avg": [ 4.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 55.12485827646181 ], "wc_strengths_and_weaknesses_avg": [ 193.0, 97.42946166329772 ], "wc_questions_avg": [ 24.25, 21.833174299675253 ], "wc_limitations_avg": [ 31.75, 20.559365262575593 ], "wc_review_avg": [ 328.5, 175.9268313816855 ], "wc_reply_reviewers_avg": [ 14.75, 25.54774941164094 ], "wc_reply_authors_avg": [ 641.25, 183.73537356753053 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u606yFKflNoJ:scholar.google.com/&scioq=Adversarially+Perturbed+Batch+Normalization:+A+Simple+Way+to+Improve+Image+Recognition&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Xiamen University;National Institute of Informatics;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.nii.ac.jp/;http://www.hit.edu.cn/", "aff_unique_abbr": "XMU;NII;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Losses Can Be Blessings: Routing Self-Supervised Speech Representations Towards Efficient Multilingual and Multitask Speech Processing", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53604", "id": "2EUJ4e6H4OX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/83d349b6eb8125588b5f091e2d47525c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2EUJ4e6H4OX", "openreview": "https://openreview.net/forum?id=2EUJ4e6H4OX", "poster": "/media/PosterPDFs/NeurIPS%202022/53604.png?t=1669792999.577685", "slides": "https://nips.cc/virtual/2022/poster/53604", "video": "https://nips.cc/virtual/2022/poster/53604", "author_site": "Yonggan Fu, Yang Zhang, Kaizhi Qian, Zhifan Ye, Zhongzhi Yu, Cheng-I Jeff Lai, Celine Lin", "tldr": "We propose a novel framework to finetune the connections of speech SSL models, instead of model weights, to empower efficient multilingual and multitask speech processing.", "abstract": "Self-supervised learning (SSL) for rich speech representations has achieved empirical success in low-resource Automatic Speech Recognition (ASR) and other speech processing tasks, which can mitigate the necessity of a large amount of transcribed speech and thus has driven a growing demand for on-device ASR and other speech processing. However, advanced speech SSL models have become increasingly large, which contradicts the limited on-device resources. This gap could be more severe in multilingual/multitask scenarios requiring simultaneously recognizing multiple languages or executing multiple speech processing tasks. 
Additionally, strongly overparameterized speech SSL models tend to suffer from overfitting when finetuned on low-resource speech corpora. This work aims to enhance the practical usage of speech SSL models towards a win-win in both enhanced efficiency and alleviated overfitting via our proposed S$^3$-Router framework, which for the first time discovers that simply discarding no more than 10% of model weights via only finetuning model connections of speech SSL models can achieve better accuracy than standard weight finetuning on downstream speech processing tasks. More importantly, S$^3$-Router can serve as an all-in-one technique to enable (1) a new finetuning scheme, (2) an efficient multilingual/multitask solution, (3) a state-of-the-art pruning technique, and (4) a new tool to quantitatively analyze the learned speech representation. We believe S$^3$-Router has provided a new perspective for practical deployment of speech SSL models. Our code is available at: https://github.com/GATECH-EIC/S3-Router.", "keywords": "automated speech recognition;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/5d2d72dd6dfb97596f0e6153fa26c061cb584da8.pdf", "author": "Yonggan Fu;Yang Zhang;Kaizhi Qian;Zhifan Ye;Zhongzhi Yu;Cheng-I Lai;Yingyan Lin", "authorids": "~Yonggan_Fu1;~Yang_Zhang3;~Kaizhi_Qian1;~Zhifan_Ye1;~Zhongzhi_Yu1;~Cheng-I_Lai1;~Yingyan_Lin1", "gender": "M;M;;M;M;M;F", "homepage": "https://www.yongganfu.com/;;;https://github.com/LemonAndRabbit;;http://people.csail.mit.edu/clai24/;https://eiclab.scs.gatech.edu/", "dblp": "244/8166;06/6785-1;212/6254;168/9226.html;198/8338;226/2039.html;120/6981", "google_scholar": "https://scholar.google.com/citations?hl=en;_-5PSgQAAAAJ;;zlPfnWEAAAAJ;KjvcaBQAAAAJ;mV4mRm0AAAAJ;dio8IesAAAAJ", "orcid": ";;;0000-0003-0755-8843;;;", "linkedin": "yonggan-fu-b211831b0;;;zhifan-ye/;zhongzhi-yu/;;yingyan-celine-lin-a281211a/", "or_profile": "~Yonggan_Fu1;~Yang_Zhang3;~Kaizhi_Qian1;~Zhifan_Ye1;~Zhongzhi_Yu1;~Cheng-I_Lai1;~Yingyan_Lin1", "aff": "Rice University;International Business Machines;International Business Machines;University of Science and Technology of China;Rice University;Massachusetts Institute of Technology;Rice University", "aff_domain": "rice.edu;ibm.com;ibm.com;ustc.edu.cn;rice.edu;mit.edu;rice.edu", "position": "PhD student;Research Staff Employee;Researcher;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfu2022losses,\ntitle={Losses Can Be Blessings: Routing Self-Supervised Speech Representations Towards Efficient Multilingual and Multitask Speech Processing},\nauthor={Yonggan Fu and Yang Zhang and Kaizhi Qian and Zhifan Ye and Zhongzhi Yu and Cheng-I Lai and Yingyan Lin},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2EUJ4e6H4OX}\n}", "github": "", "project": "", "reviewers": "13fn;2h6A;nV4F", "pdf_size": 487494, "rating": "5;7;7", "confidence": "4;5;4", "soundness": "1;4;2", "novelty": "2;3;3", "presentation": "3;2;3", "contribution": "2;3;3", "wc_summary": "56;143;97", "wc_strengths_and_weaknesses": "512;113;234", "wc_questions": "104;34;246", "wc_limitations": "79;5;30", "wc_review": "751;295;607", "wc_reply_reviewers": "1372;95;224", "wc_reply_authors": "3539;637;1314", "reply_reviewers": "3;1;1", "reply_authors": "7;2;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 1.247219128924647 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 98.66666666666667, 35.537148012873644 ], "wc_strengths_and_weaknesses_avg": [ 286.3333333333333, 167.04157832374815 ], "wc_questions_avg": [ 128.0, 88.19674975114823 ], "wc_limitations_avg": [ 38.0, 30.735430152621365 ], "wc_review_avg": [ 551.0, 190.3260360539251 ], "wc_reply_reviewers_avg": [ 563.6666666666666, 573.9990321323625 ], "wc_reply_authors_avg": [ 1830.0, 1239.6486061246012 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 2.160246899469287 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15574684827691207630&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": "rice.edu;ibm.com;ibm.com;ustc.edu.cn;rice.edu;mit.edu;rice.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;0;3;0", "aff_unique_norm": "Rice University;International Business Machines Corporation;University of Science and Technology of China;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.rice.edu;https://www.ibm.com;http://www.ustc.edu.cn;https://web.mit.edu", "aff_unique_abbr": "Rice;IBM;USTC;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "2EufPS5ABlJ", "title": "Spherical Sliced-Wasserstein", "track": "main", "status": "Reject", "tldr": "We propose a SW discrepancy on the sphere.", "abstract": "Many variants of the Wasserstein distance have been introduced to reduce its original computational burden. In particular the Sliced-Wasserstein distance (SW), which leverages one-dimensional projections for which a closed-form solution of the Wasserstein distance is available, has received a lot of interest. Yet, it is restricted to data living in Euclidean spaces, while the Wasserstein distance has been studied and used recently on manifolds. We focus more specifically on the sphere, for which we define a novel SW discrepancy, which we call spherical Sliced-Wasserstein, making a first step towards defining SW discrepancies on manifolds. Our construction is notably based on closed-form solutions of the Wasserstein distance on the circle, together with a new spherical Radon transform. 
Along with efficient algorithms and the corresponding implementations, we illustrate its properties in several machine learning use cases where spherical representations of data are at stake: density estimation on the sphere, variational inference or hyperspherical auto-encoders.", "keywords": "Optimal Transport;Sliced-Wasserstein;Sphere", "primary_area": "", "supplementary_material": "/attachment/fd62c7e8c23c93a41c0b55098653d23f02571780.zip", "author": "Cl\u00e9ment Bonet;Paul Berg;Nicolas Courty;Fran\u00e7ois Septier;Lucas Drumetz;Minh-Tan Pham", "authorids": "~Cl\u00e9ment_Bonet1;~Paul_Berg1;~Nicolas_Courty1;francois.septier@univ-ubs.fr;~Lucas_Drumetz1;minh-tan.pham@univ-ubs.fr", "gender": "M;M;M;;M;", "homepage": "https://clbonet.github.io;https://ber.gp;http://people.irisa.fr/Nicolas.Courty/;;https://www.imt-atlantique.fr/en/person/lucas-drumetz;", "dblp": "304/8220;;74/4219;;180/6266;", "google_scholar": "wjCPk5kAAAAJ;;https://scholar.google.fr/citations?user=ibEREjcAAAAJ;;abRvIGAAAAAJ;", "orcid": "0000-0002-3390-1169;0000-0002-6848-5791;0000-0003-1353-0126;;0000-0003-3362-703X;", "linkedin": "cl\u00e9ment-bonet-2840a9153;;;;lucas-drumetz-23725119a/;", "or_profile": "~Cl\u00e9ment_Bonet1;~Paul_Berg1;~Nicolas_Courty1;francois.septier@univ-ubs.fr;~Lucas_Drumetz1;minh-tan.pham@univ-ubs.fr", "aff": "Universit\u00e9 Bretagne Sud;Universit\u00e9 de Bretagne Sud;IRISA;;IMT Atlantique;", "aff_domain": "univ-ubs.fr;univ-ubs.fr;irisa.fr;;imt-atlantique.fr;", "position": "PhD student;PhD student;Full Professor;;Associate Professor;", "bibtex": "@misc{\nbonet2022spherical,\ntitle={Spherical Sliced-Wasserstein},\nauthor={Cl{\\'e}ment Bonet and Paul Berg and Nicolas Courty and Fran{\\c{c}}ois Septier and Lucas Drumetz and Minh-Tan Pham},\nyear={2022},\nurl={https://openreview.net/forum?id=2EufPS5ABlJ}\n}", "github": "", "project": "", "reviewers": "ET9y;yBhy;JCLC;q6N2", "site": "https://openreview.net/forum?id=2EufPS5ABlJ", "pdf_size": 2576824, "rating": "4;5;5;7", "confidence": "2;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "contribution": "3;3;3;3", "wc_summary": "72;97;102;93", "wc_strengths_and_weaknesses": "161;185;63;250", "wc_questions": "46;182;1;38", "wc_limitations": "1;14;3;143", "wc_review": "280;478;169;524", "wc_reply_reviewers": "56;121;56;0", "wc_reply_authors": "359;1127;325;424", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.0, 11.423659658795863 ], "wc_strengths_and_weaknesses_avg": [ 164.75, 67.16537426382735 ], "wc_questions_avg": [ 66.75, 68.67086354488343 ], "wc_limitations_avg": [ 40.25, 59.52887954598172 ], "wc_review_avg": [ 362.75, 144.62948350872307 ], "wc_reply_reviewers_avg": [ 58.25, 42.83908845902303 ], "wc_reply_authors_avg": [ 558.75, 330.0017992375193 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.20751433915982243, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11487437130402632676&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 Bretagne Sud;Universit\u00e9 de Bretagne Sud;Institut de Recherche en 
Informatique et Automatique;IMT Atlantique", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.univ-ubs.fr;https://www.univ-ubs.fr;https://www.irisa.fr;https://www.imt-atlantique.fr", "aff_unique_abbr": "UBS;UBS;IRISA;IMT Atlantique", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Learning Multi-resolution Functional Maps with Spectral Attention for Robust Shape Matching", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55347", "id": "2EwEWrNADpT", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/bcade016e3004543b289b33e7deb7472-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2EwEWrNADpT", "openreview": "https://openreview.net/forum?id=2EwEWrNADpT", "poster": "/media/PosterPDFs/NeurIPS%202022/3a066bda8c96b9478bb0512f0a43028c.png?t=1666727145.4437401", "slides": "https://nips.cc/virtual/2022/poster/55347", "video": "https://nips.cc/virtual/2022/poster/55347", "author_site": "Lei Li, Nicolas Donati, Maks Ovsjanikov", "tldr": "Our work introduces a novel non-rigid shape matching framework based on multi-resolution functional maps with spectral attention.", "abstract": "In this work, we present a novel non-rigid shape matching framework based on multi-resolution functional maps with spectral attention. Existing functional map learning methods all rely on the critical choice of the spectral resolution hyperparameter, which can severely affect the overall accuracy or lead to overfitting, if not chosen carefully. In this paper, we show that spectral resolution tuning can be alleviated by introducing spectral attention. Our framework is applicable in both supervised and unsupervised settings, and we show that it is possible to train the network so that it can adapt the spectral resolution, depending on the given shape input. More specifically, we propose to compute multi-resolution functional maps that characterize correspondence across a range of spectral resolutions, and introduce a spectral attention network that helps to combine this representation into a single coherent final correspondence. Our approach is not only accurate with near-isometric input, for which a high spectral resolution is typically preferred, but also robust and able to produce reasonable matching even in the presence of significant non-isometric distortion, which poses great challenges to existing methods. 
We demonstrate the superior performance of our approach through experiments on a suite of challenging near-isometric and non-isometric shape matching benchmarks.", "keywords": "Non-rigid shape matching;functional map;multi-resolution;spectral attention", "primary_area": "", "supplementary_material": "/attachment/6e4d4eb07a80144e8f899f4f0731a03a46d542bc.pdf", "author": "Lei Li;Nicolas Donati;Maks Ovsjanikov", "authorids": "~Lei_Li17;~Nicolas_Donati1;~Maks_Ovsjanikov1", "gender": "M;;M", "homepage": "https://craigleili.github.io;https://www.lix.polytechnique.fr/member/355/view;http://www.lix.polytechnique.fr/~maks/", "dblp": "13/7007-38;;94/5668", "google_scholar": "uzh8LlIAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-4657-4718;;0000-0002-5867-4046", "linkedin": ";;", "or_profile": "~Lei_Li17;~Nicolas_Donati1;~Maks_Ovsjanikov1", "aff": "Ecole Polytechnique;Ecole polytechnique;\u00c9cole Polytechnique", "aff_domain": "polytechnique.fr;polytechnique.edu;polytechnique.edu", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2022learning,\ntitle={Learning Multi-resolution Functional Maps with Spectral Attention for Robust Shape Matching},\nauthor={Lei Li and Nicolas Donati and Maks Ovsjanikov},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2EwEWrNADpT}\n}", "github": "", "project": "", "reviewers": "da7e;6wTY;PHU8", "pdf_size": 10589844, "rating": "6;7;7", "confidence": "4;5;4", "soundness": "3;2;4", "novelty": "2;2;3", "presentation": "3;3;3", "contribution": "2;2;3", "wc_summary": "74;136;89", "wc_strengths_and_weaknesses": "432;147;109", "wc_questions": "90;323;38", "wc_limitations": "72;37;89", "wc_review": "668;643;325", "wc_reply_reviewers": "68;163;99", "wc_reply_authors": "647;1398;674", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 99.66666666666667, 26.411277052720408 ], "wc_strengths_and_weaknesses_avg": [ 229.33333333333334, 144.14421790538654 ], "wc_questions_avg": [ 150.33333333333334, 123.92560492309713 ], "wc_limitations_avg": [ 66.0, 21.64871050817269 ], "wc_review_avg": [ 545.3333333333334, 156.13313407331435 ], "wc_reply_reviewers_avg": [ 110.0, 39.5558676641869 ], "wc_reply_authors_avg": [ 906.3333333333334, 347.8355294612032 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11801194413397973375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "polytechnique.fr;polytechnique.edu;polytechnique.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ecole Polytechnique", "aff_unique_dep": "", "aff_unique_url": "https://www.polytechnique.edu", "aff_unique_abbr": "X", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "A Unified Framework for Deep Symbolic Regression", "status": "Accept", 
"track": "main", "site": "https://nips.cc/virtual/2022/poster/52836", "id": "2FNnBhwJsHK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/dbca58f35bddc6e4003b2dd80e42f838-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2FNnBhwJsHK", "openreview": "https://openreview.net/forum?id=2FNnBhwJsHK", "poster": "/media/PosterPDFs/NeurIPS%202022/7c1bbdaebec5e20e91db1fe61221228f.png?t=1666403708.5706472", "slides": "https://nips.cc/virtual/2022/poster/52836", "video": "https://nips.cc/virtual/2022/poster/52836", "author_site": "Mikel Landajuela, Chak Shing Lee, Jiachen Yang, Ruben Glatt, Claudio P Santiago, Ignacio Aravena, Terrell Mundhenk, Garrett Mulcahy, Brenden K Petersen", "tldr": "We propose a strategy to integrate five disparate methods for symbolic regression into a unified framework, resulting in a new state-of-the-art on SRBench benchmarks.", "abstract": "The last few years have witnessed a surge in methods for symbolic regression, from advances in traditional evolutionary approaches to novel deep learning-based systems. Individual works typically focus on advancing the state-of-the-art for one particular class of solution strategies, and there have been few attempts to investigate the benefits of hybridizing or integrating multiple strategies. In this work, we identify five classes of symbolic regression solution strategies---recursive problem simplification, neural-guided search, large-scale pre-training, genetic programming, and linear models---and propose a strategy to hybridize them into a single modular, unified symbolic regression framework. Based on empirical evaluation using SRBench, a new community tool for benchmarking symbolic regression methods, our unified framework achieves state-of-the-art performance in its ability to (1) symbolically recover analytical expressions, (2) fit datasets with high accuracy, and (3) balance accuracy-complexity trade-offs, across 252 ground-truth and black-box benchmark problems, in both noiseless settings and across various noise levels. Finally, we provide practical use case-based guidance for constructing hybrid symbolic regression algorithms, supported by extensive, combinatorial ablation studies.", "keywords": "symbolic regression;reinforcement learning;combinatorial optimization", "primary_area": "", "supplementary_material": "/attachment/9a88ea0185abe627e9a4b25a8cbe63159c2767d4.zip", "author": "Mikel Landajuela;Chak Lee;Jiachen Yang;Ruben Glatt;Claudio P. Santiago;Ignacio Aravena;Terrell N. Mundhenk;Garrett Mulcahy;Brenden K. 
Petersen", "authorids": "~Mikel_Landajuela1;lee1029@llnl.gov;~Jiachen_Yang1;~Ruben_Glatt2;~Claudio_P._Santiago1;aravenasolis1@llnl.gov;~Terrell_N._Mundhenk1;~Garrett_Mulcahy1;~Brenden_K._Petersen1", "gender": "M;;;M;;;M;M;", "homepage": "https://landajuela.github.io/;;;http://www.cowhi.org;;;http://www.mundhenk.com;;", "dblp": "290/1379;;;;;;117/8211;;", "google_scholar": "Tl93fucAAAAJ;;;XVfDYnAAAAAJ;;;;;", "orcid": "0000-0002-4804-6513;;;0000-0002-4401-3810;;;;0000-0002-0602-878X;", "linkedin": "mikel-landajuela-larma-ph-d-139aa5129/;;;https://linkedin.com/in/rubenglatt;;;;;", "or_profile": "~Mikel_Landajuela1;lee1029@llnl.gov;~Jiachen_Yang1;~Ruben_Glatt2;~Claudio_P._Santiago1;aravenasolis1@llnl.gov;~Terrell_N._Mundhenk1;~Garrett_Mulcahy1;~Brenden_K._Petersen1", "aff": "Lawrence Livermore National Labs;;;Lawrence Livermore National Labs;;;Lawrence Livermore National Labs;University of Washington;", "aff_domain": "llnl.gov;;;llnl.gov;;;llnl.gov;washington.edu;", "position": "Researcher;;;Staff Researcher;;;Scientist;PhD student;", "bibtex": "@inproceedings{\nlandajuela2022a,\ntitle={A Unified Framework for Deep Symbolic Regression},\nauthor={Mikel Landajuela and Chak Lee and Jiachen Yang and Ruben Glatt and Claudio P. Santiago and Ignacio Aravena and Terrell N. Mundhenk and Garrett Mulcahy and Brenden K. Petersen},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2FNnBhwJsHK}\n}", "github": "", "project": "", "reviewers": "PqgV;CaGz;xSLH", "pdf_size": 3151418, "rating": "5;5;7", "confidence": "4;4;4", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "contribution": "2;2;3", "wc_summary": "44;53;66", "wc_strengths_and_weaknesses": "94;147;305", "wc_questions": "364;12;331", "wc_limitations": "14;10;76", "wc_review": "516;222;778", "wc_reply_reviewers": "0;0;285", "wc_reply_authors": "1295;759;2311", "reply_reviewers": "0;0;2", "reply_authors": "2;1;4", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 54.333333333333336, 9.030811456096044 ], "wc_strengths_and_weaknesses_avg": [ 182.0, 89.62514528114679 ], "wc_questions_avg": [ 235.66666666666666, 158.728979360698 ], "wc_limitations_avg": [ 33.333333333333336, 30.214051182999096 ], "wc_review_avg": [ 505.3333333333333, 227.11132854958063 ], "wc_reply_reviewers_avg": [ 95.0, 134.35028842544403 ], "wc_reply_authors_avg": [ 1455.0, 643.6230781029117 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4937854312507607850&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "llnl.gov;;;llnl.gov;;;llnl.gov;washington.edu;", "author_num": 9, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Lawrence Livermore National Laboratory;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.llnl.gov;https://www.washington.edu", "aff_unique_abbr": "LLNL;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "title": "M$^4$I: Multi-modal Models Membership Inference", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54833", "id": "2GsQ8dyfe45", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0c79d6ed1788653643a1ac67b6ea32a7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2GsQ8dyfe45", "openreview": "https://openreview.net/forum?id=2GsQ8dyfe45", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54833", "video": "https://nips.cc/virtual/2022/poster/54833", "author_site": "Pingyi Hu, Zihan Wang, Ruoxi Sun, Hu Wang, Minhui Xue", "tldr": "", "abstract": "With the development of machine learning techniques, the attention of research has been moved from single-modal learning to multi-modal learning, as real-world data exist in the form of different modalities. However, multi-modal models often carry more information than single-modal models and they are usually applied in sensitive scenarios, such as medical report generation or disease identification. Compared with the existing membership inference against machine learning classifiers, we focus on the problem that the input and output of the multi-modal models are in different modalities, such as image captioning. This work studies the privacy leakage of multi-modal models through the lens of membership inference attack, a process of determining whether a data record involves in the model training process or not. To achieve this, we propose Multi-modal Models Membership Inference (M$^4$I) with two attack methods to infer the membership status, named metric-based (MB) M$^4$I and feature-based (FB) M$^4$I, respectively. More specifically, MB M$^4$I adopts similarity metrics while attacking to infer target data membership. FB M$^4$I uses a pre-trained shadow multi-modal feature extractor to achieve the purpose of data inference attack by comparing the similarities from extracted input and output features. Extensive experimental results show that both attack methods can achieve strong performances. Respectively, 72.5% and 94.83% of attack success rates on average can be obtained under unrestricted scenarios. Moreover, we evaluate multiple defense mechanisms against our attacks. 
The source code of M$^4$I attacks is publicly available at https://github.com/MultimodalMI/Multimodal-membership-inference.git.", "keywords": "Membership inference attack;Data privacy leakage;Multimodality", "primary_area": "", "supplementary_material": "/attachment/704293e54eccd75bab2b02998b8b8e2a8f191c38.pdf", "author": "Pingyi Hu;Zihan Wang;Ruoxi Sun;Hu Wang;Minhui Xue", "authorids": "~Pingyi_Hu1;~Zihan_Wang6;~Ruoxi_Sun3;~Hu_Wang1;~Minhui_Xue2", "gender": "M;M;M;M;", "homepage": "https://github.com/Issachu;https://www.zihan.com.au;;https://huwang01.github.io/;", "dblp": ";;72/7683;62/2712-5.html;", "google_scholar": ";;https://scholar.google.com.au/citations?user=Ei4jdwQAAAAJ;https://scholar.google.com.au/citations?user=K_6dgCgAAAAJ;", "orcid": ";;0000-0001-5404-8550;0000-0003-1725-873X;", "linkedin": ";;;;", "or_profile": "~Pingyi_Hu1;~Zihan_Wang6;~Ruoxi_Sun3;~Hu_Wang1;~Minhui_Xue2", "aff": "University of Adelaide;University of Adelaide;University of Adelaide;The University of Adelaide;", "aff_domain": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;", "position": "MS student;Undergrad student;PhD student;Researcher;", "bibtex": "@inproceedings{\nhu2022mi,\ntitle={M\\${\\textasciicircum}4\\$I: Multi-modal Models Membership Inference},\nauthor={Pingyi Hu and Zihan Wang and Ruoxi Sun and Hu Wang and Minhui Xue},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2GsQ8dyfe45}\n}", "github": "", "project": "", "reviewers": "7PRw;jdYs;BXtU", "pdf_size": 4114327, "rating": "4;6;9", "confidence": "4;3;4", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;3;3", "contribution": "2;3;4", "wc_summary": "67;56;86", "wc_strengths_and_weaknesses": "251;61;310", "wc_questions": "43;69;11", "wc_limitations": "1;81;4", "wc_review": "362;267;411", "wc_reply_reviewers": "0;38;0", "wc_reply_authors": "753;721;53", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 69.66666666666667, 12.39175353029407 ], "wc_strengths_and_weaknesses_avg": [ 207.33333333333334, 106.23977074941799 ], "wc_questions_avg": [ 41.0, 23.72059583287626 ], "wc_limitations_avg": [ 28.666666666666668, 37.025516726831626 ], "wc_review_avg": [ 346.6666666666667, 59.7792234438986 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 17.913371790059205 ], "wc_reply_authors_avg": [ 509.0, 322.70523185511985 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1147078669352809, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16858415793589123060&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Adelaide", "aff_unique_dep": "", "aff_unique_url": "https://www.adelaide.edu.au", "aff_unique_abbr": "Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Time-Conditioned Dances with Simplicial Complexes: Zigzag Filtration Curve based Supra-Hodge Convolution Networks for Time-series Forecasting", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53664", "id": "2Ln-TWxVtf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3a899fa79bc4110bca1eaa6649e9a8fa-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2Ln-TWxVtf", "openreview": "https://openreview.net/forum?id=2Ln-TWxVtf", "poster": "/media/PosterPDFs/NeurIPS%202022/53664.png?t=1669590076.7420683", "slides": "https://nips.cc/virtual/2022/poster/53664", "video": "https://nips.cc/virtual/2022/poster/53664", "author_site": "Yuzhou Chen, Yulia Gel, H. Vincent Poor", "tldr": " We propose a novel methodology to time series forecasting which harnesses the strengths of the two emerging concepts: simplicial neural networks and time-conditioned topological knowledge representation in a form of zigzag persistence.", "abstract": "Graph neural networks (GNNs) offer a new powerful alternative for multivariate time series forecasting, demonstrating remarkable success in a variety of spatio-temporal applications, from urban flow monitoring systems to health care informatics to financial analytics. Yet, such GNN models pre-dominantly capture only lower order interactions, that is, pairwise relations among nodes, and also largely ignore intrinsic time-conditioned information on the underlying topology of multivariate time series. To address these limitations, we propose a new time-aware GNN architecture which amplifies the power of the recently emerged simplicial neural networks with a time-conditioned topological knowledge representation in a form of zigzag persistence. That is, our new approach, Zigzag Filtration Curve based Supra-Hodge Convolution Networks (ZFC-SHCN) is built upon the two main components: (i) a new highly computationally efficient\nzigzag persistence curve which allows us to systematically encode time-conditioned topological information, and (ii) a new temporal multiplex graph representation module for learning higher-order network interactions. We discuss theoretical properties of the proposed time-conditioned topological knowledge representation and extensively validate the new time-aware ZFC-SHCN model \nin conjunction with time series forecasting on a broad range of synthetic and real-world datasets: traffic flows, COVID-19 biosurveillance, Ethereum blockchain, surface air temperature, wind energy, and vector autoregressions. Our experiments demonstrate that the ZFC-SHCN achieves the state-of-the-art performance with lower requirements on computational costs.", "keywords": "Multivariate time series;Graph neural networks;Zigzag persistent homology", "primary_area": "", "supplementary_material": "/attachment/5bbf81a5b0a445c727b4b4b999b88838a48a18a1.pdf", "author": "Yuzhou Chen;Yulia Gel;H. 
Vincent Poor", "authorids": "~Yuzhou_Chen1;~Yulia_Gel1;~H._Vincent_Poor1", "gender": ";;M", "homepage": ";;http://ee.princeton.edu/people/faculty/h-vincent-poor", "dblp": ";;p/HVincentPoor", "google_scholar": ";;Dq93mOUAAAAJ", "orcid": ";;", "linkedin": ";;vince-poor-974a3/", "or_profile": "~Yuzhou_Chen1;~Yulia_Gel1;~H._Vincent_Poor1", "aff": ";;Princeton University", "aff_domain": ";;princeton.edu", "position": ";;Full Professor", "bibtex": "@inproceedings{\nchen2022timeconditioned,\ntitle={Time-Conditioned Dances with Simplicial Complexes: Zigzag Filtration Curve based Supra-Hodge Convolution Networks for Time-series Forecasting},\nauthor={Yuzhou Chen and Yulia Gel and H. Vincent Poor},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2Ln-TWxVtf}\n}", "github": "", "project": "", "reviewers": "vJan;isoH;m8mW;u6Ln", "pdf_size": 373274, "rating": "5;6;6;7", "confidence": "3;3;4;3", "soundness": "2;3;4;3", "novelty": "3;3;2;2", "presentation": "2;3;4;3", "contribution": "3;3;2;2", "wc_summary": "54;94;61;34", "wc_strengths_and_weaknesses": "290;174;114;32", "wc_questions": "3;196;3;14", "wc_limitations": "9;19;1;2", "wc_review": "356;483;179;82", "wc_reply_reviewers": "18;166;32;0", "wc_reply_authors": "1889;2121;480;660", "reply_reviewers": "1;2;1;0", "reply_authors": "6;6;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.75, 21.602951187279945 ], "wc_strengths_and_weaknesses_avg": [ 152.5, 94.03589740093939 ], "wc_questions_avg": [ 54.0, 82.10663797769338 ], "wc_limitations_avg": [ 7.75, 7.189401922274203 ], "wc_review_avg": [ 275.0, 155.15315014526774 ], "wc_reply_reviewers_avg": [ 54.0, 65.6505902486794 ], "wc_reply_authors_avg": [ 1287.5, 724.9718960070107 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.0, 2.0 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15791139031849989092&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";;princeton.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "OpenSRH: optimizing brain tumor surgery using intraoperative stimulated Raman histology", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55676", "id": "2N8JzuiWZ25", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b6b5f50a2001ad1cbccca96e693c4ab4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=2N8JzuiWZ25", "openreview": "https://openreview.net/forum?id=2N8JzuiWZ25", "poster": "/media/PosterPDFs/NeurIPS%202022/55676.png?t=1669429102.5912433", "slides": "https://nips.cc/virtual/2022/poster/55676", "video": "https://nips.cc/virtual/2022/poster/55676", "author_site": "Cheng Jiang, Asadur Chowdury, Xinhai Hou, Akhil Kondepudi, Christian Freudiger, Kyle Conway, Sandra Camelo-Piragua, Daniel Orringer, Honglak Lee, Todd Hollon", "tldr": "OpenSRH is 
the first ever publicly available stimulated Raman histology (SRH) dataset and benchmark, which will facilitate the clinical translation of rapid optical imaging and real-time ML-based surgical decision support.", "abstract": "Accurate intraoperative diagnosis is essential for providing safe and effective care during brain tumor surgery. Our standard-of-care diagnostic methods are time, resource, and labor intensive, which restricts access to optimal surgical treatments. To address these limitations, we propose an alternative workflow that combines stimulated Raman histology (SRH), a rapid optical imaging method, with deep learning-based automated interpretation of SRH images for intraoperative brain tumor diagnosis and real-time surgical decision support. Here, we present OpenSRH, the first public dataset of clinical SRH images from 300+ brain tumor patients and 1300+ unique whole slide optical images. OpenSRH contains data from the most common brain tumor diagnoses, full pathologic annotations, whole slide tumor segmentations, and raw and processed optical imaging data for end-to-end model development and validation. We provide a framework for patch-based whole slide SRH classification and inference using weak (i.e. patient-level) diagnostic labels. Finally, we benchmark two computer vision tasks: multi-class histologic brain tumor classification and patch-based contrastive representation learning. We hope OpenSRH will facilitate the clinical translation of rapid optical imaging and real-time ML-based surgical decision support in order to improve the access, safety, and efficacy of cancer surgery in the era of precision medicine.", "keywords": "Stimulated Raman Histology;Computer Vision;Convolutional Neural Network;Vision Transformer;Contrastive Learning;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/bf3728a54625a1443b333813dd48a564053e8d3d.pdf", "author": "Cheng Jiang;Asadur Zaman Chowdury;Xinhai Hou;Akhil Kondepudi;Christian Freudiger;Kyle Stephen Conway;Sandra Camelo-Piragua;Daniel A Orringer;Honglak Lee;Todd Hollon", "authorids": "~Cheng_Jiang2;~Asadur_Zaman_Chowdury1;~Xinhai_Hou1;~Akhil_Kondepudi1;~Christian_Freudiger1;~Kyle_Stephen_Conway1;~Sandra_Camelo-Piragua1;daniel.orringer@nyulangone.org;~Honglak_Lee2;~Todd_Hollon1", "gender": "M;M;M;Not Specified;M;M;F;;;", "homepage": "https://chengjia.me;;https://renlyh.github.io;;https://www.invenio-imaging.com/;https://www.pathology.med.umich.edu/faculty/kyconway;https://www.pathology.med.umich.edu/faculty/sandraca;;;", "dblp": "15/11195-3;;322/4105;;;;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;TabSRQ8AAAAJ;;;;;;", "orcid": ";0000-0002-5653-9721;;;;;;;;", "linkedin": ";asadurchowdury/;;;;;;;;", "or_profile": "~Cheng_Jiang2;~Asadur_Zaman_Chowdury1;~Xinhai_Hou1;~Akhil_Kondepudi1;~Christian_Freudiger1;~Kyle_Stephen_Conway1;~Sandra_Camelo-Piragua1;daniel.orringer@nyulangone.org;~Honglak_Lee2;~Todd_Hollon1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;;;", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;;umich.edu;umich.edu;;;", "position": "PhD student;Machine Learning Engineer;PhD student;Undergrad student;;Assistant Professor;Associate Professor;;;", "bibtex": "@inproceedings{\njiang2022opensrh,\ntitle={Open{SRH}: optimizing brain tumor surgery using
intraoperative stimulated Raman histology},\nauthor={Cheng Jiang and Asadur Zaman Chowdury and Xinhai Hou and Akhil Kondepudi and Christian Freudiger and Kyle Stephen Conway and Sandra Camelo-Piragua and Daniel A Orringer and Honglak Lee and Todd Hollon},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=2N8JzuiWZ25}\n}", "github": "", "project": "", "reviewers": "YwAc;cLcj;8qJ4;xEv2;ZJAZ;6fG8", "pdf_size": 6742054, "rating": "3;5;6;8;9;9", "confidence": "4;3;4;4;4;4", "wc_summary_and_contributions": "42;136;62;104;95;170", "wc_strengths": "35;38;55;105;55;134", "wc_weaknesses": "226;16;101;126;38;98", "wc_correctness": "24;31;10;11;10;45", "wc_clarity": "1;38;86;75;9;265", "wc_relation_to_prior_work": "1;15;45;55;35;29", "wc_documentation": "28;31;27;129;19;60", "wc_additional_feedback": "1;1;1;310;56;258", "wc_review": "358;306;387;915;317;1059", "wc_reply_reviewers": "0;0;0;13;0;0", "wc_reply_authors": "520;229;447;916;221;1458", "reply_reviewers": "0;0;0;1;0;0", "reply_authors": "1;1;1;2;1;3", "rating_avg": [ 6.666666666666667, 2.211083193570267 ], "confidence_avg": [ 3.8333333333333335, 0.3726779962499649 ], "wc_summary_and_contributions_avg": [ 101.5, 42.87870489337724 ], "wc_strengths_avg": [ 70.33333333333333, 36.558932636984295 ], "wc_weaknesses_avg": [ 100.83333333333333, 67.64223696938343 ], "wc_correctness_avg": [ 21.833333333333332, 13.056501147789257 ], "wc_clarity_avg": [ 79.0, 88.81253665258451 ], "wc_relation_to_prior_work_avg": [ 30.0, 17.99073835801818 ], "wc_documentation_avg": [ 49.0, 38.01315561749642 ], "wc_additional_feedback_avg": [ 104.5, 129.28102464527936 ], "wc_review_avg": [ 557.0, 308.0232675194089 ], "wc_reply_reviewers_avg": [ 2.1666666666666665, 4.844813951249545 ], "wc_reply_authors_avg": [ 631.8333333333334, 436.086924311605 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.337099931231621, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4452506073771241817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "umich.edu;umich.edu;umich.edu;umich.edu;;umich.edu;umich.edu;;;", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Maximum a posteriori natural scene reconstruction from retinal ganglion cells with deep denoiser priors", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53894", "id": "2NcrByUfu9", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ae447e9dbfdd1189966e894b85bea062-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2NcrByUfu9", "openreview": "https://openreview.net/forum?id=2NcrByUfu9", "poster": "/media/PosterPDFs/NeurIPS%202022/53894.png?t=1669245957.6800382", "slides": "https://nips.cc/virtual/2022/poster/53894", "video": "https://nips.cc/virtual/2022/poster/53894", "author_site": "Eric Wu, Nora Brackbill, Alexander Sher, Alan Litke, Eero Simoncelli, E.J. 
Chichilnisky", "tldr": "We develop a method for approximate MAP reconstruction natural images from large populations of experimentally recorded retinal ganglion cells, and show that the method is comparable to or better than current ad hoc reconstruction methods.", "abstract": "Visual information arriving at the retina is transmitted to the brain by signals in the optic nerve, and the brain must rely solely on these signals to make inferences about the visual world. Previous work has probed the content of these signals by directly reconstructing images from retinal activity using linear regression or nonlinear regression with neural networks. Maximum a posteriori (MAP) reconstruction using retinal encoding models and separately-trained natural image priors offers a more general and principled approach. We develop a novel method for approximate MAP reconstruction that combines a generalized linear model for retinal responses to light, including their dependence on spike history and spikes of neighboring cells, with the image prior implicitly embedded in a deep convolutional neural network trained for image denoising. We use this method to reconstruct natural images from ex vivo simultaneously-recorded spikes of hundreds of retinal ganglion cells uniformly sampling a region of the retina. The method produces reconstructions that match or exceed the state-of-the-art in perceptual similarity and exhibit additional fine detail, while using substantially fewer model parameters than previous approaches. The use of more rudimentary encoding models (a linear-nonlinear-Poisson cascade) or image priors (a 1/f spectral model) significantly reduces reconstruction performance, indicating the essential role of both components in achieving high-quality reconstructed images from the retinal signal.", "keywords": "retina;ganglion cell;natural scenes;image reconstruction;image prior;Plug and Play;encoding model;neural coding;neuroscience;neural decoding", "primary_area": "", "supplementary_material": "/attachment/0f6630d8dc18d2aa36a86e859be3ae7b6914d3f9.zip", "author": "Eric Gene Wu;Nora Brackbill;Alexander Sher;Alan Litke;Eero P Simoncelli;EJ Chichilnisky", "authorids": "~Eric_Gene_Wu1;~Nora_Brackbill1;~Alexander_Sher1;~Alan_Litke1;~Eero_P_Simoncelli1;~EJ_Chichilnisky1", "gender": "M;;;;M;M", "homepage": ";;;;https://profiles.stanford.edu/chichilnisky;https://www.cns.nyu.edu/~eero/", "dblp": ";;;;;30/5604", "google_scholar": "FVJEt_AAAAAJ;;;;;MplR7_cAAAAJ", "orcid": "0000-0001-8315-3288;0000-0002-0308-1382;0000-0001-6655-6456;;;0000-0002-1206-527X", "linkedin": ";;;;;eero-simoncelli-445782123", "or_profile": "~Eric_Gene_Wu1;~Nora_Brackbill1;~Alexander_Sher1;~Alan_Litke1;~EJ_Chichilnisky1;~Eero_Peter_Simoncelli1", "aff": "Stanford University;Stanford University;University of California, Santa Cruz;;Stanford University;New York University", "aff_domain": "stanford.edu;stanford.edu;ucsc.edu;;stanford.edu;nyu.edu", "position": "PhD student;PhD student;Full Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2022maximum,\ntitle={Maximum a posteriori natural scene reconstruction from retinal ganglion cells with deep denoiser priors},\nauthor={Eric Gene Wu and Nora Brackbill and Alexander Sher and Alan Litke and Eero P Simoncelli and EJ Chichilnisky},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2NcrByUfu9}\n}", "github": "", "project": "", "reviewers": "wKPR;mbYL;Etny", "pdf_size": 5536282, "rating": "5;6;8", "confidence": "3;3;5", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;4;4", "contribution": "3;3;3", "wc_summary": "69;122;94", "wc_strengths_and_weaknesses": "293;282;86", "wc_questions": "8;96;126", "wc_limitations": "90;34;82", "wc_review": "460;534;388", "wc_reply_reviewers": "0;26;0", "wc_reply_authors": "513;767;461", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.0, 21.64871050817269 ], "wc_strengths_and_weaknesses_avg": [ 220.33333333333334, 95.09410543713469 ], "wc_questions_avg": [ 76.66666666666667, 50.075498555237125 ], "wc_limitations_avg": [ 68.66666666666667, 24.729649321321876 ], "wc_review_avg": [ 460.6666666666667, 59.60611452601896 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 12.256517540566824 ], "wc_reply_authors_avg": [ 580.3333333333334, 133.6895242301688 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15813215597995156228&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 10, "email": "stanford.edu;stanford.edu;ucsc.edu;;stanford.edu;nyu.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Stanford University;University of California, Santa Cruz;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.ucsc.edu;https://www.nyu.edu", "aff_unique_abbr": "Stanford;UCSC;NYU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Stanford;Santa Cruz;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ResT V2: Simpler, Faster and Stronger", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55300", "id": "2OdAggzzF3z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ec3acc7700fc5be9a8e257b38f870855-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2OdAggzzF3z", "openreview": "https://openreview.net/forum?id=2OdAggzzF3z", "poster": "/media/PosterPDFs/NeurIPS%202022/210f760a89db30aa72ca258a3483cc7f.png?t=1665917938.1459868", "slides": "https://nips.cc/virtual/2022/poster/55300", "video": "https://nips.cc/virtual/2022/poster/55300", "author_site": "Qinglong Zhang, Yu-Bin Yang", "tldr": "ResTv2, a simpler, faster, and stronger multi-scale vision Transformer for visual recognition", "abstract": "This paper proposes ResTv2, a simpler, faster, and stronger multi-scale vision Transformer for visual recognition. ResTv2 simplifies the EMSA structure in ResTv1 (i.e., eliminating the multi-head interaction part) and employs an upsample operation to reconstruct the lost medium- and high-frequency information caused by the downsampling operation. In addition, we explore different techniques for better applying ResTv2 backbones to downstream tasks. 
We find that although combining EMSAv2 and window attention can greatly reduce the theoretical matrix multiply FLOPs, it may significantly decrease the computation density, thus causing lower actual speed. We comprehensively validate ResTv2 on ImageNet classification, COCO detection, and ADE20K semantic segmentation. Experimental results show that the proposed ResTv2 can outperform recent state-of-the-art backbones by a large margin, demonstrating the potential of ResTv2 as a solid backbone. The code and models will be made publicly available at \\url{https://github.com/wofmanaf/ResT}.", "keywords": "multi-scale vision Transformer;downsampling;upsampling;computation density", "primary_area": "", "supplementary_material": "/attachment/6c9d75fbb10d00bf36145a788e69a99b3dc3f035.zip", "author": "Qinglong Zhang;Yu-Bin Yang", "authorids": "~Qinglong_Zhang1;~Yu-Bin_Yang3", "gender": "M;M", "homepage": ";https://cs.nju.edu.cn/yangyubin/", "dblp": "165/0559;", "google_scholar": "LYR7l98AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Qinglong_Zhang1;~Yu-Bin_Yang3", "aff": "Nanjing University;Nanjing University, China", "aff_domain": "nju.edu.cn;nju.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2022rest,\ntitle={ResT V2: Simpler, Faster and Stronger},\nauthor={Qinglong Zhang and Yu-Bin Yang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2OdAggzzF3z}\n}", "github": "", "project": "", "reviewers": "cW5j;21zc;TbMq", "pdf_size": 800210, "rating": "5;6;6", "confidence": "5;5;5", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "3;3;3", "contribution": "3;2;2", "wc_summary": "69;59;108", "wc_strengths_and_weaknesses": "112;154;262", "wc_questions": "120;14;106", "wc_limitations": "19;33;15", "wc_review": "320;260;491", "wc_reply_reviewers": "119;0;88", "wc_reply_authors": "1330;721;1818", "reply_reviewers": "2;0;1", "reply_authors": "5;2;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 78.66666666666667, 21.139746660943903 ], "wc_strengths_and_weaknesses_avg": [ 176.0, 63.182275995725256 ], "wc_questions_avg": [ 80.0, 47.01772715334788 ], "wc_limitations_avg": [ 22.333333333333332, 7.71722460186015 ], "wc_review_avg": [ 357.0, 97.86725703727473 ], "wc_reply_reviewers_avg": [ 69.0, 50.40502620440412 ], "wc_reply_authors_avg": [ 1289.6666666666667, 448.75556325861356 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7008614846201767249&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nju.edu.cn;nju.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Prune and distill: similar reformatting of image
information along rat visual cortex and deep neural networks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53520", "id": "2OpRgzLhoPQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c2d82a425af4c18a35049899fea5ee82-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2OpRgzLhoPQ", "openreview": "https://openreview.net/forum?id=2OpRgzLhoPQ", "poster": "/media/PosterPDFs/NeurIPS%202022/53520.png?t=1668862113.8607788", "slides": "https://nips.cc/virtual/2022/poster/53520", "video": "https://nips.cc/virtual/2022/poster/53520", "author_site": "Paolo Muratore, Sina Tafazoli, Eugenio Piasini, Alessandro Laio, Davide Zoccolan", "tldr": "Deep CNNs and visual cortex share a similarly tight relationship between dimensionality expansion/reduction of object representations and reformatting of image information.", "abstract": "Visual object recognition has been extensively studied in both neuroscience and computer vision. Recently, the most popular class of artificial systems for this task, deep convolutional neural networks (CNNs), has been shown to provide excellent models for its functional analogue in the brain, the ventral stream in visual cortex. This has prompted questions on what, if any, are the common principles underlying the reformatting of visual information as it flows through a CNN or the ventral stream. Here we consider some prominent statistical patterns that are known to exist in the internal representations of either CNNs or the visual cortex and look for them in the other system. We show that intrinsic dimensionality (ID) of object representations along the rat homologue of the ventral stream presents two distinct expansion-contraction phases, as previously shown for CNNs. Conversely, in CNNs, we show that training results in both distillation and active pruning (mirroring the increase in ID) of low- to middle-level image information in single units, as representations gain the ability to support invariant discrimination, in agreement with previous observations in rat visual cortex. 
Taken together, our findings suggest that CNNs and visual cortex share a similarly tight relationship between dimensionality expansion/reduction of object representations and reformatting of image information.", "keywords": "convolutional neural networks;computational neuroscience;rat;visual cortex;ventral stream;intrinsic dimensionality;vision;representation analysis", "primary_area": "", "supplementary_material": "/attachment/2657077295bf4a39206a85862cea4ce47107f1c3.zip", "author": "Paolo Muratore;Sina Tafazoli;Eugenio Piasini;Alessandro Laio;Davide Zoccolan", "authorids": "~Paolo_Muratore1;tafazoli@princeton.edu;~Eugenio_Piasini1;~Alessandro_Laio1;zoccolan@sissa.it", "gender": "M;;M;M;", "homepage": ";;https://people.sissa.it/~epiasini;https://people.sissa.it/~laio/;", "dblp": "321/1726;;155/6689;;", "google_scholar": "HGEzIUcAAAAJ;;dGKi9Q4AAAAJ;https://scholar.google.it/citations?user=ma-T1oEAAAAJ;", "orcid": "0000-0003-4520-5950;;0000-0003-0384-7699;;", "linkedin": ";;;;", "or_profile": "~Paolo_Muratore1;tafazoli@princeton.edu;~Eugenio_Piasini1;~Alessandro_Laio1;zoccolan@sissa.it", "aff": "SISSA/ISAS;;International Higher School for Advanced Studies Trieste;SISSA/ISAS;", "aff_domain": "sissa.it;;sissa.it;sissa.it;", "position": "PhD student;;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nmuratore2022prune,\ntitle={Prune and distill: similar reformatting of image information along rat visual cortex and deep neural networks},\nauthor={Paolo Muratore and Sina Tafazoli and Eugenio Piasini and Alessandro Laio and Davide Zoccolan},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2OpRgzLhoPQ}\n}", "github": "", "project": "", "reviewers": "2Y7t;DRfj;sQm2;vMFS", "pdf_size": 1895071, "rating": "4;6;7;8", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "1;3;2;4", "presentation": "3;3;4;4", "contribution": "1;3;2;4", "wc_summary": "68;78;142;125", "wc_strengths_and_weaknesses": "79;200;391;409", "wc_questions": "250;252;108;49", "wc_limitations": "1;40;42;23", "wc_review": "398;570;683;606", "wc_reply_reviewers": "142;266;48;54", "wc_reply_authors": "1641;1578;2248;181", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 103.25, 31.04331651096577 ], "wc_strengths_and_weaknesses_avg": [ 269.75, 137.24316922892737 ], "wc_questions_avg": [ 164.75, 88.73943599099557 ], "wc_limitations_avg": [ 26.5, 16.469669092000604 ], "wc_review_avg": [ 564.25, 104.30334366644244 ], "wc_reply_reviewers_avg": [ 127.5, 88.19722217847907 ], "wc_reply_authors_avg": [ 1412.0, 757.3397520267902 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10150831839570295893&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 8, "email": "sissa.it;;sissa.it;sissa.it;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Scuola Internazionale Superiore di Studi Avanzati;International Higher School for Advanced Studies", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.sissa.it;https://www.sissa.it", "aff_unique_abbr": "SISSA;SISSA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Trieste", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "A Comprehensive Study on Large-Scale Graph Training: Benchmarking and Rethinking", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55687", "id": "2QrFr_U782Z", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/23ee05bf1f4ade71c0f8f5ca722df601-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=2QrFr_U782Z", "openreview": "https://openreview.net/forum?id=2QrFr_U782Z", "poster": "/media/PosterPDFs/NeurIPS%202022/577ef1154f3240ad5b9b413aa7346a1e.png?t=1666161969.578953", "slides": "https://nips.cc/virtual/2022/poster/55687", "video": "https://nips.cc/virtual/2022/poster/55687", "author_site": "Keyu Duan, Zirui Liu, Peihao Wang, Wenqing Zheng, Kaixiong Zhou, Tianlong Chen, Xia Hu, Zhangyang Wang", "tldr": "We present a comprehensive and fair benchmark study on large-scale graph training and further propose a new layer-wise training manner the achieves new SOTA performance on large-scale graph datasets.", "abstract": "Large-scale graph training is a notoriously challenging problem for graph neural networks (GNNs). Due to the nature of evolving graph structures into the training process, vanilla GNNs usually fail to scale up, limited by the GPU memory space. Up to now, though numerous scalable GNN architectures have been proposed, we still lack a comprehensive survey and fair benchmark of this reservoir to find the rationale for designing scalable GNNs. To this end, we first systematically formulate the representative methods of large-scale graph training into several branches and further establish a fair and consistent benchmark for them by a greedy hyperparameter searching. In addition, regarding efficiency, we theoretically evaluate the time and space complexity of various branches and empirically compare them w.r.t GPU memory usage, throughput, and convergence. Furthermore, We analyze the pros and cons for various branches of scalable GNNs and then present a new ensembling training manner, named EnGCN, to address the existing issues. Remarkably, our proposed method has achieved new state-of-the-art (SOTA) performance on large-scale datasets. 
Our code is available at https://github.com/VITA-Group/Large_Scale_GCN_Benchmarking.", "keywords": "Graph Convolutional Networks;Scalability;Benchmark", "primary_area": "", "supplementary_material": "/attachment/af4d0df5f641abd2b403e460c7e784c6f8def6d7.pdf", "author": "Keyu Duan;Zirui Liu;Peihao Wang;Wenqing Zheng;Kaixiong Zhou;Tianlong Chen;Xia Hu;Zhangyang Wang", "authorids": "~Keyu_Duan1;~Zirui_Liu1;~Peihao_Wang1;~Wenqing_Zheng1;~Kaixiong_Zhou1;~Tianlong_Chen1;~Xia_Hu4;~Zhangyang_Wang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://kduan.live;https://zirui-ray-liu.github.io/;https://peihaowang.github.io/;https://wenqing-zheng.github.io;https://kaixiong-zhou.github.io/;https://tianlong-chen.github.io;https://vita-group.github.io;https://cs.rice.edu/~xh37/index.html", "dblp": ";196/8629-1.html;239/4075;;178/7315;;119/4026;256/9406.html", "google_scholar": "fGW4ClMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;fqf2tBsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;zMspIjIAAAAJ;LE3ctn0AAAAJ;pxFyKAIAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": "0000-0002-1902-5545;;;0000-0002-8283-7511;0000-0001-5226-8736;0000-0001-7774-8197;;", "linkedin": ";;peihao-wang-25a411162/;;;tianlong-chen-783862167/;;", "or_profile": "~Keyu_Duan1;~Zirui_Liu1;~Peihao_Wang1;~Wenqing_Zheng1;~Kaixiong_Zhou1;~Tianlong_Chen1;~Zhangyang_Wang1;~Xia_Hu2", "aff": "Rice University;Rice University;University of Texas, Austin;University of Texas, Austin;Rice University;University of Texas, Austin;University of Texas, Austin;Rice University", "aff_domain": "rice.edu;rice.edu;utexas.edu;utexas.edu;rice.edu;utexas.edu;utexas.edu;rice.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nduan2022a,\ntitle={A Comprehensive Study on Large-Scale Graph Training: Benchmarking and Rethinking},\nauthor={Keyu Duan and Zirui Liu and Peihao Wang and Wenqing Zheng and Kaixiong Zhou and Tianlong Chen and Xia Hu and Zhangyang Wang},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=2QrFr_U782Z}\n}", "github": "", "project": "", "reviewers": "oPP2;W47P;s5iV;kf8i;W3bB;vowK", "pdf_size": 1688745, "rating": "6;6;6;6;7;8", "confidence": "4;4;4;4;3;4", "wc_summary_and_contributions": "51;38;72;31;79;69", "wc_strengths": "43;71;44;125;62;32", "wc_weaknesses": "178;67;106;219;40;13", "wc_correctness": "1;1;7;29;209;9", "wc_clarity": "1;1;52;6;212;15", "wc_relation_to_prior_work": "1;1;35;30;59;14", "wc_documentation": "25;1;16;22;33;11", "wc_additional_feedback": "3;4;3;44;1;1", "wc_review": "303;184;335;506;695;164", "wc_reply_reviewers": "30;0;0;0;0;0", "wc_reply_authors": "646;492;736;339;526;49", "reply_reviewers": "1;0;0;0;0;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.5, 0.7637626158259734 ], "confidence_avg": [ 3.8333333333333335, 0.3726779962499649 ], "wc_summary_and_contributions_avg": [ 56.666666666666664, 17.9133717900592 ], "wc_strengths_avg": [ 62.833333333333336, 30.63449399324595 ], "wc_weaknesses_avg": [ 103.83333333333333, 73.54231586115726 ], "wc_correctness_avg": [ 42.666666666666664, 74.97925639061039 ], "wc_clarity_avg": [ 47.833333333333336, 75.47939380313603 ], "wc_relation_to_prior_work_avg": [ 23.333333333333332, 20.564262420249577 ], "wc_documentation_avg": [ 18.0, 10.263202878893768 ], "wc_additional_feedback_avg": [ 9.333333333333334, 15.542057635833022 ], 
"wc_review_avg": [ 364.5, 185.60060165132367 ], "wc_reply_reviewers_avg": [ 5.0, 11.180339887498949 ], "wc_reply_authors_avg": [ 464.6666666666667, 223.45146129653205 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1620706562706665630&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "rice.edu;rice.edu;utexas.edu;utexas.edu;rice.edu;utexas.edu;utexas.edu;rice.edu", "author_num": 8, "aff_unique_index": "0;0;1;1;0;1;1;0", "aff_unique_norm": "Rice University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.rice.edu;https://www.utexas.edu", "aff_unique_abbr": "Rice;UT Austin", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Memory safe computations with XLA compiler", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52830", "id": "2S_GtHBtTUP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/782b6152c04e9948c2cb3833e9a288ef-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2S_GtHBtTUP", "openreview": "https://openreview.net/forum?id=2S_GtHBtTUP", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/52830", "video": "https://nips.cc/virtual/2022/poster/52830", "author_site": "Artem Artemev, Yuze An, Tilman Roeder, Mark van der Wilk", "tldr": "The extension to the XLA compiler for automatic resolving memory overflows in machine learning programs. The impact of memory optimisations is demonstrated on sparse Gaussian processes.", "abstract": "Software packages like TensorFlow and PyTorch are designed to support linear algebra operations, and their speed and usability determine their success. However, by prioritising speed, they often neglect memory requirements. As a consequence, the implementations of memory-intensive algorithms that are convenient in terms of software design can often not be run for large problems due to memory overflows. Memory-efficient solutions require complex programming approaches with significant logic outside the computational framework. This impairs the adoption and use of such algorithms. To address this, we developed an XLA compiler extension that adjusts the computational data-flow representation of an algorithm according to a user-specified memory limit. We show that k-nearest neighbour, sparse Gaussian process regression methods and Transformers can be run on a single device at a much larger scale, where standard implementations would have failed. Our approach leads to better use of hardware resources. 
We believe that further focus on removing memory constraints at a compiler level will widen the range of machine learning methods that can be developed in the future.", "keywords": "xla;compiler;gaussian processes;sparse gaussian processes;k-nearest neighbour", "primary_area": "", "supplementary_material": "/attachment/3e0ec9b15350ad80703e0ab5475d94e364e64b4f.zip", "author": "Artem Artemev;Yuze An;Tilman Roeder;Mark van der Wilk", "authorids": "~Artem_Artemev1;yuze.an21@imperial.ac.uk;tilman.roeder17@imperial.ac.uk;~Mark_van_der_Wilk1", "gender": "M;;;M", "homepage": ";;;https://mvdw.uk", "dblp": ";;;142/2927", "google_scholar": "https://scholar.google.co.uk/citations?user=FRdoHO8AAAAJ;;;PKcjcT4AAAAJ", "orcid": ";;;0000-0001-7947-6682", "linkedin": ";;;", "or_profile": "~Artem_Artemev1;yuze.an21@imperial.ac.uk;tilman.roeder17@imperial.ac.uk;~Mark_van_der_Wilk1", "aff": "Imperial College London;;;Imperial College London", "aff_domain": "imperial.ac.uk;;;imperial.ac.uk", "position": "PhD student;;;Lecturer (Assistant Professor)", "bibtex": "@inproceedings{\nartemev2022memory,\ntitle={Memory safe computations with {XLA} compiler},\nauthor={Artem Artemev and Yuze An and Tilman Roeder and Mark van der Wilk},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2S_GtHBtTUP}\n}", "github": "", "project": "", "reviewers": "1TM1;esE8;Mdhs", "pdf_size": 372635, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "2;3;3", "contribution": "2;3;3", "wc_summary": "41;48;48", "wc_strengths_and_weaknesses": "217;81;41", "wc_questions": "103;137;51", "wc_limitations": "44;8;57", "wc_review": "405;274;197", "wc_reply_reviewers": "26;12;18", "wc_reply_authors": "1204;757;579", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 45.666666666666664, 3.299831645537222 ], "wc_strengths_and_weaknesses_avg": [ 113.0, 75.33038342306952 ], "wc_questions_avg": [ 97.0, 35.364765892999586 ], "wc_limitations_avg": [ 36.333333333333336, 20.725722075613085 ], "wc_review_avg": [ 292.0, 85.86423391998945 ], "wc_reply_reviewers_avg": [ 18.666666666666668, 5.734883511361751 ], "wc_reply_authors_avg": [ 846.6666666666666, 262.9148573630297 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18390099303465948139&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "imperial.ac.uk;;;imperial.ac.uk", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "2TdPjch_ogV", "title": "Learnable Graph Convolutional Attention Networks", "track": "main", "status": "Reject", "tldr": "We propose a 
GNN which learns to use, in each layer, an interpolation of a GCN, GAT, and a GAT with convolved features. It outperforms existing methods, is more robust, and removes the need for cross-validation.", "abstract": "Existing Graph Neural Networks (GNNs) compute the message exchange between nodes by either aggregating uniformly (convolving) the features of all the neighboring nodes, or by applying a non-uniform score (attending) to the features. Recent works have shown the strengths and weaknesses of the resulting GNN architectures, respectively, GCNs and GATs. In this work, we aim at exploiting the strengths of both approaches to their full extent. To that end, we first introduce a graph convolutional attention layer (CAT), which relies on convolutions to compute the attention scores. Unfortunately, as in the case of GCNs and GATs, we then show that there exists no clear winner between the three\u2014neither theoretically nor in practice\u2014since their performance directly depends on the nature of the data (i.e., of the graph and features). This result brings us to the main contribution of this work, the learnable graph convolutional attention network (L-CAT): a GNN architecture that allows us to automatically interpolate between GCN, GAT and CAT in each layer, by only introducing two additional (scalar) parameters. Our results demonstrate that L-CAT is able to efficiently combine different GNN layers across the network, outperforming competing methods in a wide range of datasets, and resulting in a more robust model that needs less cross-validation.", "keywords": "GNN;GCN;GAT", "primary_area": "", "supplementary_material": "/attachment/fd4f6d70e2869818301f74074ab517c2924403bf.zip", "author": "Adri\u00e1n Javaloy;Pablo Sanchez Martin;Amit Levi;Isabel Valera", "authorids": "~Adri\u00e1n_Javaloy1;~Pablo_Sanchez_Martin1;~Amit_Levi1;~Isabel_Valera1", "gender": "M;M;M;F", "homepage": "https://adrianjav.github.io;https://www.is.mpg.de/person/psanchez;https://sites.google.com/view/amit-levi/home;https://ivaleram.github.io/", "dblp": "259/2011;;161/4014.html;126/1768.html", "google_scholar": "ne3evXwAAAAJ;;https://scholar.google.ca/citations?user=kb4ubhcAAAAJ;https://scholar.google.es/citations?user=cpdQqpsAAAAJ", "orcid": "0000-0002-5184-4460;;;", "linkedin": "adrian-javaloy;;;", "or_profile": "~Adri\u00e1n_Javaloy1;~Pablo_Sanchez_Martin1;~Amit_Levi1;~Isabel_Valera1", "aff": "Saarland University, Saarland University;Max-Planck Institute;Huawei Noah\u2019s Ark Lab;Universit\u00e4t des Saarlandes", "aff_domain": "cs.uni-saarland.de;mpg.tuebingen.de;huawei.com;uni-saarland.de", "position": "PhD student;PhD student;Researcher;Full Professor", "bibtex": "@misc{\njavaloy2022learnable,\ntitle={Learnable Graph Convolutional Attention Networks},\nauthor={Adri{\\'a}n Javaloy and Pablo Sanchez Martin and Amit Levi and Isabel Valera},\nyear={2022},\nurl={https://openreview.net/forum?id=2TdPjch_ogV}\n}", "github": "", "project": "", "reviewers": "edc7;CqLh;QHuE;8bmS", "site": "https://openreview.net/forum?id=2TdPjch_ogV", "pdf_size": 779858, "rating": "3;5;6;6", "confidence": "5;4;5;3", "soundness": "2;4;4;3", "novelty": "2;3;2;3", "presentation": "3;4;4;4", "contribution": "2;3;2;3", "wc_summary": "46;58;174;73", "wc_strengths_and_weaknesses": "59;173;87;306", "wc_questions": "76;63;89;89", "wc_limitations": "82;82;8;13", "wc_review": "263;376;358;481", "wc_reply_reviewers": "94;114;0;0", "wc_reply_authors": "572;418;257;248", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 
1.224744871391589 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.75, 50.706878231656106 ], "wc_strengths_and_weaknesses_avg": [ 156.25, 96.12329322281879 ], "wc_questions_avg": [ 79.25, 10.779030568655049 ], "wc_limitations_avg": [ 46.25, 35.79367960967411 ], "wc_review_avg": [ 369.5, 77.37732225917358 ], "wc_reply_reviewers_avg": [ 52.0, 52.478567053607705 ], "wc_reply_authors_avg": [ 373.75, 132.95182398147082 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18380786511904678402&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Saarland University;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;Huawei;Universit\u00e4t des Saarlandes", "aff_unique_dep": ";;Noah\u2019s Ark Lab;", "aff_unique_url": "https://www.uni-saarland.de;https://www.mpg.de;https://www.huawei.com;https://www.uni-saarland.de", "aff_unique_abbr": "UdS;MPG;Huawei;UDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;China" }, { "title": "Distributed Inverse Constrained Reinforcement Learning for Multi-agent Systems", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54135", "id": "2Tv54LpM9cK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d842425e4bf79ba039352da0f658a906-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2Tv54LpM9cK", "openreview": "https://openreview.net/forum?id=2Tv54LpM9cK", "poster": "/media/PosterPDFs/NeurIPS%202022/54135.png?t=1671053913.7084882", "slides": "https://nips.cc/virtual/2022/poster/54135", "video": "https://nips.cc/virtual/2022/poster/54135", "author_site": "Shicheng Liu, Minghui Zhu", "tldr": "", "abstract": "This paper considers the problem of recovering the policies of multiple interacting experts by estimating their reward functions and constraints where the demonstration data of the experts is distributed to a group of learners. We formulate this problem as a distributed bi-level optimization problem and propose a novel bi-level ``distributed inverse constrained reinforcement learning\" (D-ICRL) algorithm that allows the learners to collaboratively estimate the constraints in the outer loop and learn the corresponding policies and reward functions in the inner loop from the distributed demonstrations through intermittent communications. 
We formally guarantee that the distributed learners asymptotically achieve consensus which belongs to the set of stationary points of the bi-level optimization problem.", "keywords": "inverse reinforcement learning;distributed bi-level optimization", "primary_area": "", "supplementary_material": "/attachment/c0234283aa5f706526c453245593cddf47676145.zip", "author": "Shicheng Liu;Minghui Zhu", "authorids": "~Shicheng_Liu1;~Minghui_Zhu1", "gender": ";", "homepage": ";", "dblp": "231/6618;", "google_scholar": ";", "orcid": ";", "linkedin": "shicheng-liu-404a21239/;", "or_profile": "~Shicheng_Liu1;~Minghui_Zhu1", "aff": "Pennsylvania State University;", "aff_domain": "psu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nliu2022distributed,\ntitle={Distributed Inverse Constrained Reinforcement Learning for Multi-agent Systems},\nauthor={Shicheng Liu and Minghui Zhu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2Tv54LpM9cK}\n}", "github": "", "project": "", "reviewers": "cHBi;7HCk;AoHD;SMXn", "pdf_size": 976785, "rating": "5;6;6;8", "confidence": "3;4;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "2;2;2;4", "contribution": "3;3;3;4", "wc_summary": "25;103;46;164", "wc_strengths_and_weaknesses": "68;609;73;150", "wc_questions": "111;367;182;414", "wc_limitations": "1;78;68;7", "wc_review": "205;1157;369;735", "wc_reply_reviewers": "14;79;0;94", "wc_reply_authors": "1316;2850;2550;1566", "reply_reviewers": "1;1;0;2", "reply_authors": "2;5;4;3", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 54.04858925078434 ], "wc_strengths_and_weaknesses_avg": [ 225.0, 224.07253289950555 ], "wc_questions_avg": [ 268.5, 125.65926149711369 ], "wc_limitations_avg": [ 38.5, 34.74550330618338 ], "wc_review_avg": [ 616.5, 366.32328618311993 ], "wc_reply_reviewers_avg": [ 46.75, 40.40652793794587 ], "wc_reply_authors_avg": [ 2070.5, 644.4631486749262 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=214468306175547070&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "psu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Zeroth-Order Hard-Thresholding: Gradient Error vs. 
Expansivity", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54390", "id": "2ZNPedOfwB", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/8de5384f522efff26884559599c09312-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ZNPedOfwB", "openreview": "https://openreview.net/forum?id=2ZNPedOfwB", "poster": "/media/PosterPDFs/NeurIPS%202022/54390.png?t=1669391083.9855998", "slides": "https://nips.cc/virtual/2022/poster/54390", "video": "https://nips.cc/virtual/2022/poster/54390", "author_site": "William de Vazelhes, Hualin Zhang, Huimin Wu, Xiaotong Yuan, Bin Gu", "tldr": "A Zeroth-Order Hard-Thresholding algorithm to reduce the dependence on the dimensionality.", "abstract": "$\\ell_0$ constrained optimization is prevalent in machine learning, particularly for high-dimensional problems, because it is a fundamental approach to achieve sparse learning. Hard-thresholding gradient descent is a dominant technique to solve this problem. However, first-order gradients of the objective function may be either unavailable or expensive to calculate in a lot of real-world problems, where zeroth-order (ZO) gradients could be a good surrogate. Unfortunately, whether ZO gradients can work with the hard-thresholding operator is still an unsolved problem.\nTo solve this puzzle, in this paper, we focus on the $\\ell_0$ constrained black-box stochastic optimization problems, and propose a new stochastic zeroth-order gradient hard-thresholding (SZOHT) algorithm with a general ZO gradient estimator powered by a novel random support sampling. We provide the convergence analysis of SZOHT under standard assumptions. Importantly, we reveal a conflict between the deviation of ZO estimators and the expansivity of the hard-thresholding operator, and provide a theoretical minimal value of the number of random directions in ZO gradients. In addition, we find that the query complexity of SZOHT is independent or weakly dependent on the dimensionality under different settings. Finally, we illustrate the utility of our method on a portfolio optimization problem as well as black-box adversarial attacks.", "keywords": "Sparse learning;Hard-thresholding;Zeroth-order;Stochastic optimization", "primary_area": "", "supplementary_material": "/attachment/ae74163cd5581cd938f07a038e58a13df53120d1.zip", "author": "William de Vazelhes;Hualin Zhang;Huimin Wu;Xiaotong Yuan;Bin Gu", "authorids": "~William_de_Vazelhes2;~Hualin_Zhang1;~Huimin_Wu1;~Xiaotong_Yuan1;~Bin_Gu1", "gender": "M;;M;M;M", "homepage": "https://github.com/zhanghualin0;https://www.researchgate.net/profile/Huimin-Wu-7;https://sites.google.com/site/xtyuan1980/;https://mbzuai.ac.ae/study/faculty/bin-gu/;http://github.com/wdevazelhes", "dblp": "303/7916;;64/5926;29/1758-1;247/1152", "google_scholar": ";;yzU6g24AAAAJ;Vo8OgCgAAAAJ;ple0xCwAAAAJ", "orcid": ";;;0000-0001-6049-1815;", "linkedin": ";;;;", "or_profile": "~Hualin_Zhang1;~Huimin_Wu1;~Xiaotong_Yuan1;~Bin_Gu1;~William_De_Vazelhes1", "aff": "NUIST;NUIST;;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "nuist.edu.cn;nuist.edu.cn;;mbzuai.ac.ae;mbzuai.ac.ae", "position": "MS student;MS student;;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nvazelhes2022zerothorder,\ntitle={Zeroth-Order Hard-Thresholding: Gradient Error vs. 
Expansivity},\nauthor={William de Vazelhes and Hualin Zhang and Huimin Wu and Xiaotong Yuan and Bin Gu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2ZNPedOfwB}\n}", "github": "", "project": "", "reviewers": "baZg;skyg;bBM4", "pdf_size": 980919, "rating": "6;7;8", "confidence": "3;3;5", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;3;3", "contribution": "3;3;4", "wc_summary": "246;50;124", "wc_strengths_and_weaknesses": "329;434;414", "wc_questions": "218;4;160", "wc_limitations": "34;4;19", "wc_review": "827;492;717", "wc_reply_reviewers": "0;27;10", "wc_reply_authors": "837;794;397", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 140.0, 80.81254028099022 ], "wc_strengths_and_weaknesses_avg": [ 392.3333333333333, 45.52166761249221 ], "wc_questions_avg": [ 127.33333333333333, 90.36715234100399 ], "wc_limitations_avg": [ 19.0, 12.24744871391589 ], "wc_review_avg": [ 678.6666666666666, 139.4234158557625 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 11.145502331533658 ], "wc_reply_authors_avg": [ 676.0, 198.06227976741727 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13041805001005535947&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "nuist.edu.cn;nuist.edu.cn;;mbzuai.ac.ae;mbzuai.ac.ae", "author_num": 5, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Nanjing University of Information Science & Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuist.edu.cn/;https://mbzuai.ac.ae", "aff_unique_abbr": "NUIST;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Online Decision Mediation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53616", "id": "2ZfUNW7SoaS", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0bc795afae289ed465a65a3b4b1f4eb7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ZfUNW7SoaS", "openreview": "https://openreview.net/forum?id=2ZfUNW7SoaS", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53616", "video": "https://nips.cc/virtual/2022/poster/53616", "author_site": "Daniel Jarrett, Alihan H\u00fcy\u00fck, Mihaela van der Schaar", "tldr": "We study the problem of learning to *mediate* between (oracle) expert behavior and (imperfect) human behavior with *abstentive* feedback.", "abstract": "Consider learning a decision support assistant to serve as an intermediary between (oracle) expert behavior and (imperfect) human behavior: At each time, the algorithm observes an action chosen by a fallible agent, and decides whether to *accept* that agent's decision, *intervene* with an alternative, or *request* the expert's opinion. 
For instance, in clinical diagnosis, fully-autonomous machine behavior is often beyond ethical affordances, thus real-world decision support is often limited to monitoring and forecasting. Instead, such an intermediary would strike a prudent balance between the former (purely prescriptive) and latter (purely descriptive) approaches, while providing an efficient interface between human mistakes and expert feedback. In this work, we first formalize the sequential problem of *online decision mediation*---that is, of simultaneously learning and evaluating mediator policies from scratch with *abstentive feedback*: In each round, deferring to the oracle obviates the risk of error, but incurs an upfront penalty, and reveals the otherwise hidden expert action as a new training data point. Second, we motivate and propose a solution that seeks to trade off (immediate) loss terms against (future) improvements in generalization error; in doing so, we identify why conventional bandit algorithms may fail. Finally, through experiments and sensitivities on a variety of datasets, we illustrate consistent gains over applicable benchmarks on performance measures with respect to the mediator policy, the learned model, and the decision-making system as a whole.", "keywords": "Decision System;Decision Mediation;Decision Support", "primary_area": "", "supplementary_material": "/attachment/73f8a9af0491b958860b303390ae292d3558bb8e.pdf", "author": "Daniel Jarrett;Alihan H\u00fcy\u00fck;Mihaela van der Schaar", "authorids": "~Daniel_Jarrett1;~Alihan_H\u00fcy\u00fck1;~Mihaela_van_der_Schaar2", "gender": ";;F", "homepage": "https://danieljarrett.github.io;;https://www.vanderschaar-lab.com", "dblp": "230/8183;227/2296;", "google_scholar": "Pczk-PQAAAAJ;EMq6KwMAAAAJ;DZ3S--MAAAAJ", "orcid": "0000-0002-2204-6515;;", "linkedin": "danjarrett/;;", "or_profile": "~Daniel_Jarrett1;~Alihan_H\u00fcy\u00fck1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;ucla.edu", "position": "Ph.D.;PhD student;Full Professor", "bibtex": "@inproceedings{\njarrett2022online,\ntitle={Online Decision Mediation},\nauthor={Daniel Jarrett and Alihan H{\\\"u}y{\\\"u}k and Mihaela van der Schaar},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2ZfUNW7SoaS}\n}", "github": "", "project": "", "reviewers": "Vsuz;X8r8;EaPK", "pdf_size": 3732347, "rating": "5;6;7", "confidence": "4;2;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "4;2;3", "contribution": "2;3;3", "wc_summary": "105;66;86", "wc_strengths_and_weaknesses": "204;161;80", "wc_questions": "97;13;43", "wc_limitations": "170;1;83", "wc_review": "576;241;292", "wc_reply_reviewers": "0;0;25", "wc_reply_authors": "2642;589;840", "reply_reviewers": "0;0;1", "reply_authors": "5;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 85.66666666666667, 15.923427883328248 ], "wc_strengths_and_weaknesses_avg": [ 148.33333333333334, 51.409035090039275 ], "wc_questions_avg": [ 51.0, 34.75629439396553 ], "wc_limitations_avg": [ 84.66666666666667, 69.00402564746173 ], "wc_review_avg": [ 369.6666666666667, 147.37782133761587 ], "wc_reply_reviewers_avg": [ 8.333333333333334, 11.785113019775793 ], "wc_reply_authors_avg": [ 1357.0, 914.3919655523372 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13744527458089681866&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Towards Consistency in Adversarial Classification", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53498", "id": "2_AZxVpFlGP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/38d6af46cca4ce1f7d699bf11078cb84-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2_AZxVpFlGP", "openreview": "https://openreview.net/forum?id=2_AZxVpFlGP", "poster": "/media/PosterPDFs/NeurIPS%202022/bc29e1f123ed6f213520caad629ee432.png?t=1666860643.8950841", "slides": "https://nips.cc/virtual/2022/poster/53498", "video": "https://nips.cc/virtual/2022/poster/53498", "author_site": "Laurent Meunier, Raphael Ettedgui, Rafael Pinot, Yann Chevaleyre, Jamal Atif", "tldr": "We study calibration and consistency of losses in the adversarial setting.", "abstract": "In this paper, we study the problem of consistency in the context of adversarial examples. Specifically, we tackle the following question: can surrogate losses still be used as a proxy for minimizing the $0/1$ loss in the presence of an adversary that alters the inputs at test-time? Different from the standard classification task, this question cannot be reduced to a point-wise minimization problem, and calibration need not be sufficient to ensure consistency. 
In this paper, we expose some pathological behaviors specific to the adversarial problem, and show that no convex surrogate loss can be consistent or calibrated in this context. It is therefore necessary to design another class of surrogate functions that can be used to solve the adversarial consistency issue. As a first step towards designing such a class, we identify sufficient and necessary conditions for a surrogate loss to be calibrated in both the adversarial and standard settings. Finally, we give some directions for building a class of losses that could be consistent in the adversarial framework.\n\n", "keywords": "adversarial;consistency;calibration", "primary_area": "", "supplementary_material": "/attachment/8ebe2bc729327e8ccd4ed33c0057b9438fa5577c.pdf", "author": "Laurent Meunier;Raphael Ettedgui;Rafael Pinot;Yann Chevaleyre;Jamal Atif", "authorids": "~Laurent_Meunier1;~Raphael_Ettedgui1;~Rafael_Pinot1;~Yann_Chevaleyre1;~Jamal_Atif1", "gender": "M;M;;M;M", "homepage": ";;;https://www.lamsade.dauphine.fr/~ychevaleyre/;http://www.lamsade.dauphine.fr/~atif/doku.php", "dblp": "15/4624;;;55/5658;http://dblp.uni-trier.de/pers/hd/a/Atif:Jamal", "google_scholar": ";https://scholar.google.fr/citations?user=U1TFo7gAAAAJ;;SF6g8p4AAAAJ;", "orcid": ";;;;", "linkedin": ";raphaelettedgui/;;yannchevaleyre;", "or_profile": "~Laurent_Meunier1;~Raphael_Ettedgui1;~Rafael_Pinot1;~Yann_Chevaleyre1;~Jamal_Atif1", "aff": "Univerist\u00e9 Paris-Dauphine;, Universit\u00e9 Paris-Dauphine (Paris IX);;Universit\u00e9 Paris-Dauphine (Paris IX);Universit\u00e9 Paris-Dauphine", "aff_domain": "dauphine.fr;lamsade.dauphine.fr;;dauphine.fr;dauphine.fr", "position": "PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmeunier2022towards,\ntitle={Towards Consistency in Adversarial Classification},\nauthor={Laurent Meunier and Raphael Ettedgui and Rafael Pinot and Yann Chevaleyre and Jamal Atif},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2_AZxVpFlGP}\n}", "github": "", "project": "", "reviewers": "a53P;MTmv;cdXZ", "pdf_size": 629683, "rating": "6;7;9", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "contribution": "3;3;3", "wc_summary": "65;98;86", "wc_strengths_and_weaknesses": "67;137;180", "wc_questions": "48;54;277", "wc_limitations": "31;15;76", "wc_review": "211;304;619", "wc_reply_reviewers": "23;0;167", "wc_reply_authors": "283;243;860", "reply_reviewers": "1;0;1", "reply_authors": "1;1;3", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.0, 13.638181696985855 ], "wc_strengths_and_weaknesses_avg": [ 128.0, 46.56894530335282 ], "wc_questions_avg": [ 126.33333333333333, 106.56557709796454 ], "wc_limitations_avg": [ 40.666666666666664, 25.82419193099542 ], "wc_review_avg": [ 378.0, 174.59095051004218 ], "wc_reply_reviewers_avg": [ 63.333333333333336, 73.90233795730386 ], "wc_reply_authors_avg": [ 462.0, 281.90187418083383 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14346924563808539282&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "dauphine.fr;lamsade.dauphine.fr;;dauphine.fr;dauphine.fr", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Universit\u00e9 Paris-Dauphine", "aff_unique_dep": "", "aff_unique_url": "https://www.univ-paris-dauphine.fr", "aff_unique_abbr": "UPD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Generalization Bounds with Minimal Dependency on Hypothesis Class via Distributionally Robust Optimization", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54244", "id": "2bE4He5a9eQ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b0dc3753faa0f55cb6e548bbe414bd08-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2bE4He5a9eQ", "openreview": "https://openreview.net/forum?id=2bE4He5a9eQ", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54244", "video": "https://nips.cc/virtual/2022/poster/54244", "author_site": "Yibo Zeng, Henry Lam", "tldr": "", "abstract": "Established approaches to obtain generalization bounds in data-driven optimization and machine learning mostly build on solutions from empirical risk minimization (ERM), which depend crucially on the functional complexity of the hypothesis class. In this paper, we present an alternate route to obtain these bounds on the solution from distributionally robust optimization (DRO), a recent data-driven optimization framework based on worst-case analysis and the notion of ambiguity set to capture statistical uncertainty. In contrast to the hypothesis class complexity in ERM, our DRO bounds depend on the ambiguity set geometry and its compatibility with the true loss function. 
Notably, when using statistical distances such as maximum mean discrepancy, Wasserstein distance, or $\\phi$-divergence in the DRO, our analysis implies generalization bounds whose dependence on the hypothesis class appears to be the minimal possible: The bound depends solely on the true loss function, independent of any other candidates in the hypothesis class. To the best of our knowledge, it is the first generalization bound of this type in the literature, and we hope our findings can open the door for a better understanding of DRO, especially its benefits on loss minimization and other machine learning applications.", "keywords": "distributionally robust optimization;generalization bound;maximum mean discrepancy;reproducing kernel Hilbert space;hypothesis class complexity", "primary_area": "", "supplementary_material": "/attachment/aa593540847a1f8d879f05a0be140a6f20d94e7e.zip", "author": "Yibo Zeng;Henry Lam", "authorids": "~Yibo_Zeng1;~Henry_Lam1", "gender": ";", "homepage": ";http://www.columbia.edu/~khl2114/", "dblp": ";35/9508", "google_scholar": "4bIBeOgAAAAJ;Bnj50x0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yibo_Zeng1;~Henry_Lam1", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzeng2022generalization,\ntitle={Generalization Bounds with Minimal Dependency on Hypothesis Class via Distributionally Robust Optimization},\nauthor={Yibo Zeng and Henry Lam},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2bE4He5a9eQ}\n}", "github": "", "project": "", "reviewers": "fMCk;J945;xNyC", "pdf_size": 467750, "rating": "5;6;7", "confidence": "2;4;5", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "3;4;3", "contribution": "2;3;3", "wc_summary": "77;119;52", "wc_strengths_and_weaknesses": "346;113;142", "wc_questions": "7;67;233", "wc_limitations": "38;7;63", "wc_review": "468;306;490", "wc_reply_reviewers": "46;44;0", "wc_reply_authors": "659;1468;1482", "reply_reviewers": "1;2;0", "reply_authors": "1;5;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.66666666666667, 27.644569488820444 ], "wc_strengths_and_weaknesses_avg": [ 200.33333333333334, 103.68006344305329 ], "wc_questions_avg": [ 102.33333333333333, 95.58707490497284 ], "wc_limitations_avg": [ 36.0, 22.90560339014597 ], "wc_review_avg": [ 421.3333333333333, 82.04605752597473 ], "wc_reply_reviewers_avg": [ 30.0, 21.228911104120876 ], "wc_reply_authors_avg": [ 1203.0, 384.7085476911927 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8957949159194763503&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Dataset Distillation using Neural Feature Regression", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53822", "id": "2clwrA2tfik", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3fe2a777282299ecb4f9e7ebb531f0ab-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2clwrA2tfik", "openreview": "https://openreview.net/forum?id=2clwrA2tfik", "poster": "/media/PosterPDFs/NeurIPS%202022/53822.png?t=1669349166.3447702", "slides": "https://nips.cc/virtual/2022/poster/53822", "video": "https://nips.cc/virtual/2022/poster/53822", "author_site": "Yongchao Zhou, Ehsan Nezhadarya, Jimmy Ba", "tldr": "We propose an efficient method for meta-gradient computation and a regularization trick to alleviate overfitting in dataset distillation. ", "abstract": "Dataset distillation aims to learn a small synthetic dataset that preserves most of the information from the original dataset. Dataset distillation can be formulated as a bi-level meta-learning problem where the outer loop optimizes the meta-dataset and the inner loop trains a model on the distilled data. Meta-gradient computation is one of the key challenges in this formulation, as differentiating through the inner loop learning procedure introduces significant computation and memory costs. In this paper, we address these challenges using neural Feature Regression with Pooling (FRePo), achieving the state-of-the-art performance with an order of magnitude less memory requirement and two orders of magnitude faster training than previous methods. The proposed algorithm is analogous to truncated backpropagation through time with a pool of models to alleviate various types of overfitting in dataset distillation. FRePo significantly outperforms the previous methods on CIFAR100, Tiny ImageNet, and ImageNet-1K. Furthermore, we show that high-quality distilled data can greatly improve various downstream applications, such as continual learning and membership inference defense. Please check out our webpage at https://sites.google.com/view/frepo.", "keywords": "Deep Learning;Dataset Distillation;Bi-Level Optimization;Meta Learning;Continual Learning;Privacy-Preserving", "primary_area": "", "supplementary_material": "/attachment/7c25de23fba825e3e4b2ac25eca152d6e4f50779.pdf", "author": "Yongchao Zhou;Ehsan Nezhadarya;Jimmy Ba", "authorids": "~Yongchao_Zhou1;~Ehsan_Nezhadarya1;~Jimmy_Ba1", "gender": "M;M;M", "homepage": ";;http://jimmylba.github.io", "dblp": ";;https://dblp.org/pers/b/Ba:Jimmy.html", "google_scholar": "35M6rhsAAAAJ;https://scholar.google.ca/citations?user=7uIrN0UAAAAJ;https://scholar.google.ca/citations?user=ymzxRhAAAAAJ", "orcid": ";;", "linkedin": "yongchao-zhou-a298a7158/;https://ca.linkedin.com/in/enarya;", "or_profile": "~Yongchao_Zhou1;~Ehsan_Nezhadarya1;~Jimmy_Ba1", "aff": "University of Toronto;LG Toronto AI lab;Department of Computer Science, University of Toronto", "aff_domain": "mail.utoronto.ca;lge.com;cs.toronto.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhou2022dataset,\ntitle={Dataset Distillation using Neural Feature Regression},\nauthor={Yongchao Zhou and Ehsan Nezhadarya and Jimmy Ba},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2clwrA2tfik}\n}", "github": "", "project": "", "reviewers": "JSgu;p7tg;nCf8;aP6S", "pdf_size": 3541336, "rating": "7;7;7;8", "confidence": "3;4;2;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "4;3;3;4", "contribution": "3;3;3;4", "wc_summary": "111;68;51;112", "wc_strengths_and_weaknesses": "99;172;112;346", "wc_questions": "217;45;7;240", "wc_limitations": "5;16;23;23", "wc_review": "432;301;193;721", "wc_reply_reviewers": "63;226;20;405", "wc_reply_authors": "1094;2244;731;1767", "reply_reviewers": "1;2;1;2", "reply_authors": "2;4;1;3", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 26.688012290165037 ], "wc_strengths_and_weaknesses_avg": [ 182.25, 98.4692210794825 ], "wc_questions_avg": [ 127.25, 102.46066318348716 ], "wc_limitations_avg": [ 16.75, 7.361215932167728 ], "wc_review_avg": [ 411.75, 197.5871643098306 ], "wc_reply_reviewers_avg": [ 178.5, 151.6748166308435 ], "wc_reply_authors_avg": [ 1459.0, 586.1522839672298 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15355176449784124932&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mail.utoronto.ca;lge.com;cs.toronto.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Toronto;LG", "aff_unique_dep": ";AI lab", "aff_unique_url": "https://www.utoronto.ca;https://www.lg.com/ca", "aff_unique_abbr": "U of T;LG", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "RNNs of RNNs: Recursive Construction of Stable Assemblies of Recurrent Neural Networks", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53104", "id": "2dgB38geVEU", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c4d5e998372e2b8742c6dc806e38bc0e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2dgB38geVEU", "openreview": "https://openreview.net/forum?id=2dgB38geVEU", "poster": "/media/PosterPDFs/NeurIPS%202022/53104.png?t=1669592518.9430296", "slides": "https://nips.cc/virtual/2022/poster/53104", "video": "https://nips.cc/virtual/2022/poster/53104", "author_site": "Leo Kozachkov, Michaela Ennis, Jean-Jacques Slotine", "tldr": "We develop theory for analyzing and optimizing stable \"RNNs of RNNs\".", "abstract": "Recurrent neural networks (RNNs) are widely used throughout neuroscience as models of local neural activity. Many properties of single RNNs are well characterized theoretically, but experimental neuroscience has moved in the direction of studying multiple interacting areas, and RNN theory needs to be likewise extended. We take a constructive approach towards this problem, leveraging tools from nonlinear control theory and machine learning to characterize when combinations of stable RNNs will themselves be stable. Importantly, we derive conditions which allow for massive feedback connections between interacting RNNs. 
We parameterize these conditions for easy optimization using gradient-based techniques, and show that stability-constrained \"networks of networks\" can perform well on challenging sequential-processing benchmark tasks. Altogether, our results provide a principled approach towards understanding distributed, modular function in the brain.", "keywords": "Neuroscience;Recurrent Neural Networks;Control Theory;Machine Learning;Dynamical Systems", "primary_area": "", "supplementary_material": "/attachment/05d90ebe9c42355274d06583a1bfc59321a81db0.pdf", "author": "Leo Kozachkov;Michaela M Ennis;Jean-Jacques Slotine", "authorids": "~Leo_Kozachkov1;~Michaela_M_Ennis1;~Jean-Jacques_Slotine1", "gender": ";F;M", "homepage": "https://kozleo.github.io/;https://menace.live;http://web.mit.edu/nsl/www/index.html", "dblp": "195/5971;295/9006;22/3009", "google_scholar": "V5dtdeUAAAAJ;1PVaM_AAAAAJ;TcREpMQAAAAJ", "orcid": "0000-0003-4330-1201;0000-0001-7898-8184;", "linkedin": ";ennisthemennis/;", "or_profile": "~Leo_Kozachkov1;~Michaela_M_Ennis1;~Jean-Jacques_Slotine1", "aff": "Massachusetts Institute of Technology;Harvard University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;harvard.edu;mit.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkozachkov2022rnns,\ntitle={{RNN}s of {RNN}s: Recursive Construction of Stable Assemblies of Recurrent Neural Networks},\nauthor={Leo Kozachkov and Michaela M Ennis and Jean-Jacques Slotine},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2dgB38geVEU}\n}", "github": "", "project": "", "reviewers": "tmsC;HWhw;3ViV;vKbB", "pdf_size": 2942165, "rating": "4;5;6;8", "confidence": "4;3;4;2", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "2;2;3;4", "contribution": "3;2;3;4", "wc_summary": "104;108;223;53", "wc_strengths_and_weaknesses": "235;75;362;140", "wc_questions": "140;137;508;52", "wc_limitations": "1;1;11;26", "wc_review": "480;321;1104;271", "wc_reply_reviewers": "233;85;434;0", "wc_reply_authors": "1141;772;1004;256", "reply_reviewers": "1;1;2;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.0, 62.213342620373645 ], "wc_strengths_and_weaknesses_avg": [ 203.0, 108.00231479000809 ], "wc_questions_avg": [ 209.25, 176.06444132760026 ], "wc_limitations_avg": [ 9.75, 10.231690964840562 ], "wc_review_avg": [ 544.0, 332.3981046877374 ], "wc_reply_reviewers_avg": [ 188.0, 164.6921370314928 ], "wc_reply_authors_avg": [ 793.25, 337.05887838773805 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7644707871564383, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16529854408520910175&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "mit.edu;harvard.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.harvard.edu", "aff_unique_abbr": "MIT;Harvard", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Amortized Projection Optimization for Sliced Wasserstein Generative Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54653", "id": "2dxsDFaESK", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f02f1185b97518ab5bd7ebde466992d3-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2dxsDFaESK", "openreview": "https://openreview.net/forum?id=2dxsDFaESK", "poster": "/media/PosterPDFs/NeurIPS%202022/54653.png?t=1669183761.272722", "slides": "https://nips.cc/virtual/2022/poster/54653", "video": "https://nips.cc/virtual/2022/poster/54653", "author_site": "Khai Nguyen, Nhat Ho", "tldr": "We propose to utilize amortized optimization to solve the computational issue of sliced Wasserstein in deep learning applications.", "abstract": "Seeking informative projecting directions has been an important task in utilizing sliced Wasserstein distance in applications. However, finding these directions usually requires an iterative optimization procedure over the space of projecting directions, which is computationally expensive. Moreover, the computational issue is even more severe in deep learning applications, where computing the distance between two mini-batch probability measures is repeated several times. This nested-loop has been one of the main challenges that prevent the usage of sliced Wasserstein distances based on good projections in practice. To address this challenge, we propose to utilize the \\textit{learning-to-optimize} technique or \\textit{amortized optimization} to predict the informative direction of any given two mini-batch probability measures. To the best of our knowledge, this is the first work that bridges amortized optimization and sliced Wasserstein generative models. In particular, we derive linear amortized models, generalized linear amortized models, and non-linear amortized models which are corresponding to three types of novel mini-batch losses, named \\emph{amortized sliced Wasserstein}. We demonstrate the favorable performance of the proposed sliced losses in deep generative modeling on standard benchmark datasets.", "keywords": "Sliced Wasserstein;Optimal Transport;Amortized Optimization;Generative Models", "primary_area": "", "supplementary_material": "/attachment/ad930fd78b0e3e4d0e71e7b29b3547a1dc75d70d.pdf", "author": "Khai Nguyen;Nhat Ho", "authorids": "~Khai_Nguyen1;~Nhat_Ho1", "gender": "M;M", "homepage": "https://khainb.com;https://nhatptnk8912.github.io/", "dblp": "120/4308;203/4479", "google_scholar": "im5fNaQAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";", "linkedin": ";nhat-pham-minh-ho-267b8164/", "or_profile": "~Khai_Nguyen1;~Nhat_Ho1", "aff": "University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2022amortized,\ntitle={Amortized Projection Optimization for Sliced Wasserstein Generative Models},\nauthor={Khai Nguyen and Nhat Ho},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2dxsDFaESK}\n}", "github": "", "project": "", "reviewers": "hTkP;nJDk;3bjP;mdhx", "pdf_size": 2729376, "rating": "4;6;6;8", "confidence": "5;4;3;3", "soundness": "1;3;3;4", "novelty": "2;3;2;4", "presentation": "3;2;3;4", "contribution": "2;3;2;4", "wc_summary": "460;91;53;21", "wc_strengths_and_weaknesses": "539;47;119;181", "wc_questions": "204;273;63;22", "wc_limitations": "33;20;24;143", "wc_review": "1236;431;259;367", "wc_reply_reviewers": "0;201;0;40", "wc_reply_authors": "1600;506;1474;411", "reply_reviewers": "0;1;0;1", "reply_authors": "4;2;4;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 156.25, 177.11207609872343 ], "wc_strengths_and_weaknesses_avg": [ 221.5, 189.34294283125527 ], "wc_questions_avg": [ 140.5, 102.02573204834161 ], "wc_limitations_avg": [ 55.0, 51.024503917235684 ], "wc_review_avg": [ 573.25, 387.54507802318943 ], "wc_reply_reviewers_avg": [ 60.25, 82.88659421160939 ], "wc_reply_authors_avg": [ 997.75, 542.1283865469507 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4767006857593439261&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generalised Implicit Neural Representations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54580", "id": "2fD1Ux9InIW", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c44a04289beaf0a7d968a94066a1d696-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2fD1Ux9InIW", "openreview": "https://openreview.net/forum?id=2fD1Ux9InIW", "poster": "/media/PosterPDFs/NeurIPS%202022/54580.png?t=1668098236.9595938", "slides": "https://nips.cc/virtual/2022/poster/54580", "video": "https://nips.cc/virtual/2022/poster/54580", "author_site": "Daniele Grattarola, Pierre Vandergheynst", "tldr": "We train implicit neural representations for signals on non-Euclidean domains, showing experiments with biological, social, and meteorological data.", "abstract": "We consider the problem of learning implicit neural representations (INRs) for signals on non-Euclidean domains. In the Euclidean case, INRs are trained on a discrete sampling of a signal over a regular lattice. Here, we assume that the continuous signal exists on some unknown topological space from which we sample a discrete graph.\nIn the absence of a coordinate system to identify the sampled nodes, we propose approximating their location with a spectral embedding of the graph. 
This allows us to train INRs without knowing the underlying continuous domain, which is the case for most graph signals in nature, while also making the INRs independent of any choice of coordinate system. We show experiments with our method on various real-world signals on non-Euclidean domains.", "keywords": "implicit neural representations;neural fields", "primary_area": "", "supplementary_material": "/attachment/bc2064d59bd00c1448e4c2fb2b87a03be7896411.pdf", "author": "Daniele Grattarola;Pierre Vandergheynst", "authorids": "~Daniele_Grattarola1;~Pierre_Vandergheynst1", "gender": "M;M", "homepage": "https://danielegrattarola.github.io/about/;http://lts2.epfl.ch", "dblp": "220/5710;16/355", "google_scholar": "https://scholar.google.ch/citations?user=2AcU-iAAAAAJ;1p9NOFEAAAAJ", "orcid": "0000-0001-9506-037X;0000-0002-9070-900X", "linkedin": "danielegrattarola/;", "or_profile": "~Daniele_Grattarola1;~Pierre_Vandergheynst1", "aff": "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\ngrattarola2022generalised,\ntitle={Generalised Implicit Neural Representations},\nauthor={Daniele Grattarola and Pierre Vandergheynst},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2fD1Ux9InIW}\n}", "github": "", "project": "", "reviewers": "NW5a;ofDT;8jka", "pdf_size": 9325135, "rating": "6;6;7", "confidence": "3;5;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "4;3;3", "contribution": "3;3;3", "wc_summary": "60;69;94", "wc_strengths_and_weaknesses": "123;184;333", "wc_questions": "111;53;75", "wc_limitations": "5;80;70", "wc_review": "299;386;572", "wc_reply_reviewers": "0;23;121", "wc_reply_authors": "732;819;841", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.33333333333333, 14.383632673594278 ], "wc_strengths_and_weaknesses_avg": [ 213.33333333333334, 88.20556797743679 ], "wc_questions_avg": [ 79.66666666666667, 23.907228102721476 ], "wc_limitations_avg": [ 51.666666666666664, 33.2498955721 ], "wc_review_avg": [ 419.0, 113.86834503056589 ], "wc_reply_reviewers_avg": [ 48.0, 52.46586191674227 ], "wc_reply_authors_avg": [ 797.3333333333334, 47.062606057132975 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8630199693995819513&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Action-modulated midbrain dopamine activity arises from distributed control policies", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54377", 
"id": "2gZccSOY04p", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/24cb8b08f3cb2f59671e33faac4790e6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2gZccSOY04p", "openreview": "https://openreview.net/forum?id=2gZccSOY04p", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54377", "video": "https://nips.cc/virtual/2022/poster/54377", "author_site": "Jack Lindsey, Ashok Litwin-Kumar", "tldr": "Action-modulated dopamine responses provide a mechanism for off-policy reinforcement learning in the basal ganglia", "abstract": "Animal behavior is driven by multiple brain regions working in parallel with distinct control policies. We present a biologically plausible model of off-policy reinforcement learning in the basal ganglia, which enables learning in such an architecture. The model accounts for action-related modulation of dopamine activity that is not captured by previous models that implement on-policy algorithms. In particular, the model predicts that dopamine activity signals a combination of reward prediction error (as in classic models) and \"action surprise,\" a measure of how unexpected an action is relative to the basal ganglia's current policy. In the presence of the action surprise term, the model implements an approximate form of $Q$-learning. On benchmark navigation and reaching tasks, we show empirically that this model is capable of learning from data driven completely or in part by other policies (e.g. from other brain regions). By contrast, models without the action surprise term suffer in the presence of additional policies, and are incapable of learning at all from behavior that is completely externally driven. The model provides a computational account for numerous experimental findings about dopamine activity that cannot be explained by classic models of reinforcement learning in the basal ganglia. These include differing levels of action surprise signals in dorsal and ventral striatum, decreasing amounts movement-modulated dopamine activity with practice, and representations of action initiation and kinematics in dopamine activity. It also provides further predictions that can be tested with recordings of striatal dopamine activity.", "keywords": "neuroscience;reinforcement learning;dopamine;basal ganglia;off-policy learning", "primary_area": "", "supplementary_material": "/attachment/13e7e172788864830ac2e21cfe4d72c02c6b5609.zip", "author": "Jack Lindsey;Ashok Litwin-Kumar", "authorids": "~Jack_Lindsey1;~Ashok_Litwin-Kumar1", "gender": ";M", "homepage": ";", "dblp": ";", "google_scholar": "CNrQvh4AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Jack_Lindsey1;~Ashok_Litwin-Kumar1", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu", "position": "Student;Assistant Professor", "bibtex": "@inproceedings{\nlindsey2022actionmodulated,\ntitle={Action-modulated midbrain dopamine activity arises from distributed control policies},\nauthor={Jack Lindsey and Ashok Litwin-Kumar},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2gZccSOY04p}\n}", "github": "", "project": "", "reviewers": "QgmR;YRYS;15y6", "pdf_size": 2373341, "rating": "5;6;9", "confidence": "4;4;5", "soundness": "2;2;4", "novelty": "2;3;4", "presentation": "2;3;4", "contribution": "2;3;4", "wc_summary": "18;85;84", "wc_strengths_and_weaknesses": "105;359;176", "wc_questions": "155;290;3", "wc_limitations": "24;20;18", "wc_review": "302;754;281", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1223;696;247", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 1.699673171197595 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 62.333333333333336, 31.351058816073323 ], "wc_strengths_and_weaknesses_avg": [ 213.33333333333334, 107.00259602250571 ], "wc_questions_avg": [ 149.33333333333334, 117.2357548797389 ], "wc_limitations_avg": [ 20.666666666666668, 2.494438257849294 ], "wc_review_avg": [ 445.6666666666667, 218.19308472594227 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 722.0, 398.87424918972476 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2847567587636467784&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "BiMLP: Compact Binary Architectures for Vision Multi-Layer Perceptrons", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53746", "id": "2ge7_pORL_n", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/20f94998511f25bb6378cae0e098bc46-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ge7_pORL_n", "openreview": "https://openreview.net/forum?id=2ge7_pORL_n", "poster": "/media/PosterPDFs/NeurIPS%202022/20568692db622456cc42a2e853ca21f8.png?t=1665999890.1120024", "slides": "https://nips.cc/virtual/2022/poster/53746", "video": "https://nips.cc/virtual/2022/poster/53746", "author_site": "Yixing Xu, Xinghao Chen, Yunhe Wang", "tldr": "A new compact binary architecture for vision MLPs to handle the problem that directly binarizing vision MLPs results in poor performance due to the limited representation ability.", "abstract": "This paper studies the problem of designing compact binary architectures for vision multi-layer perceptrons (MLPs). We provide extensive analysis on the difficulty of binarizing vision MLPs and find that previous binarization methods perform poorly due to limited capacity of binary MLPs. In contrast with traditional CNNs that utilize convolutional operations with large kernel sizes, fully-connected (FC) layers in MLPs can be treated as convolutional layers with kernel size $1\\times1$. 
Thus, the representation ability of the FC layers will be limited when they are binarized, which places restrictions on the capability of spatial mixing and channel mixing on the intermediate features. To this end, we propose to improve the performance of the binary MLP (BiMLP) model by enriching the representation ability of binary FC layers. We design a novel binary block that contains multiple branches to merge a series of outputs from the same stage, and also a universal shortcut connection that encourages the information flow from the previous stage. The downsampling layers are also carefully designed to reduce the computational complexity while maintaining the classification performance. Experimental results on the benchmark dataset ImageNet-1k demonstrate the effectiveness of the proposed BiMLP models, which achieve state-of-the-art accuracy compared to prior binary CNNs.\nThe MindSpore code is available at \\url{https://gitee.com/mindspore/models/tree/master/research/cv/BiMLP}.", "keywords": "vision MLP;binary neural network;compact architecture;representation ability", "primary_area": "", "supplementary_material": "", "author": "Yixing Xu;Xinghao Chen;Yunhe Wang", "authorids": "~Yixing_Xu2;~Xinghao_Chen1;~Yunhe_Wang1", "gender": "M;M;M", "homepage": ";;https://www.wangyunhe.site/", "dblp": "142/1013;30/4937-1;63/8217-1", "google_scholar": "32tJoOkAAAAJ;tuGWUVIAAAAJ;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";0000-0002-2102-8235;0000-0002-0142-509X", "linkedin": ";;", "or_profile": "~Yixing_Xu2;~Xinghao_Chen1;~Yunhe_Wang1", "aff": "Advanced Micro Devices;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab", "aff_domain": "amd.com;huawei.com;huawei.com", "position": "Principal Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nxu2022bimlp,\ntitle={Bi{MLP}: Compact Binary Architectures for Vision Multi-Layer Perceptrons},\nauthor={Yixing Xu and Xinghao Chen and Yunhe Wang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2ge7_pORL_n}\n}", "github": "", "project": "", "reviewers": "orUW;BkjE;qm2m;NgiC", "pdf_size": 486839, "rating": "4;5;6;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "contribution": "2;3;3;3", "wc_summary": "62;50;47;93", "wc_strengths_and_weaknesses": "218;88;32;210", "wc_questions": "155;39;54;53", "wc_limitations": "111;11;46;1", "wc_review": "546;188;179;357", "wc_reply_reviewers": "0;14;17;22", "wc_reply_authors": "1489;90;697;437", "reply_reviewers": "0;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 18.207141456033124 ], "wc_strengths_and_weaknesses_avg": [ 137.0, 79.55501241279521 ], "wc_questions_avg": [ 75.25, 46.42399702739952 ], "wc_limitations_avg": [ 42.25, 43.06608294238054 ], "wc_review_avg": [ 317.5, 149.7706580075016 ], "wc_reply_reviewers_avg": [ 13.25, 8.166241485530538 ], "wc_reply_authors_avg": [ 678.25, 515.2442988524958 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9907743158987777341&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "amd.com;huawei.com;huawei.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Advanced Micro Devices, Inc.;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "https://www.amd.com;https://www.huawei.com", "aff_unique_abbr": "AMD;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "title": "Global Linear and Local Superlinear Convergence of IRLS for Non-Smooth Robust Regression", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54056", "id": "2hp6sIBsCDH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/ba3354bcfeae4f166a8bfe75443ac8f7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2hp6sIBsCDH", "openreview": "https://openreview.net/forum?id=2hp6sIBsCDH", "poster": "/media/PosterPDFs/NeurIPS%202022/54056.png?t=1669499123.5294664", "slides": "https://nips.cc/virtual/2022/poster/54056", "video": "https://nips.cc/virtual/2022/poster/54056", "author_site": "Liangzu Peng, Christian K\u00fcmmerle, Rene Vidal", "tldr": "The paper provides the first local superlinear convergence rate analysis of iteratively reweighted least-squares for robust regression with several applications.", "abstract": "We advance both the theory and practice of robust $\\ell_p$-quasinorm regression for $p \\in (0,1]$ by using novel variants of iteratively reweighted least-squares (IRLS) to solve the underlying non-smooth problem. In the convex case, $p=1$, we prove that this IRLS variant converges globally at a linear rate under a mild, deterministic condition on the feature matrix called the stable range space property. 
In the non-convex case, $p\\in(0,1)$, we prove that under a similar condition, IRLS converges locally to the global minimizer at a superlinear rate of order $2-p$; the rate becomes quadratic as $p\\to 0$. We showcase the proposed methods in three applications: real phase retrieval, regression without correspondences, and robust face restoration. The results show that (1) IRLS can handle a larger number of outliers than other methods, (2) it is faster than competing methods at the same level of accuracy, (3) it restores a sparsely corrupted face image with satisfactory visual quality.", "keywords": "Convergence Rate Analysis;Non-Smooth Optimization;Robust Regression;Outliers;Iteratively Reweighted Least-Squares;Sparsity", "primary_area": "", "supplementary_material": "/attachment/26e900220bee3fbeeaa1b9ec2b4ee00e3f2f4322.pdf", "author": "Liangzu Peng;Christian K\u00fcmmerle;Rene Vidal", "authorids": "~Liangzu_Peng2;~Christian_K\u00fcmmerle1;~Rene_Vidal1", "gender": "M;M;", "homepage": "https://liangzu.github.io/;http://ckuemmerle.com;http://www.vision.jhu.edu", "dblp": "228/7974;198/0699;v/ReneVidal", "google_scholar": "A39MlcYAAAAJ;https://scholar.google.de/citations?user=zElx1AYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0708-7543;0000-0001-9267-5379;", "linkedin": ";;rene-vidal-74844928/", "or_profile": "~Liangzu_Peng2;~Christian_K\u00fcmmerle1;~Rene_Vidal1", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu;jhu.edu", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\npeng2022global,\ntitle={Global Linear and Local Superlinear Convergence of {IRLS} for Non-Smooth Robust Regression},\nauthor={Liangzu Peng and Christian K{\\\"u}mmerle and Rene Vidal},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2hp6sIBsCDH}\n}", "github": "", "project": "", "reviewers": "yi3J;GXHV;qErq", "pdf_size": 438494, "rating": "6;6;6", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "4;2;3", "contribution": "3;3;3", "wc_summary": "60;133;83", "wc_strengths_and_weaknesses": "216;296;198", "wc_questions": "289;37;155", "wc_limitations": "1;16;25", "wc_review": "566;482;461", "wc_reply_reviewers": "31;20;0", "wc_reply_authors": "819;485;420", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.0, 30.474032661705056 ], "wc_strengths_and_weaknesses_avg": [ 236.66666666666666, 42.59368758656877 ], "wc_questions_avg": [ 160.33333333333334, 102.94766739573181 ], "wc_limitations_avg": [ 14.0, 9.899494936611665 ], "wc_review_avg": [ 503.0, 45.36518488885502 ], "wc_reply_reviewers_avg": [ 17.0, 12.832251036613439 ], "wc_reply_authors_avg": [ 574.6666666666666, 174.79575382587404 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=145441446786155398&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "jhu.edu;jhu.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Point Cloud Completion and Segmentation by Generative Adversarial Autoencoding Network", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53563", "id": "2jTCojmmh82", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/171846d7af5ea91e63db508154eaffe8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2jTCojmmh82", "openreview": "https://openreview.net/forum?id=2jTCojmmh82", "poster": "/media/PosterPDFs/NeurIPS%202022/53563.png?t=1669024569.247076", "slides": "https://nips.cc/virtual/2022/poster/53563", "video": "https://nips.cc/virtual/2022/poster/53563", "author_site": "Changfeng Ma, Yang Yang, Jie Guo, Fei Pan, Chongjun Wang, Yanwen Guo", "tldr": "We propose an unsupervised method for point cloud completion and segmentation.", "abstract": "Most existing point cloud completion methods assume the input partial point cloud is clean, which is not the case in practice, and are generally based on supervised learning. In this paper, we present an unsupervised generative adversarial autoencoding network, named UGAAN, which completes the partial point cloud contaminated by surroundings from real scenes and cuts out the object simultaneously, only using artificial CAD models as assistance. The generator of UGAAN learns to predict the complete point clouds on real data from both the discriminator and the autoencoding process of artificial data. 
The latent codes from the generator are also fed to the discriminator, which makes the encoder extract only object features rather than noise. We also devise a refiner for generating a better complete cloud, with a segmentation module to separate the object from the background. We train our UGAAN on one real scene dataset and evaluate it on the other two. Extensive experiments and visualization demonstrate the superiority, generalization and robustness of our method. Comparisons against the previous method show that our method achieves state-of-the-art performance on unsupervised point cloud completion and segmentation on real data.", "keywords": "Point cloud completion;Point cloud segmentation;Unsupervised learning;GAN", "primary_area": "", "supplementary_material": "/attachment/867e90009bacd96fc7f1157bae5ce462b99c42c4.pdf", "author": "Changfeng Ma;Yang Yang;Jie Guo;Fei Pan;Chongjun Wang;Yanwen Guo", "authorids": "~Changfeng_Ma1;~Yang_Yang34;~Jie_Guo2;~Fei_Pan3;~Chongjun_Wang1;~Yanwen_Guo1", "gender": "M;M;M;;M;M", "homepage": "https://github.com/murcherful;https://github.com/yyyyy1231;;;;https://cs.nju.edu.cn/ywguo/", "dblp": ";;77/2751-1;;07/146;44/185-1", "google_scholar": ";;https://scholar.google.com.hk/citations?user=Sx4PQpQAAAAJ;;;hVlfEkwAAAAJ", "orcid": "0000-0001-8732-7038;;;;;", "linkedin": ";;;felix-pan-b9a896199/;;", "or_profile": "~Changfeng_Ma1;~Yang_Yang34;~Jie_Guo2;~Fei_Pan3;~Chongjun_Wang1;~Yanwen_Guo1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;Assistant Professor;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2022unsupervised,\ntitle={Unsupervised Point Cloud Completion and Segmentation by Generative Adversarial Autoencoding Network},\nauthor={Changfeng Ma and Yang Yang and Jie Guo and Fei Pan and Chongjun Wang and Yanwen Guo},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2jTCojmmh82}\n}", "github": "", "project": "", "reviewers": "hDJC;bh96;ujzp", "pdf_size": 5452116, "rating": "5;6;6", "confidence": "2;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "contribution": "2;3;3", "wc_summary": "107;101;55", "wc_strengths_and_weaknesses": "224;427;108", "wc_questions": "28;56;28", "wc_limitations": "14;12;20", "wc_review": "373;596;211", "wc_reply_reviewers": "50;19;67", "wc_reply_authors": "785;695;660", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 87.66666666666667, 23.22833518691246 ], "wc_strengths_and_weaknesses_avg": [ 253.0, 131.83575640419662 ], "wc_questions_avg": [ 37.333333333333336, 13.199326582148887 ], "wc_limitations_avg": [ 15.333333333333334, 3.39934634239519 ], "wc_review_avg": [ 393.3333333333333, 157.83183737411437 ], "wc_reply_reviewers_avg": [ 45.333333333333336, 19.871811414385174 ], "wc_reply_authors_avg": [ 713.3333333333334, 52.65189666310438 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4957800537106582392&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Multi-Instance Causal Representation Learning for Instance Label Prediction and Out-of-Distribution Generalization", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55138", "id": "2ktj0977QGO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/e261e92e1cfb820da930ad8c38d0aead-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ktj0977QGO", "openreview": "https://openreview.net/forum?id=2ktj0977QGO", "poster": "/media/PosterPDFs/NeurIPS%202022/270edd69788dce200a3b395a6da6fdb7.png?t=1666258573.5564873", "slides": "https://nips.cc/virtual/2022/poster/55138", "video": "https://nips.cc/virtual/2022/poster/55138", "author_site": "Weijia Zhang, Xuanhui Zhang, hanwen deng, Min-Ling Zhang", "tldr": "Learning invariant causal representation from multi-instance bags benefits instance label prediction and OOD generalization.", "abstract": "Multi-instance learning (MIL) deals with objects represented as bags of instances and can predict instance labels from bag-level supervision. However, significant performance gaps exist between instance-level MIL algorithms and supervised learners since the instance labels are unavailable in MIL. Most existing MIL algorithms tackle the problem by treating multi-instance bags as harmful ambiguities and predicting instance labels by reducing the supervision inexactness. 
This work studies MIL from a new perspective by considering bags as auxiliary information, and utilizes it to identify instance-level causal representations from bag-level weak supervision. We propose the CausalMIL algorithm, which not only excels at instance label prediction but also provides robustness to distribution change by synergistically integrating MIL with an identifiable variational autoencoder. Our approach is based on a practical and general assumption: the prior distribution over the instance latent representations belongs to the non-factorized exponential family conditioned on the multi-instance bags. Experiments on synthetic and real-world datasets demonstrate that our approach significantly outperforms various baselines on instance label prediction and out-of-distribution generalization tasks.", "keywords": "multi-instance learning;variational autoencoder;causal representation", "primary_area": "", "supplementary_material": "/attachment/2068949334b07d7c34f691db847bcc3f9e835a8e.pdf", "author": "Weijia Zhang;Xuanhui Zhang;Han-Wen Deng;Min-Ling Zhang", "authorids": "~Weijia_Zhang2;xuanhui.zhang.wj@gmail.com;denghw@seu.edu.cn;~Min-Ling_Zhang2", "gender": ";;;M", "homepage": "https://www.weijiazhangxh.com/;;;http://palm.seu.edu.cn/zhangml/", "dblp": "158/5387-1;;;84/271.html", "google_scholar": "https://scholar.google.com.au/citations?user=7jmAPvAAAAAJ;;;uFHCIM0AAAAJ", "orcid": "0000-0001-8103-5325;;;0000-0003-1880-5918", "linkedin": "weijia-zhang-86152337/;;;", "or_profile": "~Weijia_Zhang2;xuanhui.zhang.wj@gmail.com;denghw@seu.edu.cn;~Min-Ling_Zhang2", "aff": "Southeast University;;;Southeast University", "aff_domain": "seu.edu.cn;;;seu.edu.cn", "position": "Associate Professor;;;Full Professor", "bibtex": "@inproceedings{\nzhang2022multiinstance,\ntitle={Multi-Instance Causal Representation Learning for Instance Label Prediction and Out-of-Distribution Generalization},\nauthor={Weijia Zhang and Xuanhui Zhang and Han-Wen Deng and Min-Ling Zhang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2ktj0977QGO}\n}", "github": "", "project": "", "reviewers": "A4sn;WkwK;RjbB;s6pT", "pdf_size": 5657592, "rating": "4;5;7;8", "confidence": "4;3;3;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "contribution": "2;2;3;3", "wc_summary": "59;59;42;101", "wc_strengths_and_weaknesses": "91;442;114;287", "wc_questions": "12;2;69;90", "wc_limitations": "47;2;1;27", "wc_review": "209;505;226;505", "wc_reply_reviewers": "127;183;94;152", "wc_reply_authors": "1350;1484;261;380", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.25, 21.775846711436962 ], "wc_strengths_and_weaknesses_avg": [ 233.5, 142.23308335264338 ], "wc_questions_avg": [ 43.25, 37.17105728924051 ], "wc_limitations_avg": [ 19.25, 19.109879643786353 ], "wc_review_avg": [ 361.25, 143.87559730544996 ], "wc_reply_reviewers_avg": [ 139.0, 32.687918257362305 ], "wc_reply_authors_avg": [ 868.75, 551.8991642501372 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.38138503569823695, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5803800343677787178&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "seu.edu.cn;;;seu.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "2nJdh_C-UWe", "title": "Towards Effective and Interpretable Human-AI Collaboration in MOBA Games", "track": "main", "status": "Reject", "tldr": "We propose an efficient and interpretable Meta-Command Communication-based (MCC) framework for accomplishing effective human-AI collaboration in MOBA games. ", "abstract": "MOBA games, e.g., Dota2 and Honor of Kings, have been actively used as testbeds for recent AI research on games, and various human-level AI systems have been developed so far. However, these AI systems merely focus on how to compete with humans, exploring less how to collaborate with humans. To this end, this paper makes the first attempt to investigate human-AI collaboration in MOBA games. In this paper, we propose to enable humans and agents to collaborate through explicit communications by designing an efficient and interpretable Meta-Command Communication-based framework, dubbed MCC, for accomplishing effective human-AI collaboration in MOBA games. The MCC framework consists of two pivotal modules: 1) an interpretable communication protocol, i.e., the Meta-Command, to bridge the communication gap between humans and agents; 2) a meta-command value estimation model, i.e., the Meta-Command Selector, to select a valuable meta-command for each agent to achieve effective human-AI collaboration. Experimental results in Honor of Kings demonstrate that MCC agents can collaborate reasonably well with human teammates and even generalize to collaborate with different levels and numbers of human teammates. 
Videos are available at https://sites.google.com/view/mcc-demo.", "keywords": "Human-AI Collaboration;Game Playing;Deep Reinforcement Learning", "primary_area": "", "supplementary_material": "/attachment/7125f9cda41010f7ce62167c9b85693728b14833.pdf", "author": "Yiming Gao;Feiyu Liu;Liang Wang;Zhenjie Lian;Weixuan Wang;Siqin Li;Xianliang Wang;Xianhan Zeng;Rundong Wang;jiawei wang;QIANG FU;Yang Wei;Lanxiao Huang;Wei Liu", "authorids": "~Yiming_Gao4;~Feiyu_Liu1;~Liang_Wang10;~Zhenjie_Lian1;~Weixuan_Wang1;~Siqin_Li1;~Xianliang_Wang1;~Xianhan_Zeng1;~Rundong_Wang1;~jiawei_wang2;~QIANG_FU8;~Yang_Wei2;~Lanxiao_Huang1;~Wei_Liu3", "gender": "M;;M;;M;;M;M;M;M;M;M;M;M", "homepage": ";;;;;;;;;http://wangjw6.github.io/;;;;https://sites.google.com/view/cuweiliu", "dblp": "304/8689;https://dblp.uni-trier.de/pid/269/4504.html;56/4499.html;279/6569.html;;274/6530.html;03/1094.html;241/9695.html;254/1228;98/7308-5;;03/1094-32.html;255/6012.html;49/3283-5", "google_scholar": "https://scholar.google.com.hk/citations?user=UvvufgQAAAAJ;;;;https://scholar.google.com.hk/citations?user=VN4m3l8AAAAJ;;;;JEVpgE8AAAAJ;Y1gU9wYAAAAJ;gANaxT0AAAAJ;;;AjxoEpIAAAAJ", "orcid": ";;;;;;;;;0000-0002-6893-0912;;;;0000-0002-3865-8145", "linkedin": ";;;;;;;;;;;;;", "or_profile": "~Yiming_Gao4;~Feiyu_Liu1;~Liang_Wang10;~Zhenjie_Lian1;~Weixuan_Wang1;~Siqin_Li1;~Xianliang_Wang1;~Xianhan_Zeng1;~Rundong_Wang1;~jiawei_wang2;~QIANG_FU8;~Yang_Wei2;~Lanxiao_Huang1;~Wei_Liu3", "aff": "Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Nanyang Technological University;McGill University;Tencent AI Lab;Tencent AI Lab;Tencent TiMi L1 Studio;Tencent", "aff_domain": "tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;ntu.edu.sg;mcgill.ca;tencent.com;tencent.com;tencent.com;tencent.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;PhD student;PhD student;Principal Researcher;Researcher;Researcher;Distinguished Scientist", "bibtex": "@misc{\ngao2022towards,\ntitle={Towards Effective and Interpretable Human-{AI} Collaboration in {MOBA} Games},\nauthor={Yiming Gao and Feiyu Liu and Liang Wang and Zhenjie Lian and Weixuan Wang and Siqin Li and Xianliang Wang and Xianhan Zeng and Rundong Wang and jiawei wang and QIANG FU and Yang Wei and Lanxiao Huang and Wei Liu},\nyear={2022},\nurl={https://openreview.net/forum?id=2nJdh_C-UWe}\n}", "github": "", "project": "", "reviewers": "xzzg;PjFP;WAvp;8yYn", "site": "https://openreview.net/forum?id=2nJdh_C-UWe", "pdf_size": 5425505, "rating": "3;3;5;7", "confidence": "3;4;2;3", "soundness": "2;2;2;3", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "contribution": "2;2;3;3", "wc_summary": "107;56;42;83", "wc_strengths_and_weaknesses": "693;1150;396;597", "wc_questions": "72;84;135;51", "wc_limitations": "63;58;29;20", "wc_review": "935;1348;602;751", "wc_reply_reviewers": "308;723;0;40", "wc_reply_authors": "1366;1477;733;1071", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 4.5, 1.6583123951777 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.0, 25.0099980007996 ], "wc_strengths_and_weaknesses_avg": [ 709.0, 276.2471719312254 ], "wc_questions_avg": [ 85.5, 30.923292192132454 ], "wc_limitations_avg": [ 42.5, 18.364367672206956 ], "wc_review_avg": [ 909.0, 
279.55768635471287 ], "wc_reply_reviewers_avg": [ 267.75, 288.28490681962523 ], "wc_reply_authors_avg": [ 1161.75, 288.60126039225815 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13248169153028867656&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0;0;0;0;1;2;0;0;0;0", "aff_unique_norm": "Tencent;Nanyang Technological University;McGill University", "aff_unique_dep": "Tencent AI Lab;;", "aff_unique_url": "https://ai.tencent.com;https://www.ntu.edu.sg;https://www.mcgill.ca", "aff_unique_abbr": "Tencent AI Lab;NTU;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;2;0;0;0;0", "aff_country_unique": "China;Singapore;Canada" }, { "title": "Learning Substructure Invariance for Out-of-Distribution Molecular Representations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55440", "id": "2nWUNTnFijm", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/547108084f0c2af39b956f8eadb75d1b-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2nWUNTnFijm", "openreview": "https://openreview.net/forum?id=2nWUNTnFijm", "poster": "/media/PosterPDFs/NeurIPS%202022/7f6ffaa6bb0b408017b62254211691b5.png?t=1666803064.7131555", "slides": "https://nips.cc/virtual/2022/poster/55440", "video": "https://nips.cc/virtual/2022/poster/55440", "author_site": "Nianzu Yang, Kaipeng Zeng, Qitian Wu, Xiaosong Jia, Junchi Yan", "tldr": "We aim to solve the out-of-distribution problem on molecule representation learning tasks from a substructure invariance perspective.", "abstract": "Molecule representation learning (MRL) has been extensively studied and current methods have shown promising power for various tasks, e.g., molecular property prediction and target identification. However, a common hypothesis of existing methods is that either the model development or experimental evaluation is mostly based on i.i.d. data across training and testing. Such a hypothesis can be violated in real-world applications where testing molecules could come from new environments, bringing about serious performance degradation or unexpected predictions. We propose a new representation learning framework entitled MoleOOD to enhance the robustness of MRL models against such distribution shifts, motivated by an observation that the (bio)chemical properties of molecules are usually invariantly associated with certain privileged molecular substructures across different environments (e.g., scaffolds, sizes, etc.). Specifically, we introduce an environment inference model to identify the latent factors that impact data generation from different distributions in a fully data-driven manner. We also propose a new learning objective to guide the molecule encoder to leverage environment-invariant substructures that more stably relate with the labels across environments. Extensive experiments on ten real-world datasets demonstrate that our model has a stronger generalization ability than existing methods under various out-of-distribution (OOD) settings, despite the absence of manual specifications of environments. 
Particularly, our method achieves up to 5.9\\% and 3.9\\% improvement over the strongest baselines on OGB and DrugOOD benchmarks in terms of ROC-AUC, respectively. Our source code is publicly available at \\url{https://github.com/yangnianzu0515/MoleOOD}.", "keywords": "Invariant Learning;Out-of-Distribution;Molecule Representation Learning", "primary_area": "", "supplementary_material": "/attachment/09c579ebebd1ed22d778e75a126500dc0497a415.pdf", "author": "Nianzu Yang;Kaipeng Zeng;Qitian Wu;Xiaosong Jia;Junchi Yan", "authorids": "~Nianzu_Yang1;~Kaipeng_Zeng1;~Qitian_Wu1;~Xiaosong_Jia1;~Junchi_Yan2", "gender": "M;M;;M;", "homepage": "https://yangnianzu0515.github.io/;https://github.com/zengkaipeng;;https://jiaxiaosong1002.github.io/;", "dblp": "296/8412.html;345/6420;;274/6360;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;JeFQwxUAAAAJ;", "orcid": ";0000-0002-4798-7784;;;", "linkedin": ";;;;", "or_profile": "~Nianzu_Yang1;~Kaipeng_Zeng1;~Qitian_Wu1;~Xiaosong_Jia1;~Junchi_Yan2", "aff": "Shanghai Jiaotong University;Texas A&M University - College Station;;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;tamu.edu;;sjtu.edu.cn;", "position": "PhD student;Intern;;PhD student;", "bibtex": "@inproceedings{\nyang2022learning,\ntitle={Learning Substructure Invariance for Out-of-Distribution Molecular Representations},\nauthor={Nianzu Yang and Kaipeng Zeng and Qitian Wu and Xiaosong Jia and Junchi Yan},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2nWUNTnFijm}\n}", "github": "", "project": "", "reviewers": "MgvC;pgdS;WE4q", "pdf_size": 706469, "rating": "5;6;7", "confidence": "2;4;5", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;2;4", "contribution": "2;3;3", "wc_summary": "111;115;113", "wc_strengths_and_weaknesses": "83;45;130", "wc_questions": "409;92;54", "wc_limitations": "84;1;36", "wc_review": "687;253;333", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1987;706;499", "reply_reviewers": "0;0;0", "reply_authors": "6;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 113.0, 1.632993161855452 ], "wc_strengths_and_weaknesses_avg": [ 86.0, 34.76588366008646 ], "wc_questions_avg": [ 185.0, 159.14982458886553 ], "wc_limitations_avg": [ 40.333333333333336, 34.022868126534476 ], "wc_review_avg": [ 424.3333333333333, 188.58302032673978 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1064.0, 658.1078938897482 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 2.160246899469287 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 124, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10496131243545414447&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "sjtu.edu.cn;tamu.edu;;sjtu.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tamu.edu", "aff_unique_abbr": "SJTU;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": 
";College Station", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "2nYz4WZAne4", "title": "Generative Evolutionary Strategy For Black-Box Optimizations", "track": "main", "status": "Reject", "tldr": "We designed a black-box optimizer for high dimensional search space. It is a cooperative strategy of evolution and generative neural network.", "abstract": "Many scientific and technological problems are related to optimization. Among them, black-box optimization in high-dimensional space is particularly challenging. Recent neural network-based black-box optimization studies have shown noteworthy achievements. However, their capability in high-dimensional search space is still limited. This study proposes a black-box optimization method based on evolution strategy and generative neural network model. We designed the algorithm so that the evolutionary strategy and the generative neural network model work cooperatively with each other. This hybrid model enables reliable training of surrogate networks; it optimizes multi-objective, high-dimensional, and stochastic black-box functions. In this experiment, our method outperforms baseline optimization methods, including , including evolution strategies, and a Bayesian optimization.", "keywords": "Optimization;Blackbox;Generative model;Evolution", "primary_area": "", "supplementary_material": "/attachment/f60394a91aa728865bd72e3eea9a17a8b6809fd7.pdf", "author": "Changhwi Park;Seong Ryeol Kim;Young-Gu Kim;Dae Sin Kim", "authorids": "~Changhwi_Park1;sr75.kim@samsung.com;yg09.kim@samsung.com;daesin.kim@samsung.com", "gender": "M;;;", "homepage": "https://www.facebook.com/changhwi.park.1;;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Changhwi_Park1;sr75.kim@samsung.com;yg09.kim@samsung.com;daesin.kim@samsung.com", "aff": "Samsung;;;", "aff_domain": "samsung.com;;;", "position": "Researcher;;;", "bibtex": "@misc{\npark2022generative,\ntitle={Generative Evolutionary Strategy For Black-Box Optimizations},\nauthor={Changhwi Park and Seong Ryeol Kim and Young-Gu Kim and Dae Sin Kim},\nyear={2022},\nurl={https://openreview.net/forum?id=2nYz4WZAne4}\n}", "github": "", "project": "", "reviewers": "wE4h;bPMJ;uC1V;uzBL", "site": "https://openreview.net/forum?id=2nYz4WZAne4", "pdf_size": 2185485, "rating": "2;3;3;5", "confidence": "4;5;3;2", "soundness": "1;1;2;2", "novelty": "1;2;2;3", "presentation": "1;2;2;1", "contribution": "1;2;2;3", "wc_summary": "77;53;75;65", "wc_strengths_and_weaknesses": "283;209;139;326", "wc_questions": "23;203;73;433", "wc_limitations": "15;1;16;23", "wc_review": "398;466;303;847", "wc_reply_reviewers": "0;224;0;765", "wc_reply_authors": "1490;1612;850;1717", "reply_reviewers": "0;2;0;3", "reply_authors": "2;4;2;5", "rating_avg": [ 3.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 1.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.5, 0.5 ], "contribution_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.5, 9.526279441628825 ], "wc_strengths_and_weaknesses_avg": [ 239.25, 71.4225979645098 ], "wc_questions_avg": [ 183.0, 158.58751527153706 ], "wc_limitations_avg": [ 13.75, 7.980444849756184 ], "wc_review_avg": [ 503.5, 206.59682959813298 ], "wc_reply_reviewers_avg": [ 247.25, 312.5982845442374 ], "wc_reply_authors_avg": [ 1417.25, 337.2101533168893 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], 
"replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7181848464596079, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EBQn9vM2YWAJ:scholar.google.com/&scioq=Generative+Evolutionary+Strategy+For+Black-Box+Optimizations&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Geodesic Self-Attention for 3D Point Clouds", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53691", "id": "2ndfW2bw4mi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/28e4ee96c94e31b2d040b4521d2b299e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2ndfW2bw4mi", "openreview": "https://openreview.net/forum?id=2ndfW2bw4mi", "poster": "/media/PosterPDFs/NeurIPS%202022/53691.png?t=1669554024.639124", "slides": "https://nips.cc/virtual/2022/poster/53691", "video": "https://nips.cc/virtual/2022/poster/53691", "author_site": "Zhengyu Li, XUAN TANG, Zihao Xu, Xihao Wang, Hui Yu, Mingsong Chen, xian wei", "tldr": "A novel geodesic self-attention module for Transformers on point cloud tasks.", "abstract": "Due to the outstanding competence in capturing long-range relationships, self-attention mechanism has achieved remarkable progress in point cloud tasks. Nevertheless, point cloud object often has complex non-Euclidean spatial structures, with the behavior changing dynamically and unpredictably. Most current self-attention modules highly rely on the dot product multiplication in Euclidean space, which cannot capture internal non-Euclidean structures of point cloud objects, especially the long-range relationships along the curve of the implicit manifold surface represented by point cloud objects. To address this problem, in this paper, we introduce a novel metric on the Riemannian manifold to capture the long-range geometrical dependencies of point cloud objects to replace traditional self-attention modules, namely, the Geodesic Self-Attention (GSA) module. 
Our approach achieves state-of-the-art performance compared to point cloud Transformers on object classification, few-shot classification and part segmentation benchmarks.", "keywords": "Point Cloud;Geodesic;Attention;Transformer;Computer Vision.", "primary_area": "", "supplementary_material": "/attachment/99f8c22c40741ae22045739a073a28eb54928e98.zip", "author": "Zhengyu Li;Xuan Tang;Zihao Xu;Xihao Wang;Hui Yu;Mingsong Chen;Xian Wei", "authorids": "~Zhengyu_Li2;~Xuan_Tang3;~Zihao_Xu3;~Xihao_Wang1;~Hui_Yu4;~Mingsong_Chen1;~Xian_Wei1", "gender": "M;F;M;M;M;M;M", "homepage": ";https://faculty.ecnu.edu.cn/_s15/tx2_21642/main.psp;;;;https://faculty.ecnu.edu.cn/_s43/cms/main.psp;https://www.researchgate.net/", "dblp": "134/7254;;216/8409-2;;;95/573.html;139/0725", "google_scholar": ";mFj-I10AAAAJ;;;;93A6b7YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-2836-276X;;0000-0002-0313-1861;0000-0003-0483-7783;0000-0001-8670-9990;0000-0002-3922-0989;", "linkedin": ";;;;;;", "or_profile": "~Zhengyu_Li2;~Xuan_Tang3;~Zihao_Xu3;~Xihao_Wang1;~Hui_Yu4;~Mingsong_Chen1;~Xian_Wei1", "aff": "East China Normal University;East China Normal University;East China Normal University;Technische Universit\u00e4t M\u00fcnchen;Chinese Academy of Sciences;East China Normal University;East China Normal University", "aff_domain": "ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;tum.de;cas.cn;ecnu.edu.cn;ecnu.edu.cn", "position": "MS student;Associate Professor;MS student;PhD student;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2022geodesic,\ntitle={Geodesic Self-Attention for 3D Point Clouds},\nauthor={Zhengyu Li and Xuan Tang and Zihao Xu and Xihao Wang and Hui Yu and Mingsong Chen and Xian Wei},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2ndfW2bw4mi}\n}", "github": "", "project": "", "reviewers": "MUiF;2FWg;BX9m", "pdf_size": 904619, "rating": "4;6;6", "confidence": "4;3;4", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "2;4;3", "contribution": "2;3;3", "wc_summary": "56;80;80", "wc_strengths_and_weaknesses": "85;295;137", "wc_questions": "86;7;28", "wc_limitations": "12;23;6", "wc_review": "239;405;251", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "770;1364;663", "reply_reviewers": "0;0;0", "reply_authors": "3;3;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 11.313708498984761 ], "wc_strengths_and_weaknesses_avg": [ 172.33333333333334, 89.29850067174824 ], "wc_questions_avg": [ 40.333333333333336, 33.40991203553554 ], "wc_limitations_avg": [ 13.666666666666666, 7.039570693980959 ], "wc_review_avg": [ 298.3333333333333, 75.58365490560037 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 932.3333333333334, 308.3443241284363 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16328544506426723323&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;tum.de;cas.cn;ecnu.edu.cn;ecnu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "East China Normal University;Technische Universit\u00e4t M\u00fcnchen;Chinese Academy of Sciences", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.tum.de;https://www.cas.cn", "aff_unique_abbr": "ECNU;TUM;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;Germany" }, { "title": "pFL-Bench: A Comprehensive Benchmark for Personalized Federated Learning", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55693", "id": "2ptbv_JjYKA", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/3cc03e19fed71a2b9347d83921ca2e7d-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=2ptbv_JjYKA", "openreview": "https://openreview.net/forum?id=2ptbv_JjYKA", "poster": "/media/PosterPDFs/NeurIPS%202022/55693.png?t=1669734032.556508", "slides": "https://nips.cc/virtual/2022/poster/55693", "video": "https://nips.cc/virtual/2022/poster/55693", "author_site": "Daoyuan Chen, Dawei Gao, Weirui Kuang, Yaliang Li, Bolin Ding", "tldr": "We propose the first comprehensive benchmark for personalized Federated Learning, containing more than 10 datasets, 20 pFL methods, and systematic evaluation with highlighted benefits and potential of pFL.", "abstract": "Personalized Federated Learning (pFL), which utilizes and deploys distinct local models, has gained increasing attention in recent years due to its success in handling the statistical heterogeneity of FL clients. However, standardized evaluation and systematic analysis of diverse pFL methods remain a challenge. 
Firstly, the highly varied datasets, FL simulation settings and pFL implementations prevent easy and fair comparisons of pFL methods. Secondly, the current pFL literature diverges in the adopted evaluation and ablation protocols. Finally, the effectiveness and robustness of pFL methods are under-explored in various practical scenarios, such as the generalization to new clients and the participation of resource-limited clients. To tackle these challenges, we propose the first comprehensive pFL benchmark, pFL-Bench, for facilitating rapid, reproducible, standardized and thorough pFL evaluation. The proposed benchmark contains more than 10 dataset variants in various application domains with a unified data partition and realistic heterogeneous settings; a modularized and easy-to-extend pFL codebase with more than 20 competitive pFL method implementations; and systematic evaluations under containerized environments in terms of generalization, fairness, system overhead, and convergence. We highlight the benefits and potential of state-of-the-art pFL methods and hope pFL-Bench enables further pFL research and broad applications that would otherwise be difficult owing to the absence of a dedicated benchmark. The code is released at https://github.com/alibaba/FederatedScope/tree/master/benchmark/pFL-Bench.", "keywords": "Federated Learning;Personalized Federated Learning", "primary_area": "", "supplementary_material": "/attachment/bf3abadb81c2619f7ca572fbcd727fd3f88b3cbd.pdf", "author": "Daoyuan Chen;Dawei Gao;Weirui Kuang;Yaliang Li;Bolin Ding", "authorids": "~Daoyuan_Chen1;~Dawei_Gao1;~Weirui_Kuang2;~Yaliang_Li1;~Bolin_Ding3", "gender": "M;M;M;M;M", "homepage": "https://yxdyc.github.io/;https://davdgao.github.io/;https://sites.google.com/site/yaliangli/;https://bolinding.github.io/;https://weiruikuang.com/", "dblp": "217/4891;;https://dblp.org/pers/hd/l/Li:Yaliang;46/3522.html;318/1583", "google_scholar": "https://scholar.google.com.hk/citations?user=1GdfinUAAAAJ;NNEeYaUAAAAJ;CCPBcdYAAAAJ;AjYkTi8AAAAJ;51p3plEAAAAJ", "orcid": "0000-0002-8015-2121;0009-0007-3882-5189;0000-0002-4204-6096;;", "linkedin": ";;;bolin-ding-50a0119/;", "or_profile": "~Daoyuan_Chen1;~Dawei_Gao1;~Yaliang_Li1;~Bolin_Ding3;~weirui_kuang1", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Staff;Researcher;Staff Engineer;Senior Director;Researcher", "bibtex": "@inproceedings{\nchen2022pflbench,\ntitle={p{FL}-Bench: A Comprehensive Benchmark for Personalized Federated Learning},\nauthor={Daoyuan Chen and Dawei Gao and Weirui Kuang and Yaliang Li and Bolin Ding},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=2ptbv_JjYKA}\n}", "github": "", "project": "", "reviewers": "mZSd;kEb3;9wJe;Ytx8;CoEZ;Ekaz;3xnj", "pdf_size": 1001328, "rating": "5;6;6;6;7;7;7", "confidence": "4;4;4;3;4;3;3", "wc_summary_and_contributions": "106;59;107;37;89;121;40", "wc_strengths": "29;62;26;114;175;43;13", "wc_weaknesses": "290;122;240;171;284;72;20", "wc_correctness": "1;23;30;211;18;15;14", "wc_clarity": "1;22;5;234;63;26;4", "wc_relation_to_prior_work": "1;9;7;51;8;11;12", "wc_documentation": "1;11;7;147;4;13;15", "wc_additional_feedback": "1;48;6;89;105;1;6", "wc_review": "430;356;428;1054;746;302;124", "wc_reply_reviewers": "330;0;26;366;85;0;0", "wc_reply_authors": "1177;1169;1941;3720;654;808;0", 
"reply_reviewers": "2;0;1;1;1;0;0", "reply_authors": "2;2;3;6;2;1;0", "rating_avg": [ 6.285714285714286, 0.6998542122237652 ], "confidence_avg": [ 3.5714285714285716, 0.49487165930539345 ], "wc_summary_and_contributions_avg": [ 79.85714285714286, 31.75094406438322 ], "wc_strengths_avg": [ 66.0, 54.126835171791505 ], "wc_weaknesses_avg": [ 171.28571428571428, 97.60959303808835 ], "wc_correctness_avg": [ 44.57142857142857, 68.44154365558381 ], "wc_clarity_avg": [ 50.714285714285715, 77.38902521992247 ], "wc_relation_to_prior_work_avg": [ 14.142857142857142, 15.403418650496034 ], "wc_documentation_avg": [ 28.285714285714285, 48.68180735497542 ], "wc_additional_feedback_avg": [ 36.57142857142857, 41.33783166217856 ], "wc_review_avg": [ 491.42857142857144, 287.4314115167836 ], "wc_reply_reviewers_avg": [ 115.28571428571429, 150.1054051427237 ], "wc_reply_authors_avg": [ 1352.7142857142858, 1110.3088005834509 ], "reply_reviewers_avg": [ 0.7142857142857143, 0.6998542122237652 ], "reply_authors_avg": [ 2.2857142857142856, 1.749635530559413 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4714045207910318, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18376990207026660571&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DGraph: A Large-Scale Financial Dataset for Graph Anomaly Detection", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55658", "id": "2rQPxsmjKF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/8f1918f71972789db39ec0d85bb31110-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=2rQPxsmjKF", "openreview": "https://openreview.net/forum?id=2rQPxsmjKF", "poster": "/media/PosterPDFs/NeurIPS%202022/8d3bba7425e7c98c50f52ca1b52d3735.png?t=1666524567.5832865", "slides": "https://nips.cc/virtual/2022/poster/55658", "video": "https://nips.cc/virtual/2022/poster/55658", "author_site": "Xuanwen Huang, Yang Yang, Yang Wang, Chunping Wang, Zhisheng Zhang, Jiarong Xu, Lei Chen, Michalis Vazirgiannis", "tldr": "This paper present DGraph, a real-world dynamic graph in the finance domain.", "abstract": "Graph Anomaly Detection (GAD) has recently become a hot research spot due to its practicability and theoretical value. Since GAD emphasizes the application and the rarity of anomalous samples, enriching the varieties of its datasets is fundamental. Thus, this paper present DGraph, a real-world dynamic graph in the finance domain. DGraph overcomes many limitations of current GAD datasets. It contains about 3M nodes, 4M dynamic edges, and 1M ground-truth nodes. We provide a comprehensive observation of DGraph, revealing that anomalous nodes and normal nodes generally have different structures, neighbor distribution, and temporal dynamics. Moreover, it suggests that 2M background nodes are also essential for detecting fraudsters. Furthermore, we conduct extensive experiments on DGraph. 
Observations and experiments demonstrate that DGraph can propel GAD research and enable in-depth exploration of anomalous nodes.", "keywords": "Graph Anomaly Detection;Dynamic Graph;Financial Fraudsters Detection.", "primary_area": "", "supplementary_material": "/attachment/c73f56eb5c6563efec24ef1bdbfd8fea47f8331d.pdf", "author": "Xuanwen Huang;Yang Yang;Yang Wang;Chunping Wang;Zhisheng Zhang;Jiarong Xu;Lei Chen;Michalis Vazirgiannis", "authorids": "~Xuanwen_Huang1;~Yang_Yang35;wangyang09@xinye.com;~Chunping_Wang1;zhangzhsh6@zju.edu.cn;~Jiarong_Xu2;chenlei04@xinye.com;~Michalis_Vazirgiannis1", "gender": "M;M;;F;;F;;M", "homepage": ";http://yangy.org;;;;https://galina0217.github.io/;;", "dblp": "256/9418;;;54/2715-1;;;;v/MVazirgiannis", "google_scholar": "JFLCWNQAAAAJ;;;Rmy5RogAAAAJ;;;;https://scholar.google.gr/citations?user=aWGJYcMAAAAJ", "orcid": ";0000-0002-5058-4417;;0000-0003-1854-8667;;0000-0003-2973-1889;;", "linkedin": ";;;https://linkedin.com/in/chunping-wang-7b94a15/;;;;", "or_profile": "~Xuanwen_Huang1;~Yang_Yang35;wangyang09@xinye.com;~Chunping_Wang1;zhangzhsh6@zju.edu.cn;~Jiarong_Xu2;chenlei04@xinye.com;~Michalis_Vazirgiannis1", "aff": "Zhejiang University;Zhejiang University;;Finvolution Group;;Fudan University;;Ecole Polytechnique, France", "aff_domain": "zju.edu.cn;zju.edu.cn;;xinye.com;;fudan.edu.cn;;polytechnique.fr", "position": "PhD student;Associate Professor;;Principal Scientist;;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nhuang2022dgraph,\ntitle={{DG}raph: A Large-Scale Financial Dataset for Graph Anomaly Detection},\nauthor={Xuanwen Huang and Yang Yang and Yang Wang and Chunping Wang and Zhisheng Zhang and Jiarong Xu and Lei Chen and Michalis Vazirgiannis},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=2rQPxsmjKF}\n}", "github": "", "project": "", "reviewers": "YKcV;MFHZ;q3Ce;FwMm;SeLR;MofF", "pdf_size": 918778, "rating": "5;6;6;7;7;8", "confidence": "4;4;3;4;3;3", "wc_summary_and_contributions": "56;163;49;50;63;86", "wc_strengths": "59;78;68;180;85;126", "wc_weaknesses": "91;66;217;48;83;37", "wc_correctness": "76;17;8;8;38;8", "wc_clarity": "49;33;18;7;24;16", "wc_relation_to_prior_work": "38;38;15;9;24;46", "wc_documentation": "47;74;7;17;34;21", "wc_additional_feedback": "49;537;7;6;24;6", "wc_review": "465;1006;389;325;375;346", "wc_reply_reviewers": "0;42;87;0;0;0", "wc_reply_authors": "766;778;291;204;553;334", "reply_reviewers": "0;1;1;0;0;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.5, 0.9574271077563381 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 77.83333333333333, 40.04754119238228 ], "wc_strengths_avg": [ 99.33333333333333, 41.80776110830247 ], "wc_weaknesses_avg": [ 90.33333333333333, 59.6200935554076 ], "wc_correctness_avg": [ 25.833333333333332, 24.822145130687012 ], "wc_clarity_avg": [ 24.5, 13.5 ], "wc_relation_to_prior_work_avg": [ 28.333333333333332, 13.349989596333856 ], "wc_documentation_avg": [ 33.333333333333336, 22.186081723058315 ], "wc_additional_feedback_avg": [ 104.83333333333333, 193.87574772403988 ], "wc_review_avg": [ 484.3333333333333, 237.37288996195184 ], "wc_reply_reviewers_avg": [ 21.5, 33.06433123473088 ], "wc_reply_authors_avg": [ 487.6666666666667, 226.85874214781515 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], 
"corr_rating_confidence": -0.5222329678670935, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17234990120433136230&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "zju.edu.cn;zju.edu.cn;;xinye.com;;fudan.edu.cn;;polytechnique.fr", "author_num": 8, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Zhejiang University;FinVolution Group;Fudan University;Ecole Polytechnique", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.finvolutiongroup.com;https://www.fudan.edu.cn;https://www.ec-polytechnique.fr", "aff_unique_abbr": "ZJU;;Fudan;X", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;France" }, { "title": "Trading Off Resource Budgets For Improved Regret Bounds", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54606", "id": "2tfv0K8Vbtf", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/31a57804448363bcab777f818f75f5b4-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2tfv0K8Vbtf", "openreview": "https://openreview.net/forum?id=2tfv0K8Vbtf", "poster": "/media/PosterPDFs/NeurIPS%202022/54606.png?t=1669753987.9258895", "slides": "https://nips.cc/virtual/2022/poster/54606", "video": "https://nips.cc/virtual/2022/poster/54606", "author_site": "Thomas Orton, Damon Falck", "tldr": "", "abstract": "In this work we consider a variant of adversarial online learning where in each round one picks $B$ out of $N$ arms and incurs cost equal to the $\\textit{minimum}$ of the costs of each arm chosen. We propose an algorithm called Follow the Perturbed Multiple Leaders (FPML) for this problem, which we show (by adapting the techniques of Kalai and Vempala [2005]) achieves expected regret $\\mathcal{O}(T^{\\frac{1}{B+1}}\\ln(N)^{\\frac{B}{B+1}})$ over time horizon $T$ relative to the $\\textit{single}$ best arm in hindsight. This introduces a trade-off between the budget $B$ and the single-best-arm regret, and we proceed to investigate several applications of this trade-off. First, we observe that algorithms which use standard regret minimizers as subroutines can sometimes be adapted by replacing these subroutines with FPML, and we use this to generalize existing algorithms for Online Submodular Function Maximization [Streeter and Golovin, 2008] in both the full feedback and semi-bandit feedback settings. Next, we empirically evaluate our new algorithms on an online black-box hyperparameter optimization problem. Finally, we show how FPML can lead to new algorithms for Linear Programming which require stronger oracles at the benefit of fewer oracle calls.", "keywords": "Online Learning;Bandit Algorithms", "primary_area": "", "supplementary_material": "/attachment/52919955729652b11c9e62abb1163fdb872ff8ed.zip", "author": "Thomas Orton;Damon Falck", "authorids": "~Thomas_Orton1;~Damon_Falck1", "gender": ";M", "homepage": "https://thomasorton.info;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";damon-falck", "or_profile": "~Thomas_Orton1;~Damon_Falck1", "aff": "University of Oxford;University of Oxford", "aff_domain": "oxford.ac.uk;ox.ac.uk", "position": "PhD student;MS student", "bibtex": "@inproceedings{\norton2022trading,\ntitle={Trading Off Resource Budgets For Improved Regret Bounds},\nauthor={Thomas Orton and Damon Falck},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2tfv0K8Vbtf}\n}", "github": "", "project": "", "reviewers": "2itn;SYiQ;Gn74;AB1a", "pdf_size": 611594, "rating": "4;5;6;7", "confidence": "3;4;3;3", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;4;2;3", "contribution": "2;2;3;3", "wc_summary": "58;121;93;249", "wc_strengths_and_weaknesses": "100;217;242;473", "wc_questions": "2;103;92;22", "wc_limitations": "2;27;1;15", "wc_review": "162;468;428;759", "wc_reply_reviewers": "0;0;0;364", "wc_reply_authors": "425;720;666;918", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 130.25, 72.10192438485952 ], "wc_strengths_and_weaknesses_avg": [ 258.0, 135.209097327066 ], "wc_questions_avg": [ 54.75, 43.50502844499702 ], "wc_limitations_avg": [ 11.25, 10.638961415476606 ], "wc_review_avg": [ 454.25, 211.63692376331687 ], "wc_reply_reviewers_avg": [ 91.0, 157.61662348876783 ], "wc_reply_authors_avg": [ 682.25, 175.6735253246772 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XOMIvHvv_FMJ:scholar.google.com/&scioq=Trading+Off+Resource+Budgets+For+Improved+Regret+Bounds&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "oxford.ac.uk;ox.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "DPM-Solver: A Fast ODE Solver for Diffusion Probabilistic Model Sampling in Around 10 Steps", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54266", "id": "2uAaGwlP_V", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/260a14acce2a89dad36adc8eefe7c59e-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2uAaGwlP_V", "openreview": "https://openreview.net/forum?id=2uAaGwlP_V", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54266", "video": "https://nips.cc/virtual/2022/poster/54266", "author_site": "Cheng Lu, Yuhao Zhou, Fan Bao, Jianfei Chen, Chongxuan LI, Jun Zhu", "tldr": "We propose a fast ODE solver for sampling from diffusion probabilistic models in around 10 steps.", "abstract": "Diffusion probabilistic models (DPMs) are emerging as powerful generative models. Despite their high-quality generation performance, DPMs still suffer from slow sampling, as they generally need hundreds or thousands of sequential function evaluations (steps) of large neural networks to draw a sample. Sampling from DPMs can be viewed alternatively as solving the corresponding diffusion ordinary differential equations (ODEs). In this work, we propose an exact formulation of the solution of diffusion ODEs. The formulation analytically computes the linear part of the solution, rather than leaving all terms to black-box ODE solvers as adopted in previous works. 
By applying change-of-variable, the solution can be equivalently simplified to an exponentially weighted integral of the neural network. Based on our formulation, we propose DPM-Solver, a fast dedicated high-order solver for diffusion ODEs with the convergence order guarantee. DPM-Solver is suitable for both discrete-time and continuous-time DPMs without any further training. Experimental results show that DPM-Solver can generate high-quality samples in only 10 to 20 function evaluations on various datasets. We achieve 4.70 FID in 10 function evaluations and 2.87 FID in 20 function evaluations on the CIFAR10 dataset, and a 4~16x speedup compared with previous state-of-the-art training-free samplers on various datasets.", "keywords": "diffusion probabilistic models;score-based generative models;fast sampling;ODE solver", "primary_area": "", "supplementary_material": "/attachment/02f54d3b3b1da6064ca086ad84c464f33781f953.pdf", "author": "Cheng Lu;Yuhao Zhou;Fan Bao;Jianfei Chen;Chongxuan Li;Jun Zhu", "authorids": "~Cheng_Lu5;~Yuhao_Zhou2;~Fan_Bao1;~Jianfei_Chen1;~Chongxuan_Li1;~Jun_Zhu2", "gender": "M;M;M;M;M;M", "homepage": "https://luchengthu.github.io/;https://yuhaoz.com;https://baofff.github.io/;http://ml.cs.tsinghua.edu.cn/~jianfei;http://ml.cs.tsinghua.edu.cn/~chongxuan;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "91/1482-11;;71/3877;48/6809-1;161/9965;50/2644-1", "google_scholar": "vPE9VRoAAAAJ;GKLRbxoAAAAJ;;di5RZ1MAAAAJ;UKMcQn4AAAAJ;axsP38wAAAAJ", "orcid": ";;;;0000-0002-0912-9076;", "linkedin": ";;;;;", "or_profile": "~Cheng_Lu5;~Yuhao_Zhou2;~Fan_Bao1;~Jianfei_Chen1;~Chongxuan_Li1;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Renmin University of China;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ruc.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nlu2022dpmsolver,\ntitle={{DPM}-Solver: A Fast {ODE} Solver for Diffusion Probabilistic Model Sampling in Around 10 Steps},\nauthor={Cheng Lu and Yuhao Zhou and Fan Bao and Jianfei Chen and Chongxuan Li and Jun Zhu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2uAaGwlP_V}\n}", "github": "", "project": "", "reviewers": "EaE9;Wm2X;DD9K", "pdf_size": 15945070, "rating": "7;8;8", "confidence": "4;4;2", "soundness": "3;4;4", "novelty": "3;4;4", "presentation": "3;4;4", "contribution": "3;4;4", "wc_summary": "202;41;87", "wc_strengths_and_weaknesses": "615;148;120", "wc_questions": "178;35;68", "wc_limitations": "75;9;2", "wc_review": "1070;233;277", "wc_reply_reviewers": "62;0;0", "wc_reply_authors": "1353;838;568", "reply_reviewers": "1;0;0", "reply_authors": "3;2;2", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 110.0, 67.71016664184683 ], "wc_strengths_and_weaknesses_avg": [ 294.3333333333333, 227.033526647987 ], "wc_questions_avg": [ 93.66666666666667, 61.13555066862124 ], "wc_limitations_avg": [ 28.666666666666668, 32.8870119584549 ], "wc_review_avg": [ 526.6666666666666, 384.6143811952723 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 29.227080289043965 ], "wc_reply_authors_avg": [ 919.6666666666666, 325.6361295816066 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1398, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2427327523938680723&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;ruc.edu.cn;mail.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hamiltonian Latent Operators for content and motion disentanglement in image sequences", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54960", "id": "2vYmjZVT29T", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2f8ee6a3d766b426d2618e555b5aeb39-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2vYmjZVT29T", "openreview": "https://openreview.net/forum?id=2vYmjZVT29T", "poster": "/media/PosterPDFs/NeurIPS%202022/01a0683665f38d8e5e567b3b15ca98bf.png?t=1666292301.1949027", "slides": "https://nips.cc/virtual/2022/poster/54960", "video": "https://nips.cc/virtual/2022/poster/54960", "author_site": "Asif Khan, Amos Storkey", "tldr": "A deep generative model utilising symplectic geometry to disentangle motion from content in Image sequences", "abstract": "We introduce \\textit{HALO} -- a deep generative model utilising HAmiltonian Latent Operators to reliably disentangle content and motion information in image sequences. The \\textit{content} represents summary statistics of a sequence, and \\textit{motion} is a dynamic process that determines how information is expressed in any part of the sequence. 
Modelling the dynamics as a Hamiltonian motion ensures important desiderata: (1) the motion is reversible, and (2) the symplectic, volume-preserving structure in phase space means paths are continuous and do not diverge in the latent space. Consequently, the nearness of sequence frames is realised by the nearness of their coordinates in the phase space, which proves valuable for disentanglement and long-term sequence generation. The sequence space generally comprises different types of dynamical motions. To ensure long-term separability and allow controlled generation, we associate every motion with a unique Hamiltonian that acts in its respective subspace. We demonstrate the utility of \\textit{HALO} by swapping the motion of a pair of sequences, controlled generation, and image rotations.", "keywords": "Deep generative models;Variational Autoencoder;Symplectic Geometry;Hamiltonian Dynamics;Latent Space Disentanglement", "primary_area": "", "supplementary_material": "/attachment/cd5a5921d864cc9197382072e9036407c7ddc93d.pdf", "author": "Asif Khan;Amos Storkey", "authorids": "~Asif_Khan3;~Amos_Storkey1", "gender": "M;Not Specified", "homepage": "https://mdasifkhan.github.io/;http://homepages.inf.ed.ac.uk/amos/", "dblp": "181/1278-1;", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;", "orcid": ";", "linkedin": ";", "or_profile": "~Asif_Khan3;~Amos_Storkey1", "aff": "University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkhan2022hamiltonian,\ntitle={Hamiltonian Latent Operators for content and motion disentanglement in image sequences},\nauthor={Asif Khan and Amos Storkey},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2vYmjZVT29T}\n}", "github": "", "project": "", "reviewers": "pzKP;pj74;FnRf", "pdf_size": 4746749, "rating": "5;5;6", "confidence": "3;4;3", "soundness": "2;2;3", "novelty": "3;3;3", "presentation": "3;3;3", "contribution": "3;3;3", "wc_summary": "76;67;122", "wc_strengths_and_weaknesses": "621;308;242", "wc_questions": "203;4;48", "wc_limitations": "23;1;5", "wc_review": "923;380;417", "wc_reply_reviewers": "161;0;0", "wc_reply_authors": "1580;715;687", "reply_reviewers": "1;0;0", "reply_authors": "4;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.33333333333333, 24.087802353519557 ], "wc_strengths_and_weaknesses_avg": [ 390.3333333333333, 165.3165314043201 ], "wc_questions_avg": [ 85.0, 85.35025873813545 ], "wc_limitations_avg": [ 9.666666666666666, 9.568466729604882 ], "wc_review_avg": [ 573.3333333333334, 247.71264714494407 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 75.8961278473561 ], "wc_reply_authors_avg": [ 994.0, 414.5222149254086 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3449357233115494687&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ed.ac.uk;ed.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Near-Optimal Collaborative Learning in Bandits", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54179", "id": "2xfJ26BuFP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5b9bef4eae0f574cedbf9f4bf29d8ae7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2xfJ26BuFP", "openreview": "https://openreview.net/forum?id=2xfJ26BuFP", "poster": "/media/PosterPDFs/NeurIPS%202022/54179.png?t=1668026368.7573698", "slides": "https://nips.cc/virtual/2022/poster/54179", "video": "https://nips.cc/virtual/2022/poster/54179", "author_site": "Cl\u00e9mence R\u00e9da, Sattar Vakili, Emilie Kaufmann", "tldr": "A near-optimal algorithm is proposed for pure exploration in a new framework for collaborative bandit learning that encompasses recent prior works.", "abstract": "This paper introduces a general multi-agent bandit model in which each agent is facing a finite set of arms and may communicate with other agents through a central controller in order to identify -in pure exploration- or play -in regret minimization- its optimal arm. The twist is that the optimal arm for each agent is the arm with largest expected mixed reward, where the mixed reward of an arm is a weighted sum of the rewards of this arm for all agents. This makes communication between agents often necessary. 
This general setting allows us to recover and extend several recent models for collaborative bandit learning, including the recently proposed federated learning with personalization [Shi et al., 2021]. In this paper, we provide new lower bounds on the sample complexity of pure exploration and on the regret. We then propose a near-optimal algorithm for pure exploration. This algorithm is based on phased elimination with two novel ingredients: a data-dependent sampling scheme within each phase, aimed at matching a relaxation of the lower bound.", "keywords": "collaborative learning;multi-armed bandit;centralized learning;communication;elimination-based algorithm;data-driven sampling", "primary_area": "", "supplementary_material": "/attachment/0688e456e2de86f874685767c9bcef1571803ed3.pdf", "author": "Cl\u00e9mence R\u00e9da;Sattar Vakili;Emilie Kaufmann", "authorids": "~Cl\u00e9mence_R\u00e9da1;~Sattar_Vakili1;~Emilie_Kaufmann1", "gender": ";;F", "homepage": "https://clreda.github.io;https://sattar-vakili.github.io/;https://emiliekaufmann.github.io/", "dblp": "288/0376;140/5473;67/11350", "google_scholar": "q3AUHgEAAAAJ;N9xs8w0AAAAJ;9GE1vx4AAAAJ", "orcid": "0000-0003-3238-0258;;", "linkedin": "https://linkedin.com/in/clemence-reda;;", "or_profile": "~Cl\u00e9mence_R\u00e9da1;~Sattar_Vakili1;~Emilie_Kaufmann1", "aff": "Inserm;MediaTek Research;CNRS", "aff_domain": "inserm.fr;mtkresearch.com;cnrs.fr", "position": "PhD student;Principal AI Research Manager;Researcher", "bibtex": "@inproceedings{\nr{\\'e}da2022nearoptimal,\ntitle={Near-Optimal Collaborative Learning in Bandits},\nauthor={Cl{\\'e}mence R{\\'e}da and Sattar Vakili and Emilie Kaufmann},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2xfJ26BuFP}\n}", "github": "", "project": "", "reviewers": "Q8Vf;cDN2;MREF", "pdf_size": 541213, "rating": "6;7;7", "confidence": "3;4;4", "soundness": "4;3;4", "novelty": "2;3;4", "presentation": "3;3;3", "contribution": "2;3;4", "wc_summary": "70;196;54", "wc_strengths_and_weaknesses": "366;199;131", "wc_questions": "66;65;3", "wc_limitations": "49;25;22", "wc_review": "551;485;210", "wc_reply_reviewers": "0;13;0", "wc_reply_authors": "1275;353;6", "reply_reviewers": "0;1;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 106.66666666666667, 63.50503042191925 ], "wc_strengths_and_weaknesses_avg": [ 232.0, 98.73533646403736 ], "wc_questions_avg": [ 44.666666666666664, 29.465610840812754 ], "wc_limitations_avg": [ 32.0, 12.083045973594572 ], "wc_review_avg": [ 415.3333333333333, 147.67155748108328 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 6.128258770283412 ], "wc_reply_authors_avg": [ 544.6666666666666, 535.5012190047335 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11872427930011371643&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "inserm.fr;mtkresearch.com;cnrs.fr", 
"author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Institut National de la Sant\u00e9 et de la Recherche M\u00e9dicale;MediaTek Inc.;Centre National de la Recherche Scientifique", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.inserm.fr;https://www.mediatek.com/;https://www.cnrs.fr", "aff_unique_abbr": "Inserm;MediaTek;CNRS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;China" }, { "title": "Test Time Adaptation via Conjugate Pseudo-labels", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54429", "id": "2yvUYc-YNUH", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/28e9eff897f98372409b40ae1ed3ea4c-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2yvUYc-YNUH", "openreview": "https://openreview.net/forum?id=2yvUYc-YNUH", "poster": "/media/PosterPDFs/NeurIPS%202022/54429.png?t=1669161455.616476", "slides": "https://nips.cc/virtual/2022/poster/54429", "video": "https://nips.cc/virtual/2022/poster/54429", "author_site": "Sachin Goyal, Mingjie Sun, Aditi Raghunathan, J. Zico Kolter", "tldr": "We provide a generic framework for designing test-time adaptation loss for neural-networks trained using various loss functions like cross-entropy, polyloss and squared loss.", "abstract": "Test-time adaptation (TTA) refers to adapting neural networks to distribution shifts, specifically with just access to unlabeled test samples from the new domain at test-time. Prior TTA methods optimize over unsupervised objectives such as the entropy of model predictions in TENT (Wang et al., 2021), but it is unclear what exactly makes a good TTA loss. In this paper, we start by presenting a surprising phenomenon: if we attempt to $\\textit{meta-learn}$ the ``best'' possible TTA loss over a wide class of functions, then we recover a function that is $\\textit{remarkably}$ similar to (a temperature-scaled version of) the softmax-entropy employed by TENT. This only holds, however, if the classifier we are adapting is trained via cross-entropy loss; if the classifier is trained via squared loss, a different ``best'' TTA loss emerges.\nTo explain this phenomenon, we analyze test-time adaptation through the lens of the training losses's $\\textit{convex conjugate}$. We show that under natural conditions, this (unsupervised) conjugate function can be viewed as a good local approximation to the original supervised loss and indeed, it recovers the ``best'' losses found by meta-learning. This leads to a generic recipe than be used to find a good TTA loss for $\\textit{any}$ given supervised training loss function of a general class. Empirically, our approach dominates other TTA alternatives over a wide range of domain adaptation benchmarks. Our approach is particularly of interest when applied to classifiers trained with $\\textit{novel}$ loss functions, e.g., the recently-proposed PolyLoss (Leng et al., 2022) function, where it differs substantially from (and outperforms) an entropy-based loss. Further, we show that our conjugate based approach can also be interpreted as a kind of self-training using a very specific soft label, which we refer to as the $\\textit{conjugate pseudo-label}$. Overall, therefore, our method provides a broad framework for better understanding and improving test-time adaptation. 
Code is available at https://github.com/locuslab/tta_conjugate.", "keywords": "Test Time Adaptation;Domain Adaptation", "primary_area": "", "supplementary_material": "/attachment/0e2bb01a1e8a6d3c5241e29da225b85ed479bb4d.pdf", "author": "Sachin Goyal;Mingjie Sun;Aditi Raghunathan;J Zico Kolter", "authorids": "~Sachin_Goyal1;~Mingjie_Sun1;~Aditi_Raghunathan1;~J_Zico_Kolter1", "gender": "M;M;F;M", "homepage": "https://saching007.github.io/;https://eric-mingjie.github.io/;https://www.cs.cmu.edu/~aditirag/;http://www.zicokolter.com", "dblp": "82/2605;54/3913;166/1409;67/2526", "google_scholar": "-KK-60AAAAAJ;wCZbouUAAAAJ;Ch9iRwQAAAAJ;UXh1I6UAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sachin_Goyal1;~Mingjie_Sun1;~Aditi_Raghunathan1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Computer Science Department, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngoyal2022test,\ntitle={Test Time Adaptation via Conjugate Pseudo-labels},\nauthor={Sachin Goyal and Mingjie Sun and Aditi Raghunathan and J Zico Kolter},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2yvUYc-YNUH}\n}", "github": "", "project": "", "reviewers": "U1Uv;M6iL;JZZE;so7v", "pdf_size": 986564, "rating": "7;7;7;8", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "3;4;3;4", "presentation": "4;2;3;4", "contribution": "3;4;3;4", "wc_summary": "60;246;115;91", "wc_strengths_and_weaknesses": "141;563;74;96", "wc_questions": "509;378;3;26", "wc_limitations": "26;2;3;4", "wc_review": "736;1189;195;217", "wc_reply_reviewers": "145;18;0;1", "wc_reply_authors": "1128;1035;530;326", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 128.0, 70.8625429969882 ], "wc_strengths_and_weaknesses_avg": [ 218.5, 200.35780493906395 ], "wc_questions_avg": [ 229.0, 219.59394345017807 ], "wc_limitations_avg": [ 8.75, 9.98436277385793 ], "wc_review_avg": [ 584.25, 410.8341362399186 ], "wc_reply_reviewers_avg": [ 41.0, 60.46900032247929 ], "wc_reply_authors_avg": [ 754.75, 336.2271367691787 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8389420970120156682&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "andrew.cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "An $\\alpha$-No-Regret Algorithm For Graphical Bilinear Bandits", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54063", "id": "2zQx2Pxbd7J", "proceeding": 
"https://proceedings.neurips.cc/paper_files/paper/2022/hash/79a10a4977d1e21c319060e125406bd6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=2zQx2Pxbd7J", "openreview": "https://openreview.net/forum?id=2zQx2Pxbd7J", "poster": "/media/PosterPDFs/NeurIPS%202022/54063.png?t=1668975638.6911757", "slides": "https://nips.cc/virtual/2022/poster/54063", "video": "https://nips.cc/virtual/2022/poster/54063", "author_site": "Geovani Rizk, Igor Colin, Albert Thomas, Rida Laraki, Yann Chevaleyre", "tldr": "", "abstract": "We propose the first regret-based approach to the \\emph{Graphical Bilinear Bandits} problem, where $n$ agents in a graph play a stochastic bilinear bandit game with each of their neighbors. This setting reveals a combinatorial NP-hard problem that prevents the use of any existing regret-based algorithm in the (bi-)linear bandit literature. In this paper, we fill this gap and present the first regret-based algorithm for graphical bilinear bandits using the principle of optimism in the face of uncertainty. Theoretical analysis of this new method yields an upper bound of $\\tilde{O}(\\sqrt{T})$ on the $\\alpha$-regret and evidences the impact of the graph structure on the rate of convergence. Finally, we show through various experiments the validity of our approach.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/2d5b7c476def3c3cbea1a529956a79afbc715316.pdf", "author": "Geovani Rizk;Igor Colin;Albert Thomas;Rida Laraki;Yann Chevaleyre", "authorids": "~Geovani_Rizk1;~Igor_Colin1;~Albert_Thomas1;~Rida_Laraki1;~Yann_Chevaleyre1", "gender": "M;M;;M;M", "homepage": ";https://igorcolin.github.io/;https://albertcthomas.github.io/;https://sites.google.com/site/ridalaraki/;https://www.lamsade.dauphine.fr/~ychevaleyre/", "dblp": "259/2889;157/8205;172/7718-1;;55/5658", "google_scholar": ";;GzXiITUAAAAJ;https://scholar.google.fr/citations?user=zwkQWEgAAAAJ;SF6g8p4AAAAJ", "orcid": ";;;;", "linkedin": ";;;;yannchevaleyre", "or_profile": "~Geovani_Rizk1;~Igor_Colin1;~Albert_Thomas1;~Rida_Laraki1;~Yann_Chevaleyre1", "aff": "Univerist\u00e9 Paris-Dauphine;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Univerist\u00e9 Paris-Dauphine;Universit\u00e9 Paris-Dauphine (Paris IX)", "aff_domain": "dauphine.fr;huawei.com;huawei.com;dauphine.fr;dauphine.fr", "position": "PhD student;Researcher;Researcher;Director of Research CNRS;Full Professor", "bibtex": "@inproceedings{\nrizk2022an,\ntitle={An \\${\\textbackslash}alpha\\$-No-Regret Algorithm For Graphical Bilinear Bandits},\nauthor={Geovani Rizk and Igor Colin and Albert Thomas and Rida Laraki and Yann Chevaleyre},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=2zQx2Pxbd7J}\n}", "github": "", "project": "", "reviewers": "7vV1;DZQi;shTc", "pdf_size": 955435, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "contribution": "2;2;3", "wc_summary": "57;111;137", "wc_strengths_and_weaknesses": "119;146;113", "wc_questions": "47;60;86", "wc_limitations": "41;6;8", "wc_review": "264;323;344", "wc_reply_reviewers": "0;0;9", "wc_reply_authors": "814;754;682", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 101.66666666666667, 33.319997332266134 ], "wc_strengths_and_weaknesses_avg": [ 126.0, 14.352700094407323 ], "wc_questions_avg": [ 64.33333333333333, 16.21384867602041 ], "wc_limitations_avg": [ 18.333333333333332, 16.048537489614297 ], "wc_review_avg": [ 310.3333333333333, 33.86574801903671 ], "wc_reply_reviewers_avg": [ 3.0, 4.242640687119285 ], "wc_reply_authors_avg": [ 750.0, 53.96295025292817 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Rrdfe-KPGpoJ:scholar.google.com/&scioq=An+%24%5Calpha%24-No-Regret+Algorithm+For+Graphical+Bilinear+Bandits&hl=en&as_sdt=0,5", "gs_version_total": 8, "email": "dauphine.fr;huawei.com;huawei.com;dauphine.fr;dauphine.fr", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Universit\u00e9 Paris-Dauphine;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.univ-paris-dauphine.fr;https://www.huawei.com", "aff_unique_abbr": "UPD;Huawei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "France;China" }, { "title": "Is a Modular Architecture Enough?", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54964", "id": "3-3XMModtrx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b8d1d741f137d9b6ac4f3c1683791e4a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3-3XMModtrx", "openreview": "https://openreview.net/forum?id=3-3XMModtrx", "poster": "/media/PosterPDFs/NeurIPS%202022/4c8c76b39d294759a9000cbda3a6571a.png?t=1667606580.5126133", "slides": "https://nips.cc/virtual/2022/poster/54964", "video": "https://nips.cc/virtual/2022/poster/54964", "author_site": "Sarthak Mittal, Yoshua Bengio, Guillaume Lajoie", "tldr": "We propose metrics to study mixture-of-experts styled modular systems. Our analysis shows that such systems suffer from problems of collapse and specialization, and might require additional inductive biases to overcome this sub-optimality.", "abstract": "Inspired by human cognition, machine learning systems are gradually revealing advantages of sparser and more modular architectures. 
Recent work demonstrates that not only do some modular architectures generalize well, but they also lead to better out of distribution generalization, scaling properties, learning speed, and interpretability. A key intuition behind the success of such systems is that the data generating system for most real-world settings is considered to consist of sparse modular connections, and endowing models with similar inductive biases will be helpful. However, the field has been lacking in a rigorous quantitative assessment of such systems because these real-world data distributions are complex and unknown. In this work, we provide a thorough assessment of common modular architectures, through the lens of simple and known modular data distributions. We highlight the benefits of modularity and sparsity and reveal insights on the challenges faced while optimizing modular systems. In doing so, we propose evaluation metrics that highlight the benefits of modularity, the regimes in which these benefits are substantial, as well as the sub-optimality of current end-to-end learned modular systems as opposed to their claimed potential.", "keywords": "modularity;attention;mixture of experts;metrics;benchmark;specialization;collapse", "primary_area": "", "supplementary_material": "/attachment/8b3e6ca8b314ac6f21acacbff727842ff62917f5.pdf", "author": "Sarthak Mittal;Yoshua Bengio;Guillaume Lajoie", "authorids": "~Sarthak_Mittal1;~Yoshua_Bengio1;~Guillaume_Lajoie1", "gender": "M;M;M", "homepage": "https://sarthmit.github.io/;http://yoshuabengio.org;https://dms.umontreal.ca/~lajoie/", "dblp": "228/8275;56/953;31/10384", "google_scholar": "FGGgTrcAAAAJ;kukA0LcAAAAJ;", "orcid": ";;", "linkedin": ";yoshuabengio/?originalSubdomain=ca;", "or_profile": "~Sarthak_Mittal1;~Yoshua_Bengio1;~Guillaume_Lajoie1", "aff": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Mila - Quebec Artificial Intelligence Institute", "aff_domain": "umontreal.ca;umontreal.ca;mila.quebec", "position": "MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nmittal2022is,\ntitle={Is a Modular Architecture Enough?},\nauthor={Sarthak Mittal and Yoshua Bengio and Guillaume Lajoie},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3-3XMModtrx}\n}", "github": "", "project": "", "reviewers": "QEMJ;uXNv;H4Mv;HQDS", "pdf_size": 493320, "rating": "6;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;4;4", "novelty": "3;2;3;4", "presentation": "3;4;3;3", "contribution": "3;2;3;4", "wc_summary": "178;68;118;58", "wc_strengths_and_weaknesses": "220;114;168;41", "wc_questions": "51;118;55;44", "wc_limitations": "2;1;56;6", "wc_review": "451;301;397;149", "wc_reply_reviewers": "42;267;43;0", "wc_reply_authors": "845;1343;582;147", "reply_reviewers": "1;1;1;0", "reply_authors": "2;4;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.5, 47.63139720814412 ], "wc_strengths_and_weaknesses_avg": [ 135.75, 66.31129240182248 ], "wc_questions_avg": [ 67.0, 29.706901555025897 ], "wc_limitations_avg": [ 16.25, 23.025800746119558 ], "wc_review_avg": [ 324.5, 114.6854393547847 ], "wc_reply_reviewers_avg": [ 88.0, 104.79265241418408 ], "wc_reply_authors_avg": [ 729.25, 433.2391804765585 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5707197899340562621&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "umontreal.ca;umontreal.ca;mila.quebec", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;Artificial Intelligence", "aff_unique_url": "https://www.umontreal.ca;https://www.umontreal.ca;https://mila.quebec", "aff_unique_abbr": "UdeM;UM;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "You Only Live Once: Single-Life Reinforcement Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54491", "id": "303XqIQ5c_d", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5ec4e93f2cec19d47ef852a0e1fb2c48-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=303XqIQ5c_d", "openreview": "https://openreview.net/forum?id=303XqIQ5c_d", "poster": "/media/PosterPDFs/NeurIPS%202022/54491.png?t=1669104737.5790935", "slides": "https://nips.cc/virtual/2022/poster/54491", "video": "https://nips.cc/virtual/2022/poster/54491", "author_site": "Annie Chen, Archit Sharma, Sergey Levine, Chelsea Finn", "tldr": "We formalize the single-life RL problem setting, where given prior data, an agent must complete a novel task autonomously in a single trial, and propose an algorithm (QWALE) that leverages the prior data as guidance to complete the desired task.", "abstract": "Reinforcement learning algorithms are typically designed to learn a performant policy that can repeatedly and autonomously complete a task, usually starting from scratch. However, in many real-world situations, the goal might not be to learn a policy that can do the task repeatedly, but simply to perform a new task successfully once in a single trial. 
For example, imagine a disaster relief robot tasked with retrieving an item from a fallen building, where it cannot get direct supervision from humans. It must retrieve this object within one test-time trial, and must do so while tackling unknown obstacles, though it may leverage knowledge it has of the building before the disaster. We formalize this problem setting, which we call single-life reinforcement learning (SLRL), where an agent must complete a task within a single episode without interventions, utilizing its prior experience while contending with some form of novelty. SLRL provides a natural setting to study the challenge of autonomously adapting to unfamiliar situations, and we find that algorithms designed for standard episodic reinforcement learning often struggle to recover from out-of-distribution states in this setting. Motivated by this observation, we propose an algorithm, Q-weighted adversarial learning (QWALE), which employs a distribution matching strategy that leverages the agent's prior experience as guidance in novel situations. Our experiments on several single-life continuous control problems indicate that methods based on our distribution matching formulation are 20-60% more successful because they can more quickly recover from novel states.", "keywords": "reinforcement learning;autonomous reinforcement learning;adversarial imitation learning", "primary_area": "", "supplementary_material": "/attachment/63be68d8e3155f40d6da3010d41fd0ff5f94d74c.pdf", "author": "Annie S Chen;Archit Sharma;Sergey Levine;Chelsea Finn", "authorids": "~Annie_S_Chen1;~Archit_Sharma1;~Sergey_Levine1;~Chelsea_Finn1", "gender": "F;M;M;F", "homepage": "https://anniesch.github.io/;;https://people.eecs.berkeley.edu/~svlevine/;https://ai.stanford.edu/~cbfinn/", "dblp": "277/1527.html;220/3163.html;80/7594;131/1783", "google_scholar": ";_0IIzxgAAAAJ;8R35rCwAAAAJ;vfPE6hgAAAAJ", "orcid": ";;;", "linkedin": "annie-s-chen/;;;", "or_profile": "~Annie_S_Chen1;~Archit_Sharma1;~Sergey_Levine1;~Chelsea_Finn1", "aff": "Stanford University;Stanford University;Google;Google", "aff_domain": "stanford.edu;stanford.edu;google.com;google.com", "position": "PhD student;Graduate Student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nchen2022you,\ntitle={You Only Live Once: Single-Life Reinforcement Learning},\nauthor={Annie S Chen and Archit Sharma and Sergey Levine and Chelsea Finn},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=303XqIQ5c_d}\n}", "github": "", "project": "", "reviewers": "jw3X;muVA;bMAN;bBp1", "pdf_size": 896674, "rating": "5;5;6;6", "confidence": "2;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;2", "contribution": "2;2;3;2", "wc_summary": "104;103;138;91", "wc_strengths_and_weaknesses": "102;257;162;126", "wc_questions": "128;127;148;95", "wc_limitations": "45;24;11;137", "wc_review": "379;511;459;449", "wc_reply_reviewers": "0;0;47;0", "wc_reply_authors": "530;670;495;693", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 109.0, 17.507141400011598 ], "wc_strengths_and_weaknesses_avg": [ 161.75, 58.99311400494129 ], "wc_questions_avg": [ 124.5, 18.980252896102307 ], "wc_limitations_avg": [ 54.25, 49.29186038282589 ], "wc_review_avg": [ 449.5, 47.01861333557169 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 597.0, 85.78752823108962 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4957115599269349409&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "30bPCDjdxPU", "title": "Faster Reinforcement Learning with Value Target Lower Bounding", "track": "main", "status": "Reject", "tldr": "", "abstract": "We show that an arbitrary lower bound of the maximum achievable value can be used to improve the Bellman value target during value learning. In the tabular case, value learning using the lower bounded Bellman operator converges to the same optimal value as using the original Bellman operator, at a potentially faster speed. In practice, discounted episodic return in episodic tasks and n-step bootstrapped return in continuing tasks can serve as lower bounds to improve the value target. We experiment on Atari games, FetchEnv tasks and a challenging physically simulated car push and reach task. We see large gains in sample efficiency as well as converged performance over common baselines such as TD3, SAC and Hindsight Experience Replay (HER) in most tasks, and observe a reliable and competitive performance against the stronger n-step methods such as td-lambda, Retrace and optimality tightening. Prior works have already successfully applied a special case of lower bounding (using episodic return), but are limited to a small number of episodic tasks. 
To the best of our knowledge, we are the first to propose the general method of value target lower bounding (with possibly bootstrapped return), to demonstrate its optimality in theory, and effectiveness in a wide range of tasks over many strong baselines.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/5783a69d66bdd823236d81b7afccf9471ed398ab.pdf", "author": "Le Zhao;Wei Xu", "authorids": "~Le_Zhao2;~Wei_Xu13", "gender": ";M", "homepage": ";", "dblp": ";", "google_scholar": ";Gxz1fqwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Le_Zhao2;~Wei_Xu13", "aff": ";Horizon Robotics", "aff_domain": ";horizon.auto", "position": ";Researcher", "bibtex": "@misc{\nzhao2022faster,\ntitle={Faster Reinforcement Learning with Value Target Lower Bounding},\nauthor={Le Zhao and Wei Xu},\nyear={2022},\nurl={https://openreview.net/forum?id=30bPCDjdxPU}\n}", "github": "", "project": "", "reviewers": "5ybC;3Knv;41F1;zNRT", "site": "https://openreview.net/forum?id=30bPCDjdxPU", "pdf_size": 688092, "rating": "3;3;3;5", "confidence": "4;4;3;4", "soundness": "2;2;2;2", "novelty": "2;2;2;3", "presentation": "3;1;2;3", "contribution": "2;2;2;3", "wc_summary": "46;52;101;65", "wc_strengths_and_weaknesses": "237;315;395;257", "wc_questions": "39;15;334;3", "wc_limitations": "10;2;106;6", "wc_review": "332;384;936;331", "wc_reply_reviewers": "0;86;25;0", "wc_reply_authors": "556;152;529;257", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 21.342445970413046 ], "wc_strengths_and_weaknesses_avg": [ 301.0, 61.36774397026503 ], "wc_questions_avg": [ 97.75, 137.0134573682454 ], "wc_limitations_avg": [ 31.0, 43.393547907494266 ], "wc_review_avg": [ 495.75, 255.08074701944872 ], "wc_reply_reviewers_avg": [ 27.75, 35.1452343853331 ], "wc_reply_authors_avg": [ 373.5, 173.29238298321135 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10931768291379180304&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "Horizon Robotics", "aff_unique_dep": "", "aff_unique_url": "https://www.horizon-robotics.com/", "aff_unique_abbr": "Horizon Robotics", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Signal Recovery with Non-Expansive Generative Network Priors", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55253", "id": "319xcX5qIcO", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/91d193b65d0b120d29503590827de1ea-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=319xcX5qIcO", "openreview": "https://openreview.net/forum?id=319xcX5qIcO", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55253", "video": "https://nips.cc/virtual/2022/poster/55253", "tldr": "We provide theoretical guarantees for compressed sensing and other signal recovery problems with non-expansive generative network priors.", "abstract": "We study compressive sensing with a deep generative network prior. 
Initial theoretical guarantees for efficient recovery from compressed linear measurements have been developed for signals in the range of a ReLU network with Gaussian weights and logarithmic expansivity: that is when each layer is larger than the previous one by a logarithmic factor. It was later shown that constant expansivity is sufficient for recovery. It has remained open whether the expansivity can be relaxed, allowing for networks with contractive layers (as often the case of real generators). In this work we answer this question, proving that a signal in the range of a Gaussian generative network can be recovered from few linear measurements provided that the width of the layers is proportional to the input layer size (up to log factors). This condition allows the generative network to have contractive layers. Our result is based on showing that Gaussian matrices satisfy a matrix concentration inequality which we term Range Restricted Weight Distribution Condition (R2WDC) and which weakens the Weight Distribution Condition (WDC) upon which previous theoretical guarantees were based. The WDC has also been used to analyze other signal recovery problems with generative network priors. By replacing the WDC with the R2WDC, we are able to extend previous results for signal recovery with expansive generative network priors to non-expansive ones. We discuss these extensions for phase retrieval, denoising, and spiked matrix recovery.", "keywords": "inverse problems;generative networks;signal recovery;compressed sensing", "primary_area": "", "supplementary_material": "/attachment/0342cc93a94846527114a0da9acfe8e9a9cbb772.zip", "author": "Jorio Cocola", "authorids": "~Jorio_Cocola1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "0000-0001-7176-337X", "linkedin": "", "or_profile": "~Jorio_Cocola1", "aff": "Northeastern University", "aff_domain": "northeastern.edu", "position": "PhD student", "bibtex": "@inproceedings{\ncocola2022signal,\ntitle={Signal Recovery with Non-Expansive Generative Network Priors},\nauthor={Jorio Cocola},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=319xcX5qIcO}\n}", "github": "", "project": "", "reviewers": "xC1X;V26y;s2nj;NCFH", "pdf_size": 589428, "rating": "5;5;7;7", "confidence": "2;4;5;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "contribution": "2;3;3;3", "wc_summary": "51;150;167;69", "wc_strengths_and_weaknesses": "245;281;326;80", "wc_questions": "6;529;191;19", "wc_limitations": "1;31;5;1", "wc_review": "303;991;689;169", "wc_reply_reviewers": "0;256;0;0", "wc_reply_authors": "363;1001;438;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;0", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.25, 50.021870216936115 ], "wc_strengths_and_weaknesses_avg": [ 233.0, 92.87895348247632 ], "wc_questions_avg": [ 186.25, 210.92815720050274 ], "wc_limitations_avg": [ 9.5, 12.519984025548915 ], "wc_review_avg": [ 538.0, 323.80395303331306 ], "wc_reply_reviewers_avg": [ 64.0, 110.85125168440814 ], "wc_reply_authors_avg": [ 450.5, 358.4037527705311 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.19245008972987523, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1514371727255248398&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "northeastern.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "BackdoorBench: A Comprehensive Benchmark of Backdoor Learning", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55715", "id": "31_U7n18gM7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/4491ea1c91aa2b22c373e5f1dfce234f-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=31_U7n18gM7", "openreview": "https://openreview.net/forum?id=31_U7n18gM7", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55715", "video": "https://nips.cc/virtual/2022/poster/55715", "author_site": "Baoyuan Wu, Hongrui Chen, Mingda Zhang, Zihao Zhu, Shaokui Wei, Danni Yuan, Chao Shen", "tldr": "8 backdoor attacks; 9 backdoor defenses; 8,000 evaluations; 5 poisoning ratios; 5 models; 4 datasets; 5 analysis tools", "abstract": "Backdoor learning is an emerging and vital topic for studying the vulnerability of deep neural networks (DNNs). Many pioneering backdoor attack and defense methods are being proposed, successively or concurrently, amid a rapid arms race. However, we find that the evaluations of new methods are often not thorough enough to verify their claims and actual performance, mainly due to the rapid development, diverse settings, and the difficulties of implementation and reproducibility. Without thorough evaluations and comparisons, it is not easy to track the current progress and design the future development roadmap of the literature. To alleviate this dilemma, we build a comprehensive benchmark of backdoor learning called BackdoorBench. 
It consists of an extensible modular-based codebase (currently including implementations of 8 state-of-the-art (SOTA) attacks and 9 SOTA defense algorithms) and a standardized protocol of complete backdoor learning. We also provide comprehensive evaluations of every pair of 8 attacks against 9 defenses, with 5 poisoning ratios, based on 5 models and 4 datasets, thus 8,000 pairs of evaluations in total. We present abundant analysis from different perspectives about these 8,000 evaluations, studying the effects of different factors in backdoor learning. All codes and evaluations of BackdoorBench are publicly available at https://backdoorbench.github.io.", "keywords": "Backdoor Learning;Benchmark", "primary_area": "", "supplementary_material": "/attachment/ae4ced6030fc03c3c20e4fccf549ad9e3300577b.pdf", "author": "Baoyuan Wu;Hongrui Chen;Mingda Zhang;Zihao Zhu;Shaokui Wei;Danni Yuan;Chao Shen", "authorids": "~Baoyuan_Wu1;~Hongrui_Chen1;~Mingda_Zhang2;~Zihao_Zhu2;~Shaokui_Wei1;~Danni_Yuan1;~Chao_Shen2", "gender": "M;;M;;M;F;M", "homepage": "https://sites.google.com/site/baoyuanwu2015/;;https://github.com/mdzhangst;;https://shawkui.github.io/;https://github.com/April4lu;http://gr.xjtu.edu.cn/web/cshen", "dblp": "73/7781;;;;323/4243;;48/4825-1", "google_scholar": "JNTG1KoAAAAJ;;pmwwTcgAAAAJ;;WHkEfnsAAAAJ;;m6QY7-wAAAAJ", "orcid": "0000-0003-2183-5990;;;;;;0000-0002-6959-0569", "linkedin": ";;;;;;", "or_profile": "~Baoyuan_Wu1;~Hongrui_Chen1;~Mingda_Zhang2;~Zihao_Zhu2;~Shaokui_Wei1;~Danni_Yuan1;~Chao_Shen2", "aff": "The Chinese University of Hong Kong, Shenzhen;;Nankai University;;The Chinese University of Hong Kong, Shenzhen;;Xi\u2019an Jiaotong University", "aff_domain": "cuhk.edu.cn;;nku.nankai.edu.cn;;cuhk.edu.cn;;xjtu.edu.cn", "position": "Associate Professor;;MS student;;PhD student;;Full Professor", "bibtex": "@inproceedings{\nwu2022backdoorbench,\ntitle={BackdoorBench: A Comprehensive Benchmark of Backdoor Learning},\nauthor={Baoyuan Wu and Hongrui Chen and Mingda Zhang and Zihao Zhu and Shaokui Wei and Danni Yuan and Chao Shen},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=31_U7n18gM7}\n}", "github": "", "project": "", "reviewers": "RPfC;rrL4;C28j;Aamn;wFZF", "pdf_size": 1397182, "rating": "4;7;7;8;9", "confidence": "4;3;4;5;3", "wc_summary_and_contributions": "37;209;39;65;64", "wc_strengths": "56;85;32;1;50", "wc_weaknesses": "134;197;128;1;63", "wc_correctness": "8;1;6;1;9", "wc_clarity": "25;21;5;1;47", "wc_relation_to_prior_work": "89;1;1;1;6", "wc_documentation": "13;1;4;1;38", "wc_additional_feedback": "1;13;1;7;12", "wc_review": "363;528;216;78;289", "wc_reply_reviewers": "0;0;0;43;108", "wc_reply_authors": "1211;1030;620;519;1169", "reply_reviewers": "0;0;0;1;2", "reply_authors": "4;3;2;2;4", "rating_avg": [ 7.0, 1.6733200530681511 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 82.8, 64.20716470924408 ], "wc_strengths_avg": [ 44.8, 27.751756701153173 ], "wc_weaknesses_avg": [ 104.6, 66.95252049026982 ], "wc_correctness_avg": [ 5.0, 3.40587727318528 ], "wc_clarity_avg": [ 19.8, 16.375591592366977 ], "wc_relation_to_prior_work_avg": [ 19.6, 34.753992576393294 ], "wc_documentation_avg": [ 11.4, 14.008568806269968 ], "wc_additional_feedback_avg": [ 6.8, 5.1536394906900505 ], "wc_review_avg": [ 294.8, 149.85246077392256 ], "wc_reply_reviewers_avg": [ 30.2, 42.315009157508165 ], "wc_reply_authors_avg": [ 909.8, 286.028949583779 ], 
"reply_reviewers_avg": [ 0.6, 0.8 ], "reply_authors_avg": [ 3.0, 0.8944271909999159 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.15971914124998499, "gs_citation": 154, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13477998480458836443&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "cuhk.edu.cn;;nku.nankai.edu.cn;;cuhk.edu.cn;;xjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Chinese University of Hong Kong;Nankai University;Xi'an Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.nankai.edu.cn;https://www.xjtu.edu.cn", "aff_unique_abbr": "CUHK;NKU;XJTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Explainable Reinforcement Learning via Model Transforms", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55305", "id": "32Ryt4pAHeD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/dbef234be68d8b170240511639610fd1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=32Ryt4pAHeD", "openreview": "https://openreview.net/forum?id=32Ryt4pAHeD", "poster": "/media/PosterPDFs/NeurIPS%202022/1efa39bcaec6f3900149160693694536.png?t=1666426344.7683468", "slides": "https://nips.cc/virtual/2022/poster/55305", "video": "https://nips.cc/virtual/2022/poster/55305", "author_site": "Mira Finkelstein, Nitsan levy, Lucy Liu, Yoav Kolumbus, David Parkes, Jeffrey S Rosenschein, Sarah Keren", "tldr": "We use formal MDP abstractions and transforms, previously used for expediting planning, to automatically explain discrepancies between the behavior of a DRL agent and the behavior that is anticipated by an observer.", "abstract": "Understanding emerging behaviors of reinforcement learning (RL) agents may be difficult since such agents are often trained in complex environments using highly complex decision making procedures. This has given rise to a variety of approaches to explainability in RL that aim to reconcile discrepancies that may arise between the behavior of an agent and the behavior that is anticipated by an observer. Most recent approaches have relied either on domain knowledge, that may not always be available, on an analysis of the agent\u2019s policy, or on an analysis of specific elements of the underlying environment, typically modeled as a Markov Decision Process (MDP). Our key claim is that even if the underlying model is not fully known (e.g., the transition probabilities have not been accurately learned) or is not maintained by the agent (i.e., when using model-free methods), the model can nevertheless be exploited to automatically generate explanations. For this purpose, we suggest using formal MDP abstractions and transforms, previously used in the literature for expediting the search for optimal policies, to automatically produce explanations. Since such transforms are typically based on a symbolic representation of the environment, they can provide meaningful explanations for gaps between the anticipated and actual agent behavior. We formally define the explainability problem, suggest a class of transforms that can be used for explaining emergent behaviors, and suggest methods that enable efficient search for an explanation. 
We demonstrate the approach on a set of standard benchmarks.", "keywords": "Reinforcement Learning;Deep Reinforcement Learning;Explainability;XAI;Model Based Reasoning", "primary_area": "", "supplementary_material": "/attachment/f935fe99b71987546e12ca166a1b3848ac01b01f.pdf", "author": "Mira Finkelstein;Nitsan Schlotterbeck levy;Lucy Liu;Yoav Kolumbus;David C. Parkes;Jeffrey Rosenschein;Sarah Keren", "authorids": "~Mira_Finkelstein1;nitsan.levyschlot@mail.huji.ac.il;~Lucy_Liu1;~Yoav_Kolumbus1;~David_C._Parkes1;~Jeffrey_Rosenschein1;~Sarah_Keren1", "gender": "F;;F;;M;M;", "homepage": ";;https://lliu12.github.io;;https://parkes.seas.harvard.edu/;http://www.cs.huji.ac.il/~jeff/;https://sarahk.cs.technion.ac.il", "dblp": ";;;;p/DavidCParkes.html;r/JeffreySRosenschein;132/0317", "google_scholar": ";;sb19DNMAAAAJ;;JUn8PgwAAAAJ;https://scholar.google.com.tw/citations?user=YO7cKNMAAAAJ;Lmco3q8AAAAJ", "orcid": ";;0000-0003-1573-3752;;0000-0002-2701-3464;0000-0002-4042-9739;", "linkedin": "mira-finkelstein/;;;;;;", "or_profile": "~Mira_Finkelstein1;nitsan.levyschlot@mail.huji.ac.il;~Lucy_Liu1;~Yoav_Kolumbus1;~David_C._Parkes1;~Jeffrey_Rosenschein1;~Sarah_Keren1", "aff": "Hebrew University of Jerusalem, Technion;;Harvard University;;Google;Hebrew University, Hebrew University of Jerusalem;Technion, Technion", "aff_domain": "huji.ac.il;;harvard.edu;;deepmind.com;cs.huji.ac.il;technion.ac.il", "position": "MS student;;Undergrad student;;Senior Research Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nfinkelstein2022explainable,\ntitle={Explainable Reinforcement Learning via Model Transforms},\nauthor={Mira Finkelstein and Nitsan Schlotterbeck levy and Lucy Liu and Yoav Kolumbus and David C. Parkes and Jeffrey Rosenschein and Sarah Keren},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=32Ryt4pAHeD}\n}", "github": "", "project": "", "reviewers": "2xom;5vMh;Bq81;bxZ3", "pdf_size": 1086304, "rating": "3;4;4;8", "confidence": "5;4;4;5", "soundness": "3;2;2;4", "novelty": "1;3;2;4", "presentation": "2;3;3;4", "contribution": "1;3;2;4", "wc_summary": "124;179;36;142", "wc_strengths_and_weaknesses": "430;377;59;147", "wc_questions": "65;417;371;120", "wc_limitations": "26;126;1;9", "wc_review": "645;1099;467;418", "wc_reply_reviewers": "0;190;209;19", "wc_reply_authors": "375;404;480;89", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 4.75, 1.920286436967152 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 120.25, 52.52796874047196 ], "wc_strengths_and_weaknesses_avg": [ 253.25, 154.5774482258004 ], "wc_questions_avg": [ 243.25, 152.86656763334486 ], "wc_limitations_avg": [ 40.5, 50.18216814765978 ], "wc_review_avg": [ 657.25, 268.66742917592376 ], "wc_reply_reviewers_avg": [ 104.5, 95.47381840064845 ], "wc_reply_authors_avg": [ 337.0, 148.22786512663535 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.39056673294247163, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12642694616127148920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "huji.ac.il;;harvard.edu;;deepmind.com;cs.huji.ac.il;technion.ac.il", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Hebrew University of Jerusalem;Harvard University;Google;Technion - Israel Institute of Technology", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.huji.ac.il;https://www.harvard.edu;https://www.google.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "HUJI;Harvard;Google;Technion", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Israel;United States" }, { "title": "Few-Shot Continual Active Learning by a Robot", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55083", "id": "35I4narr5A", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c58437945392cec01e0c75ff6cef901a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=35I4narr5A", "openreview": "https://openreview.net/forum?id=35I4narr5A", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55083", "video": "https://nips.cc/virtual/2022/poster/55083", "author_site": "Ali Ayub, Carter Fendley", "tldr": "We consider the few-shot continual active learning (FoCAL) problem, and present a novel GMM based framework for FoCAL.", "abstract": "In this paper, we consider a challenging but realistic continual learning problem, Few-Shot Continual Active Learning (FoCAL), where a CL agent is provided with unlabeled data for a new or a previously learned task in each increment and the agent only has limited labeling budget available. Towards this, we build on the continual learning and active learning literature and develop a framework that can allow a CL agent to continually learn new object classes from a few labeled training examples. 
Our framework represents each object class using a uniform Gaussian mixture model (GMM) and uses pseudo-rehearsal to mitigate catastrophic forgetting. The framework also uses uncertainty measures on the Gaussian representations of the previously learned classes to find the most informative samples to be labeled in an increment. We evaluate our approach on the CORe-50 dataset and on a real humanoid robot for the object classification task. The results show that our approach not only produces state-of-the-art results on the dataset but also allows a real robot to continually learn unseen objects in a real environment with limited labeling supervision provided by its user.", "keywords": "Continual Learning;Catastrophic Forgetting;Active Learning;Human-Robot Interaction", "primary_area": "", "supplementary_material": "/attachment/e64b0a1620a1f641d1d33057f1424a58f3fc6fa2.zip", "author": "Ali Ayub;Carter Fendley", "authorids": "~Ali_Ayub1;ccf5164@psu.edu", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nayub2022fewshot,\ntitle={Few-Shot Continual Active Learning by a Robot},\nauthor={Ali Ayub and Carter Fendley},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=35I4narr5A}\n}", "github": "", "project": "", "reviewers": "6wg4;JcVg;ACp7", "pdf_size": 523540, "rating": "5;5;6", "confidence": "5;3;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "contribution": "2;2;3", "wc_summary": "102;172;120", "wc_strengths_and_weaknesses": "411;228;273", "wc_questions": "27;127;31", "wc_limitations": "13;49;34", "wc_review": "553;576;458", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "745;607;527", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 131.33333333333334, 29.67977238606942 ], "wc_strengths_and_weaknesses_avg": [ 304.0, 77.85884663928692 ], "wc_questions_avg": [ 61.666666666666664, 46.22649552895925 ], "wc_limitations_avg": [ 32.0, 14.7648230602334 ], "wc_review_avg": [ 529.0, 51.07510809255979 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 626.3333333333334, 90.04196552472384 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6835981977438370944&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";", "author_num": 2 }, { "title": "Neural Network Architecture Beyond Width and Depth", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53443", "id": "36-xl1wdyu", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/257be12f31dfa7cc158dda99822c6fd1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=36-xl1wdyu", "openreview": "https://openreview.net/forum?id=36-xl1wdyu", "poster": 
"/media/PosterPDFs/NeurIPS%202022/53443.png?t=1670039618.167602", "slides": "https://nips.cc/virtual/2022/poster/53443", "video": "https://nips.cc/virtual/2022/poster/53443", "author_site": "Shijun Zhang, Zuowei Shen, Haizhao Yang", "tldr": "", "abstract": "This paper proposes a new neural network architecture by introducing an additional dimension called height beyond width and depth. Neural network architectures with height, width, and depth as hyper-parameters are called three-dimensional architectures. It is shown that neural networks with three-dimensional architectures are significantly more expressive than the ones with two-dimensional architectures (those with only width and depth as hyper-parameters), e.g., standard fully connected networks. The new network architecture is constructed recursively via a nested structure, and hence we call a network with the new architecture nested network (NestNet). A NestNet of height $s$ is built with each hidden neuron activated by a NestNet of height $\\le s-1$. When $s=1$, a NestNet degenerates to a standard network with a two-dimensional architecture. It is proved by construction that height-$s$ ReLU NestNets with $\\mathcal{O}(n)$ parameters can approximate $1$-Lipschitz continuous functions on $[0,1]^d$ with an error $\\mathcal{O}(n^{-(s+1)/d})$, while the optimal approximation error of standard ReLU networks with $\\mathcal{O}(n)$ parameters is $\\mathcal{O}(n^{-2/d})$. Furthermore, such a result is extended to generic continuous functions on $[0,1]^d$ with the approximation error characterized by the modulus of continuity. Finally, we use numerical experimentation to show the advantages of the super-approximation power of ReLU NestNets.", "keywords": "Neural Network Approximation;Nested Architecture;Parameter Sharing;Function Composition", "primary_area": "", "supplementary_material": "/attachment/66a67c596352e7759e2f3bc541111819bd38d876.pdf", "author": "Shijun Zhang;Zuowei Shen;Haizhao Yang", "authorids": "~Shijun_Zhang1;~Zuowei_Shen1;~Haizhao_Yang1", "gender": "M;M;M", "homepage": "https://shijunzhang.top/;https://blog.nus.edu.sg/matzuows/;https://haizhaoyang.github.io", "dblp": ";;139/1215", "google_scholar": "NZA4ur4AAAAJ;985QGhAAAAAJ;p4mxTIwAAAAJ", "orcid": "0000-0003-4115-7891;;", "linkedin": ";;", "or_profile": "~Shijun_Zhang1;~Zuowei_Shen1;~Haizhao_Yang1", "aff": "Duke University;National University of Singapore;Purdue University", "aff_domain": "duke.edu;nus.edu;purdue.edu", "position": "Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2022neural,\ntitle={Neural network architecture beyond width and depth},\nauthor={Shijun Zhang and Zuowei Shen and Haizhao Yang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=36-xl1wdyu}\n}", "github": "", "project": "", "reviewers": "wtW7;zWh2;7MPy", "pdf_size": 747396, "rating": "5;6;6", "confidence": "2;4;2", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "4;2;3", "contribution": "2;2;3", "wc_summary": "67;68;51", "wc_strengths_and_weaknesses": "91;532;69", "wc_questions": "2;176;94", "wc_limitations": "13;18;17", "wc_review": "173;794;231", "wc_reply_reviewers": "88;102;0", "wc_reply_authors": "480;459;97", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.0, 7.788880963698615 ], "wc_strengths_and_weaknesses_avg": [ 230.66666666666666, 213.26405124998342 ], "wc_questions_avg": [ 90.66666666666667, 71.07429602574729 ], "wc_limitations_avg": [ 16.0, 2.160246899469287 ], "wc_review_avg": [ 399.3333333333333, 280.07419651862887 ], "wc_reply_reviewers_avg": [ 63.333333333333336, 45.146674542232034 ], "wc_reply_authors_avg": [ 345.3333333333333, 175.80734405087355 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10319871590909975908&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "email": "duke.edu;nus.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Duke University;National University of Singapore;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.duke.edu;https://www.nus.edu.sg;https://www.purdue.edu", "aff_unique_abbr": "Duke;NUS;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Singapore" }, { "title": "Redeeming intrinsic rewards via constrained optimization", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54497", "id": "36Yz37cEN_Q", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/204fee94c982a19230c39045aa54f977-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=36Yz37cEN_Q", "openreview": "https://openreview.net/forum?id=36Yz37cEN_Q", "poster": "/media/PosterPDFs/NeurIPS%202022/54497.png?t=1669597397.829213", "slides": "https://nips.cc/virtual/2022/poster/54497", "video": "https://nips.cc/virtual/2022/poster/54497", "author_site": "Eric Chen, Zhang-Wei Hong, Joni Pajarinen, Pulkit Agrawal", "tldr": "We alleviate the performance drop resulting from the bias of intrinsic rewards while preserving the merits of intrinsic rewards.", "abstract": "State-of-the-art reinforcement learning (RL) algorithms typically use random sampling (e.g., $\\epsilon$-greedy) for exploration, but this method fails on hard exploration tasks like Montezuma's Revenge. To address the challenge of exploration, prior works incentivize exploration by rewarding the agent when it visits novel states. 
Such intrinsic rewards (also called exploration bonus or curiosity) often lead to excellent performance on hard exploration tasks. However, on easy exploration tasks, the agent gets distracted by intrinsic rewards and performs unnecessary exploration even when sufficient task (also called extrinsic) reward is available. Consequently, such an overly curious agent performs worse than an agent trained with only task reward. \nSuch inconsistency in performance across tasks prevents the widespread use of intrinsic rewards with RL algorithms. We propose a principled constrained optimization procedure called Extrinsic-Intrinsic Policy Optimization (EIPO) that automatically tunes the importance of the intrinsic reward: it suppresses the intrinsic reward when exploration is unnecessary and increases it when exploration is required. The result is superior exploration that does not require manual tuning in balancing the intrinsic reward against the task reward. Consistent performance gains across sixty-one ATARI games validate our claim. The code is available at https://github.com/Improbable-AI/eipo.", "keywords": "reinforcement learning;intrinsic reward;curiosity-driven exploration", "primary_area": "", "supplementary_material": "/attachment/9b9f5b5705fff77d60d94c8f7e80700e0325609f.pdf", "author": "Eric R Chen;Zhang-Wei Hong;Joni Pajarinen;Pulkit Agrawal", "authorids": "~Eric_R_Chen1;~Zhang-Wei_Hong1;~Joni_Pajarinen2;~Pulkit_Agrawal1", "gender": ";M;;M", "homepage": "https://echen9898.github.io/;;;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": ";198/0600;23/8355;149/2672", "google_scholar": ";GZkyN4cAAAAJ;https://scholar.google.fi/citations?user=-2fJStwAAAAJ;UpZmJI0AAAAJ", "orcid": ";;0000-0003-4469-8191;", "linkedin": ";;;", "or_profile": "~Eric_R_Chen1;~Zhang-Wei_Hong1;~Joni_Pajarinen2;~Pulkit_Agrawal1", "aff": ";Microsoft Research;Technische Universit\u00e4t Darmstadt;Massachusetts Institute of Technology", "aff_domain": ";research.microsoft.com;tu-darmstadt.de;mit.edu", "position": ";Internship;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2022redeeming,\ntitle={Redeeming intrinsic rewards via constrained policy optimization},\nauthor={Eric R Chen and Zhang-Wei Hong and Joni Pajarinen and Pulkit Agrawal},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=36Yz37cEN_Q}\n}", "github": "", "project": "", "reviewers": "sfNt;bbkw;JLyn", "pdf_size": 1077155, "rating": "7;7;8", "confidence": "5;3;3", "soundness": "4;3;4", "novelty": "4;3;3", "presentation": "4;3;4", "contribution": "4;3;3", "wc_summary": "76;99;149", "wc_strengths_and_weaknesses": "70;202;254", "wc_questions": "62;67;14", "wc_limitations": "1;14;23", "wc_review": "209;382;440", "wc_reply_reviewers": "29;12;0", "wc_reply_authors": "645;237;753", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 108.0, 30.474032661705056 ], "wc_strengths_and_weaknesses_avg": [ 175.33333333333334, 77.4481905677739 ], "wc_questions_avg": [ 47.666666666666664, 23.893281249943232 ], "wc_limitations_avg": [ 12.666666666666666, 9.030811456096044 ], "wc_review_avg": [ 343.6666666666667, 98.12350494260905 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 11.897712198383164 ], "wc_reply_authors_avg": [ 545.0, 222.2071105972984 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1760121311943802855&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";research.microsoft.com;tu-darmstadt.de;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Microsoft;Technische Universit\u00e4t Darmstadt;Massachusetts Institute of Technology", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.tu-darmstadt.de;https://web.mit.edu", "aff_unique_abbr": "MSR;TUD;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "Domain Generalization by Learning and Removing Domain-specific Features", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54387", "id": "37Rf7BTAtAM", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/9941833e8327910ef25daeb9005e4748-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=37Rf7BTAtAM", "openreview": "https://openreview.net/forum?id=37Rf7BTAtAM", "poster": "/media/PosterPDFs/NeurIPS%202022/54387.png?t=1669599176.9211016", "slides": "https://nips.cc/virtual/2022/poster/54387", "video": "https://nips.cc/virtual/2022/poster/54387", "author_site": "Yu Ding, Lei Wang, Bin Liang, Shuming Liang, Yang Wang, Fang Chen", "tldr": "", "abstract": "Deep Neural Networks (DNNs) suffer from domain shift when the test dataset follows a distribution different from the training dataset. Domain generalization aims to tackle this issue by learning a model that can generalize to unseen domains. In this paper, we propose a new approach that aims to explicitly remove domain-specific features for domain generalization. 
Following this approach, we propose a novel framework called Learning and Removing Domain-specific features for Generalization (LRDG) that learns a domain-invariant model by tactically removing domain-specific features from the input images. Specifically, we design a classifier to effectively learn the domain-specific features for each source domain. We then develop an encoder-decoder network to map each input image into a new image space where the learned domain-specific features are removed. With the images output by the encoder-decoder network, another classifier is designed to learn the domain-invariant features to conduct image classification. Extensive experiments demonstrate that our framework achieves superior performance compared with state-of-the-art methods.", "keywords": "Domain Generalization;Domain-invariant Features;Domain-specific Features;Transfer Learning", "primary_area": "", "supplementary_material": "/attachment/ca8bb77308c9499e3d30fcfeb0c09275775fee54.pdf", "author": "Yu Ding;Lei Wang;Bin Liang;Shuming Liang;Yang Wang;Fang Chen", "authorids": "~Yu_Ding7;~Lei_Wang13;~Bin_Liang7;~Shuming_Liang1;~Yang_Wang21;~Fang_Chen3", "gender": ";M;;M;M;F", "homepage": ";https://sites.google.com/view/lei-hs-wang;;;https://www.uts.edu.au/staff/yang.wang;https://profiles.uts.edu.au/Fang.Chen", "dblp": ";w/LeiWang1;71/6053-3;;;52/488-1.html", "google_scholar": ";5ClujcoAAAAJ;https://scholar.google.com.au/citations?user=qt8kAwoAAAAJ;;;EMVGAKgAAAAJ", "orcid": "0000-0001-6926-1258;0000-0002-0961-0441;;;;0000-0003-4971-8729", "linkedin": ";;;shuming-liang-527419164/;;", "or_profile": "~Yu_Ding7;~Lei_Wang13;~Bin_Liang7;~Shuming_Liang1;~Yang_Wang21;~Fang_Chen3", "aff": "University of Wollongong;University of Wollongong;University of Technology Sydney;University of Technology Sydney;University of Technology Sydney;University of New South Wales", "aff_domain": "uow.edu.au;uow.edu.au;uts.edu.au;uts.edu.au;uts.edu.au;unsw.edu.au", "position": "PhD student;Associate Professor;Lecturer;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nding2022domain,\ntitle={Domain Generalization by Learning and Removing Domain-specific Features},\nauthor={Yu Ding and Lei Wang and Bin Liang and Shuming Liang and Yang Wang and Fang Chen},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=37Rf7BTAtAM}\n}", "github": "", "project": "", "reviewers": "T9VV;MBUN;8K1L", "pdf_size": 489511, "rating": "4;6;8", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "2;2;4", "presentation": "3;3;4", "contribution": "2;2;4", "wc_summary": "91;79;83", "wc_strengths_and_weaknesses": "208;271;318", "wc_questions": "48;39;47", "wc_limitations": "15;1;154", "wc_review": "362;390;602", "wc_reply_reviewers": "71;99;118", "wc_reply_authors": "623;772;407", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 84.33333333333333, 4.988876515698588 ], "wc_strengths_and_weaknesses_avg": [ 265.6666666666667, 45.065384597148274 ], "wc_questions_avg": [ 44.666666666666664, 4.027681991198191 ], "wc_limitations_avg": [ 56.666666666666664, 69.06196895220666 ], "wc_review_avg": [ 451.3333333333333, 107.14890988194368 ], "wc_reply_reviewers_avg": [ 96.0, 19.30457631409368 ], "wc_reply_authors_avg": [ 600.6666666666666, 149.84510521053252 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5494103796376605602&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "uow.edu.au;uow.edu.au;uts.edu.au;uts.edu.au;uts.edu.au;unsw.edu.au", "author_num": 6, "aff_unique_index": "0;0;1;1;1;2", "aff_unique_norm": "University of Wollongong;University of Technology Sydney;University of New South Wales", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uow.edu.au;https://www.uts.edu.au;https://www.unsw.edu.au", "aff_unique_abbr": "UOW;UTS;UNSW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Neural Set Function Extensions: Learning with Discrete Functions in High Dimensions", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52994", "id": "39XK7VJ0sKG", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6294a235c0b80f0a2b224375c546c750-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=39XK7VJ0sKG", "openreview": "https://openreview.net/forum?id=39XK7VJ0sKG", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/52994", "video": "https://nips.cc/virtual/2022/poster/52994", "author_site": "Nikolaos Karalias, Joshua Robinson, Andreas Loukas, Stefanie Jegelka", "tldr": "We present a framework that extends discrete set functions onto continuous and high-dimensional domains with the purpose of using them in neural network architectures.", "abstract": "Integrating functions on discrete domains into neural networks is key to developing their capability to reason about discrete objects. But, discrete domains are (1) not naturally amenable to gradient-based optimization, and (2) incompatible with deep learning architectures that rely on representations in high-dimensional vector spaces. 
In this work, we address both difficulties for set functions, which capture many important discrete problems. First, we develop a framework for extending set functions onto low-dimensional continuous domains, where many extensions are naturally defined. Our framework subsumes many well-known extensions as special cases. Second, to avoid undesirable low-dimensional neural network bottlenecks, we convert low-dimensional extensions into representations in high-dimensional spaces, taking inspiration from the success of semidefinite programs for combinatorial optimization. Empirically, we observe benefits of our extensions for unsupervised neural combinatorial optimization, in particular with high-dimensional representations.", "keywords": "deep learning;unsupervised learning;combinatorial optimization;algorithmic reasoning", "primary_area": "", "supplementary_material": "/attachment/9c98e843f1e1ddab6c4951461f22d1796bf9809d.zip", "author": "Nikolaos Karalias;Joshua David Robinson;Andreas Loukas;Stefanie Jegelka", "authorids": "~Nikolaos_Karalias1;~Joshua_David_Robinson1;~Andreas_Loukas1;~Stefanie_Jegelka3", "gender": "M;M;M;F", "homepage": ";https://joshrobinson.mit.edu/;;http://people.csail.mit.edu/stefje/", "dblp": "267/9290;15/4759;19/10012;38/7003", "google_scholar": "CRLG9UcAAAAJ;E02doCkAAAAJ;https://scholar.google.ch/citations?user=-XGXJbQAAAAJ;gTWUZlsAAAAJ", "orcid": "0000-0002-9471-5343;;;", "linkedin": ";;;", "or_profile": "~Nikolaos_Karalias1;~Joshua_David_Robinson1;~Andreas_Loukas1;~Stefanie_Jegelka3", "aff": "Swiss Federal Institute of Technology Lausanne;Massachusetts Institute of Technology;Roche / Genentech;Massachusetts Institute of Technology", "aff_domain": "epfl.ch;mit.edu;roche.com;mit.edu", "position": "PhD student;PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nkaralias2022neural,\ntitle={Neural Set Function Extensions: Learning with Discrete Functions in High Dimensions},\nauthor={Nikolaos Karalias and Joshua David Robinson and Andreas Loukas and Stefanie Jegelka},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=39XK7VJ0sKG}\n}", "github": "", "project": "", "reviewers": "NU5a;Wk2P", "pdf_size": 552049, "rating": "6;8", "confidence": "2;2", "soundness": "3;3", "novelty": "3;4", "presentation": "2;4", "contribution": "3;4", "wc_summary": "70;132", "wc_strengths_and_weaknesses": "233;149", "wc_questions": "106;50", "wc_limitations": "43;49", "wc_review": "452;380", "wc_reply_reviewers": "33;73", "wc_reply_authors": "1112;691", "reply_reviewers": "1;1", "reply_authors": "3;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 1.0 ], "contribution_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 101.0, 31.0 ], "wc_strengths_and_weaknesses_avg": [ 191.0, 42.0 ], "wc_questions_avg": [ 78.0, 28.0 ], "wc_limitations_avg": [ 46.0, 3.0 ], "wc_review_avg": [ 416.0, 36.0 ], "wc_reply_reviewers_avg": [ 53.0, 20.0 ], "wc_reply_authors_avg": [ 901.5, 210.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11142300575635398098&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "epfl.ch;mit.edu;roche.com;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Massachusetts Institute of Technology;Roche", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://web.mit.edu;https://www.roche.com", "aff_unique_abbr": "EPFL;MIT;Roche", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Perceptual Attacks of No-Reference Image Quality Models with Human-in-the-Loop", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54368", "id": "3AV_53iRfTi", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/137cb5dd61b2685bd2623967daee6860-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3AV_53iRfTi", "openreview": "https://openreview.net/forum?id=3AV_53iRfTi", "poster": "/media/PosterPDFs/NeurIPS%202022/54368.png?t=1668571557.5535562", "slides": "https://nips.cc/virtual/2022/poster/54368", "video": "https://nips.cc/virtual/2022/poster/54368", "author_site": "Weixia Zhang, Dingquan Li, Xiongkuo Min, Guangtao Zhai, Guodong Guo, Xiaokang Yang, Kede Ma", "tldr": "", "abstract": "No-reference image quality assessment (NR-IQA) aims to quantify how humans perceive visual distortions of digital images without access to their undistorted references. NR-IQA models are extensively studied in computational vision, and are widely used for performance evaluation and perceptual optimization of man-made vision systems. Here we make one of the first attempts to examine the perceptual robustness of NR-IQA models. Under a Lagrangian formulation, we identify insightful connections of the proposed perceptual attack to previous beautiful ideas in computer vision and machine learning. We test one knowledge-driven and three data-driven NR-IQA methods under four full-reference IQA models (as approximations to human perception of just-noticeable differences). 
Through carefully designed psychophysical experiments, we find that all four NR-IQA models are vulnerable to the proposed perceptual attack. More interestingly, we observe that the generated counterexamples are not transferable, manifesting themselves as distinct design flaws of respective NR-IQA methods. Source code is available at https://github.com/zwx8981/PerceptualAttack_BIQA.", "keywords": "", "primary_area": "", "supplementary_material": "", "author": "Weixia Zhang;Dingquan Li;Xiongkuo Min;Guangtao Zhai;Guodong Guo;Xiaokang Yang;Kede Ma", "authorids": "~Weixia_Zhang1;~Dingquan_Li1;minxiongkuo@sjtu.edu.cn;~Guangtao_Zhai1;~Guodong_Guo1;~Xiaokang_Yang1;~Kede_Ma2", "gender": "M;M;;M;M;M;", "homepage": ";https://lidq92.github.io;;https://faculty.sjtu.edu.cn/zhaiguangtao/en/index.htm;http://pages.cs.wisc.edu/~gdguo/;https://icne.sjtu.edu.cn/info/1064/1078.htm;https://kedema.org/", "dblp": "196/3124;https://dblp.uni-trier.de/pid/207/2000;;19/3230;92/4520;06/3071-1.html;127/1809", "google_scholar": "KK2nLnQAAAAJ;hdRPwGkAAAAJ;;E6zbSYgAAAAJ;f2Y5nygAAAAJ;yDEavdMAAAAJ;https://scholar.google.com.hk/citations?user=sfzOyFoAAAAJ", "orcid": ";0000-0002-5549-9027;;;;0000-0003-4029-3322;0000-0001-8608-1128", "linkedin": ";;;;;;", "or_profile": "~Weixia_Zhang1;~Dingquan_Li1;minxiongkuo@sjtu.edu.cn;~Guangtao_Zhai1;~Guodong_Guo1;~Xiaokang_Yang1;~Kede_Ma2", "aff": "Shanghai Jiaotong University;Pengcheng Laboratory ;;Shanghai Jiaotong University;West Virginia University;Shanghai Jiaotong University;City University of Hong Kong", "aff_domain": "sjtu.edu.cn;pcl.ac.cn;;sjtu.edu.cn;wvu.edu;sjtu.edu.cn;cityu.edu.hk", "position": "Postdoc;Postdoc;;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2022perceptual,\ntitle={Perceptual Attacks of No-Reference Image Quality Models with Human-in-the-Loop},\nauthor={Weixia Zhang and Dingquan Li and Xiongkuo Min and Guangtao Zhai and Guodong Guo and Xiaokang Yang and Kede Ma},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3AV_53iRfTi}\n}", "github": "", "project": "", "reviewers": "4XSx;Hmqo;qeax;1irv", "pdf_size": 8635696, "rating": "5;5;6;8", "confidence": "3;3;2;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;2;3;4", "contribution": "2;3;3;4", "wc_summary": "38;52;82;130", "wc_strengths_and_weaknesses": "85;154;66;129", "wc_questions": "99;13;59;239", "wc_limitations": "8;1;7;30", "wc_review": "230;220;214;528", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1299;773;416;1222", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.5, 35.25265947414464 ], "wc_strengths_and_weaknesses_avg": [ 108.5, 34.81738071710737 ], "wc_questions_avg": [ 102.5, 84.47928740229762 ], "wc_limitations_avg": [ 11.5, 11.01135777277262 ], "wc_review_avg": [ 298.0, 132.9135057095403 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 927.5, 357.15717828429547 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7492686492653551, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8403042660344902079&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;pcl.ac.cn;;sjtu.edu.cn;wvu.edu;sjtu.edu.cn;cityu.edu.hk", "author_num": 7, "aff_unique_index": "0;1;0;2;0;3", "aff_unique_norm": "Shanghai Jiao Tong University;Pengcheng Laboratory;West Virginia University;City University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;;https://www.wvu.edu;https://www.cityu.edu.hk", "aff_unique_abbr": "SJTU;;WVU;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "CEBaB: Estimating the Causal Effects of Real-World Concepts on NLP Model Behavior", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55045", "id": "3AbigH4s-ml", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/701ec28790b29a5bc33832b7bdc4c3b6-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3AbigH4s-ml", "openreview": "https://openreview.net/forum?id=3AbigH4s-ml", "poster": "/media/PosterPDFs/NeurIPS%202022/55045.png?t=1669029455.5157485", "slides": "https://nips.cc/virtual/2022/poster/55045", "video": "https://nips.cc/virtual/2022/poster/55045", "author_site": "Eldar D Abraham, Karel D'Oosterlinck, Amir Feder, Yair Gat, Atticus Geiger, Christopher Potts, Roi Reichart, Zhengxuan Wu", "tldr": "Casting model explanation as a causal inference problem, we introduce CEBaB, a new benchmark dataset for assessing explanation methods in NLP.", "abstract": "The increasing size and complexity of modern ML systems has improved their predictive capabilities but made their behavior harder to explain. Many techniques for model explanation have been developed in response, but we lack clear criteria for assessing these techniques. 
In this paper, we cast model explanation as the causal inference problem of estimating causal effects of real-world concepts on the output behavior of ML models given actual input data. We introduce CEBaB, a new benchmark dataset for assessing concept-based explanation methods in Natural Language Processing (NLP). CEBaB consists of short restaurant reviews with human-generated counterfactual reviews in which an aspect (food, noise, ambiance, service) of the dining experience was modified. Original and counterfactual reviews are annotated with multiply-validated sentiment ratings at the aspect-level and review-level. The rich structure of CEBaB allows us to go beyond input features to study the effects of abstract, real-world concepts on model behavior. We use CEBaB to compare the quality of a range of concept-based explanation methods covering different assumptions and conceptions of the problem, and we seek to establish natural metrics for comparative assessments of these methods.", "keywords": "Explainability;Causality;Benchmark;Causal Explanation", "primary_area": "", "supplementary_material": "/attachment/0708089121ec3e9908e1fb465b31877094667a86.pdf", "author": "Eldar David Abraham;Karel D'Oosterlinck;Amir Feder;Yair Ori Gat;Atticus Geiger;Christopher Potts;Roi Reichart;Zhengxuan Wu", "authorids": "~Eldar_David_Abraham1;~Karel_D'Oosterlinck1;~Amir_Feder1;~Yair_Ori_Gat1;~Atticus_Geiger1;~Christopher_Potts1;~Roi_Reichart1;~Zhengxuan_Wu1", "gender": "M;;;M;M;M;M;M", "homepage": "https://eldarab.github.io/;https://www.kareldoosterlinck.com/;https://www.amirfeder.com/;https://atticusg.github.io/;http://web.stanford.edu/~cgpotts/;https://roireichart.com/;https://cs.stanford.edu/~wuzhengx/;", "dblp": "321/1804;;214/3604;229/4086;13/2617;96/5429;234/4650;", "google_scholar": "1LTWz10AAAAJ;;ERwoPLIAAAAJ;;3j08YoAAAAAJ;https://scholar.google.co.il/citations?user=xXJIsh4AAAAJ;CBvE6lwAAAAJ;", "orcid": ";;0000-0001-5472-1135;;0000-0002-7978-6055;;;", "linkedin": ";karel-doosterlinck/;amir-feder-b65b7035/;;;roi-reichart-ba2a8a7/;;yair-gat/", "or_profile": "~Eldar_David_Abraham1;~Karel_D'Oosterlinck1;~Amir_Feder1;~Atticus_Geiger1;~Christopher_Potts1;~Roi_Reichart1;~Zhengxuan_Wu1;~Yair_Gat1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Stanford University;Technion - Israel Institute of Technology, Technion;Stanford University;Stanford University;Technion, Israel Institute of Technology;Stanford University;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology", "aff_domain": "campus.technion.ac.il;stanford.edu;technion.ac.il;stanford.edu;stanford.edu;technion.ac.il;stanford.edu;campus.technion.ac.il", "position": "MS student;PhD student;PhD student;PhD student;Full Professor;Associate Professor;MS student;MS student", "bibtex": "@inproceedings{\nabraham2022cebab,\ntitle={{CEB}aB: Estimating the Causal Effects of Real-World Concepts on {NLP} Model Behavior},\nauthor={Eldar David Abraham and Karel D'Oosterlinck and Amir Feder and Yair Ori Gat and Atticus Geiger and Christopher Potts and Roi Reichart and Zhengxuan Wu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3AbigH4s-ml}\n}", "github": "", "project": "", "reviewers": "Jcm3;T7tB;dBGa;NcbX", "pdf_size": 1740269, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "3;3;2;2", "presentation": "3;3;3;2", "contribution": "3;3;2;2", "wc_summary": "42;188;102;147", "wc_strengths_and_weaknesses": "61;138;93;203", "wc_questions": "14;38;231;22", "wc_limitations": "5;21;2;29", "wc_review": "122;385;428;401", "wc_reply_reviewers": "0;0;77;190", "wc_reply_authors": "223;400;1108;1651", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;2;4", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 119.75, 54.22349582976 ], "wc_strengths_and_weaknesses_avg": [ 123.75, 53.307480713310774 ], "wc_questions_avg": [ 76.25, 89.7618376594419 ], "wc_limitations_avg": [ 14.25, 11.166355717063647 ], "wc_review_avg": [ 334.0, 123.35923151511605 ], "wc_reply_reviewers_avg": [ 66.75, 77.79259283505083 ], "wc_reply_authors_avg": [ 845.5, 570.901261165186 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3091824843462176714&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "campus.technion.ac.il;stanford.edu;technion.ac.il;stanford.edu;stanford.edu;technion.ac.il;stanford.edu;campus.technion.ac.il", "author_num": 8, "aff_unique_index": "0;1;0;1;1;2;1;0", "aff_unique_norm": "Technion - Israel Institute of Technology;Stanford University;Israel Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.stanford.edu;https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion;Stanford;Technion", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0;1;1;0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "FIRE: Semantic Field of Words Represented as Non-Linear Functions", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54361", "id": "3AxaYRmJ2KY", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f08223bc8d177df6807811c32f5acfed-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3AxaYRmJ2KY", "openreview": "https://openreview.net/forum?id=3AxaYRmJ2KY", "poster": "/media/PosterPDFs/NeurIPS%202022/54361.png?t=1669386638.635764", "slides": "https://nips.cc/virtual/2022/poster/54361", "video": "https://nips.cc/virtual/2022/poster/54361", "author_site": "Xin Du, Kumiko Tanaka-Ishii", "tldr": "A novel semantic Field Representation (FIRE) for words and sentences, enabling nonlinear polysemy and linear compositionality in a unified framework.", "abstract": "State-of-the-art word embeddings presume a linear vector space, but this approach does not easily incorporate the nonlinearity that is necessary to represent polysemy. We thus propose a novel semantic FIeld REpresentation, called FIRE, which is a $D$-dimensional field in which every word is represented as a set of its locations and a nonlinear function covering the field. 
The strength of a word's relation to another word at a certain location is measured as the function value at that location. With FIRE, compositionality is represented via functional additivity, whereas polysemy is represented via the set of points and the function's multimodality. By implementing FIRE for English and comparing it with previous representation methods via word and sentence similarity tasks, we show that FIRE produces comparable or even better results. In an evaluation of polysemy to predict the number of word senses, FIRE greatly outperformed BERT and Word2vec, providing evidence of how FIRE represents polysemy. The code is available at https://github.com/kduxin/firelang.", "keywords": "natural language;nonlinear word representation;field representation;word polysemy;semantic compositionality", "primary_area": "", "supplementary_material": "/attachment/e0ac50c8e34093e367946604d4e3c06cee07dd50.pdf", "author": "Xin Du;Kumiko Tanaka-Ishii", "authorids": "~Xin_Du4;~Kumiko_Tanaka-Ishii2", "gender": "M;", "homepage": "https://kduxin.com;", "dblp": ";", "google_scholar": "8of0O7YAAAAJ;", "orcid": "0000-0001-9135-2906;", "linkedin": ";", "or_profile": "~Xin_Du4;~Kumiko_Tanaka-Ishii2", "aff": "The University of Tokyo;", "aff_domain": "u-tokyo.ac.jp;", "position": "PhD student;", "bibtex": "@inproceedings{\ndu2022semantic,\ntitle={Semantic Field of Words Represented as Non-Linear Functions},\nauthor={Xin Du and Kumiko Tanaka-Ishii},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3AxaYRmJ2KY}\n}", "github": "", "project": "", "reviewers": "GMJr;A5Nv;J2V2;awZz", "pdf_size": 5257829, "rating": "3;3;6;8", "confidence": "4;4;3;5", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "3;1;3;4", "contribution": "2;2;3;4", "wc_summary": "167;84;63;48", "wc_strengths_and_weaknesses": "465;166;447;52", "wc_questions": "130;37;66;111", "wc_limitations": "31;32;31;2", "wc_review": "793;319;607;213", "wc_reply_reviewers": "233;0;0;51", "wc_reply_authors": "955;661;783;401", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 45.98097432634502 ], "wc_strengths_and_weaknesses_avg": [ 282.5, 178.23369490643458 ], "wc_questions_avg": [ 86.0, 36.61283927804562 ], "wc_limitations_avg": [ 24.0, 12.708265027138834 ], "wc_review_avg": [ 483.0, 229.8216699965432 ], "wc_reply_reviewers_avg": [ 71.0, 95.82014401992934 ], "wc_reply_authors_avg": [ 700.0, 201.76471445721128 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Tk1J9dGqaqgJ:scholar.google.com/&scioq=FIRE:+Semantic+Field+of+Words+Represented+as+Non-Linear+Functions&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "u-tokyo.ac.jp;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": 
"Pile of Law: Learning Responsible Data Filtering from the Law and a 256GB Open-Source Legal Dataset", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55671", "id": "3HCT3xfNm9r", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/bc218a0c656e49d4b086975a9c785f47-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=3HCT3xfNm9r", "openreview": "https://openreview.net/forum?id=3HCT3xfNm9r", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/55671", "video": "https://nips.cc/virtual/2022/poster/55671", "author_site": "Peter Henderson, Mark Krass, Lucia Zheng, Neel Guha, Christopher D Manning, Dan Jurafsky, Daniel Ho", "tldr": "In this work we have examine how the law and legal data can inform data filtering practices and provide an extensive 256GB legal dataset (the Pile of Law) that can be used to learn these norms, and for pretraining.", "abstract": "One concern with the rise of large language models lies with their potential for significant harm, particularly from pretraining on biased, obscene, copyrighted, and private information. Emerging ethical approaches have attempted to filter pretraining material, but such approaches have been ad hoc and failed to take context into account. We offer an approach to filtering grounded in law, which has directly addressed the tradeoffs in filtering material. First, we gather and make available the Pile of Law, a ~256GB (and growing) dataset of open-source English-language legal and administrative data, covering court opinions, contracts, administrative rules, and legislative records. Pretraining on the Pile of Law may help with legal tasks that have the promise to improve access to justice. Second, we distill the legal norms that governments have developed to constrain the inclusion of toxic or private content into actionable lessons for researchers and discuss how our dataset reflects these norms. Third, we show how the Pile of Law offers researchers the opportunity to learn such filtering rules directly from the data, providing an exciting new research direction in model-based processing.", "keywords": "data curation;legal data;content filtering;ai and law", "primary_area": "", "supplementary_material": "/attachment/a533f84f4ac0976c9983bdd5d4f166be7b760dbc.pdf", "author": "Peter Henderson;Mark Simon Krass;Lucia Zheng;Neel Guha;Christopher D Manning;Dan Jurafsky;Daniel E. 
Ho", "authorids": "~Peter_Henderson1;~Mark_Simon_Krass1;~Lucia_Zheng1;~Neel_Guha1;~Christopher_D_Manning1;~Dan_Jurafsky1;~Daniel_E._Ho1", "gender": "M;M;F;M;M;M;M", "homepage": "http://www.peterhenderson.co/;https://markskrass.github.io/;;http://neelguha.com;https://nlp.stanford.edu/~manning/;http://web.stanford.edu/~jurafsky/;https://dho.stanford.edu", "dblp": "h/PeterHenderson2;;;130/0311;m/ChristopherDManning;31/985;240/9334", "google_scholar": "dy_JBs0AAAAJ;rq_dGZIAAAAJ;dlqLZAsAAAAJ;YI5N4HQAAAAJ;1zmDOdwAAAAJ;uZg9l58AAAAJ;", "orcid": ";0000-0002-4358-4830;;;0000-0001-6155-649X;;", "linkedin": "phende/;;;;christopher-manning-011575/;;", "or_profile": "~Peter_Henderson1;~Mark_Simon_Krass1;~Lucia_Zheng1;~Neel_Guha1;~Christopher_D_Manning1;~Dan_Jurafsky1;~Daniel_E._Ho1", "aff": "Stanford University;Stanford University;Stanford University;Stanford Law;Computer Science Department, Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;law.stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;MS student;JD;Full Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nhenderson2022pile,\ntitle={Pile of Law: Learning Responsible Data Filtering from the Law and a 256{GB} Open-Source Legal Dataset},\nauthor={Peter Henderson and Mark Simon Krass and Lucia Zheng and Neel Guha and Christopher D Manning and Dan Jurafsky and Daniel E. Ho},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=3HCT3xfNm9r}\n}", "github": "", "project": "", "reviewers": "RkuH;zrN6;DWPM;c6sX", "pdf_size": 1076382, "rating": "5;7;8;8", "confidence": "3;3;3;4", "wc_summary_and_contributions": "45;135;172;21", "wc_strengths": "70;31;26;29", "wc_weaknesses": "178;74;64;8", "wc_correctness": "20;5;11;3", "wc_clarity": "9;28;6;5", "wc_relation_to_prior_work": "53;1;1;17", "wc_documentation": "27;1;19;7", "wc_additional_feedback": "17;43;1;14", "wc_review": "419;318;300;104", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 93.25, 62.23493793682131 ], "wc_strengths_avg": [ 39.0, 17.98610574860495 ], "wc_weaknesses_avg": [ 81.0, 61.39218191268331 ], "wc_correctness_avg": [ 9.75, 6.609652033201143 ], "wc_clarity_avg": [ 12.0, 9.354143466934854 ], "wc_relation_to_prior_work_avg": [ 18.0, 21.236760581595302 ], "wc_documentation_avg": [ 13.5, 10.136567466356647 ], "wc_additional_feedback_avg": [ 18.75, 15.237699957670777 ], "wc_review_avg": [ 285.25, 114.05124944515076 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 114, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16242802812264116024&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 8, "email": "stanford.edu;stanford.edu;stanford.edu;law.stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": 
"Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hiding Images in Deep Probabilistic Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55110", "id": "3I8VTXMhuPx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/eec7fee9a8595ca964b9a11562767345-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3I8VTXMhuPx", "openreview": "https://openreview.net/forum?id=3I8VTXMhuPx", "poster": "/media/PosterPDFs/NeurIPS%202022/55110.png?t=1669535990.170695", "slides": "https://nips.cc/virtual/2022/poster/55110", "video": "https://nips.cc/virtual/2022/poster/55110", "author_site": "Haoyu Chen, Linqi Song, Zhenxing Qian, Xinpeng Zhang, Kede Ma", "tldr": "", "abstract": "Data hiding with deep neural networks (DNNs) has experienced impressive successes in recent years. A prevailing scheme is to train an autoencoder, consisting of an encoding network to embed (or transform) secret messages in (or into) a carrier, and a decoding network to extract the hidden messages. This scheme may suffer from several limitations regarding practicability, security, and embedding capacity. In this work, we describe a different computational framework to hide images in deep probabilistic models. Specifically, we use a DNN to model the probability density of cover images, and hide a secret image in one particular location of the learned distribution. As an instantiation, we adopt a SinGAN, a pyramid of generative adversarial networks (GANs), to learn the patch distribution of one cover image. We hide the secret image by fitting a deterministic mapping from a fixed set of noise maps (generated by an embedding key) to the secret image during patch distribution learning. The stego SinGAN, behaving as the original SinGAN, is publicly communicated; only the receiver with the embedding key is able to extract the secret image. We demonstrate the feasibility of our SinGAN approach in terms of extraction accuracy and model security. Moreover, we show the flexibility of the proposed method in terms of hiding multiple images for different receivers and obfuscating the secret image. ", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/527bc663fa4e3eb1a2a9ec6aa67ee3045db7a40e.pdf", "author": "Haoyu Chen;Linqi Song;Zhenxing Qian;Xinpeng Zhang;Kede Ma", "authorids": "~Haoyu_Chen5;~Linqi_Song1;~Zhenxing_Qian1;~Xinpeng_Zhang1;~Kede_Ma2", "gender": ";M;M;M;", "homepage": ";https://sites.google.com/site/aisquaredlab/;https://tomzqian.github.io/;;https://kedema.org/", "dblp": ";137/7963.html;43/8279;;127/1809", "google_scholar": ";UcGN3MoAAAAJ;90AsMtQAAAAJ;https://scholar.google.com.hk/citations?user=P76GtHwAAAAJ;https://scholar.google.com.hk/citations?user=sfzOyFoAAAAJ", "orcid": ";0000-0003-2756-4984;;0000-0001-5867-1315;0000-0001-8608-1128", "linkedin": ";;;;", "or_profile": "~Haoyu_Chen5;~Linqi_Song1;~Zhenxing_Qian1;~Xinpeng_Zhang1;~Kede_Ma2", "aff": ";City University of Hong Kong;Fudan University;Shanghai university;City University of Hong Kong", "aff_domain": ";cityu.edu.hk;fudan.edu.cn;shu.edu.cn;cityu.edu.hk", "position": ";Assistant Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2022hiding,\ntitle={Hiding Images in Deep Probabilistic Models},\nauthor={Haoyu Chen and Linqi Song and Zhenxing Qian and Xinpeng Zhang and Kede Ma},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3I8VTXMhuPx}\n}", "github": "", "project": "", "reviewers": "4fNE;ShzF;bS3X", "pdf_size": 2603606, "rating": "3;5;7", "confidence": "4;2;3", "soundness": "2;2;3", "novelty": "1;3;3", "presentation": "2;2;3", "contribution": "1;3;3", "wc_summary": "49;39;175", "wc_strengths_and_weaknesses": "154;251;7", "wc_questions": "59;65;1", "wc_limitations": "52;27;1", "wc_review": "314;382;184", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1183;758;92", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 5.0, 1.632993161855452 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 87.66666666666667, 61.88878914813427 ], "wc_strengths_and_weaknesses_avg": [ 137.33333333333334, 100.30730559413685 ], "wc_questions_avg": [ 41.666666666666664, 28.85981442921782 ], "wc_limitations_avg": [ 26.666666666666668, 20.82199691565522 ], "wc_review_avg": [ 293.3333333333333, 82.14350586355293 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 677.6666666666666, 449.0065577348385 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16034392138001028728&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": ";cityu.edu.hk;fudan.edu.cn;shu.edu.cn;cityu.edu.hk", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "City University of Hong Kong;Fudan University;Shanghai University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.fudan.edu.cn;https://www.shu.edu.cn", "aff_unique_abbr": "CityU;Fudan;SHU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "GREED: A Neural Framework for Learning Graph Distance Functions", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54507", "id": "3LBxVcnsEkV", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/8d492b8a6201d83d1015af9e264f0bf2-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3LBxVcnsEkV", "openreview": "https://openreview.net/forum?id=3LBxVcnsEkV", "poster": "/media/PosterPDFs/NeurIPS%202022/54507.png?t=1669859442.0786178", "slides": "https://nips.cc/virtual/2022/poster/54507", "video": "https://nips.cc/virtual/2022/poster/54507", "author_site": "Rishabh Ranjan, Siddharth Grover, Sourav Medya, Venkatesan Chakaravarthy, Yogish Sabharwal, Sayan Ranu", "tldr": "Learning graph and subgraph edit distance using graph neural networks", "abstract": "Similarity search in graph databases is one of the most fundamental operations in graph analytics. Among various distance functions, graph and subgraph edit distances (GED and SED respectively) are two of the most popular and expressive measures. Unfortunately, exact computations for both are NP-hard. To overcome this computational bottleneck, neural approaches to learn and predict edit distance in polynomial time have received much interest. 
While considerable progress has been made, there exist limitations that need to be addressed. First, the efficacy of an approximate distance function lies not only in its approximation accuracy, but also in the preservation of its properties. To elaborate, although GED is a metric, its neural approximations do not provide such a guarantee. This prohibits their usage in higher order tasks that rely on metric distance functions, such as clustering or indexing. Second, several existing frameworks for GED do not extend to SED due to SED being asymmetric. In this work, we design a novel siamese graph neural network called Greed, which through a carefully crafted inductive bias, learns GED and SED in a property-preserving manner. Through extensive experiments across $10$ real graph datasets containing up to $7$ million edges, we establish that Greed is not only more accurate than the state of the art, but also up to $3$ orders of magnitude faster. Even more significantly, due to preserving the triangle inequality, the generated embeddings are indexable and consequently, even in a CPU-only environment, Greed is up to $50$ times faster than GPU-powered computations of the closest baseline.", "keywords": "edit distance;subgraph edit distance;learning graph distance;graph neural networks", "primary_area": "", "supplementary_material": "/attachment/392806a9301c7230d37063eccd227ef5a273109d.pdf", "author": "Rishabh Ranjan;Siddharth Grover;Sourav Medya;Venkatesan Chakaravarthy;Yogish Sabharwal;Sayan Ranu", "authorids": "~Rishabh_Ranjan1;~Siddharth_Grover1;~Sourav_Medya1;~Venkatesan_Chakaravarthy1;~Yogish_Sabharwal1;~Sayan_Ranu2", "gender": "M;M;M;M;M;M", "homepage": "https://rishabh-ranjan.github.io;;https://souravmedya.github.io/;https://dblp.org/pid/c/VTChakaravarthy.html;https://www.cse.iitd.ac.in/~yogish;https://www.cse.iitd.ac.in/~sayan/index.html", "dblp": ";;178/3021;;57/3685.html;38/768", "google_scholar": "NNzQUrcAAAAJ;;RCFhOM4AAAAJ;https://scholar.google.co.in/citations?user=_3I7KHAAAAAJ;https://scholar.google.co.in/citations?user=vkw-hvEAAAAJ;K4w5qYUAAAAJ", "orcid": ";;0000-0003-0996-2807;;;0000-0003-4147-9372", "linkedin": ";siddharth-grover-173853184;sourav-medya-35987a49/;;;", "or_profile": "~Rishabh_Ranjan1;~Siddharth_Grover1;~Sourav_Medya1;~Venkatesan_Chakaravarthy1;~Yogish_Sabharwal1;~Sayan_Ranu2", "aff": "Indian Institute of Technology Delhi, Dhirubhai Ambani Institute Of Information and Communication Technology;Indian Institute of Technology Delhi;Northwestern University;;;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;northwestern.edu;;;iitd.ac.in", "position": "Undergrad student;Undergrad student;Postdoc;;;Associate Professor", "bibtex": "@inproceedings{\nranjan2022greed,\ntitle={{GREED}: A Neural Framework for Learning Graph Distance Functions},\nauthor={Rishabh Ranjan and Siddharth Grover and Sourav Medya and Venkatesan Chakaravarthy and Yogish Sabharwal and Sayan Ranu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3LBxVcnsEkV}\n}", "github": "", "project": "", "reviewers": "ebHE;ap6M;r5mu", "pdf_size": 735711, "rating": "6;6;6", "confidence": "5;5;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "contribution": "2;2;3", "wc_summary": "67;38;124", "wc_strengths_and_weaknesses": "278;350;301", "wc_questions": "54;81;119", "wc_limitations": "1;9;98", "wc_review": "400;478;642", "wc_reply_reviewers": "275;25;0", "wc_reply_authors": "1890;1068;1247", "reply_reviewers": "2;1;0", "reply_authors": "5;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.33333333333333, 35.72425257751689 ], "wc_strengths_and_weaknesses_avg": [ 309.6666666666667, 30.02591473303612 ], "wc_questions_avg": [ 84.66666666666667, 26.662499674428293 ], "wc_limitations_avg": [ 36.0, 43.96210489349511 ], "wc_review_avg": [ 506.6666666666667, 100.85413008675229 ], "wc_reply_reviewers_avg": [ 100.0, 124.1638702145945 ], "wc_reply_authors_avg": [ 1401.6666666666667, 352.9516806715742 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16530278228367950456&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 9, "email": "iitd.ac.in;iitd.ac.in;northwestern.edu;;;iitd.ac.in", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Indian Institute of Technology Delhi;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitd.ac.in;https://www.northwestern.edu", "aff_unique_abbr": "IIT Delhi;NU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "India;United States" }, { "title": "Reproducibility in Optimization: Theoretical Framework and Limits", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54471", "id": "3LMI8CHDb0g", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/7274ed909a312d4d869cc328ad1c5f04-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3LMI8CHDb0g", "openreview": "https://openreview.net/forum?id=3LMI8CHDb0g", "poster": "/media/PosterPDFs/NeurIPS%202022/501627aa14e37bd1d4143159e0e9620f.png?t=1667833916.3663945", "slides": "https://nips.cc/virtual/2022/poster/54471", "video": "https://nips.cc/virtual/2022/poster/54471", "author_site": "Kwangjun Ahn, Prateek Jain, Ziwei Ji, Satyen Kale, Praneeth Netrapalli, Gil I Shamir", "tldr": "We initiate a formal study of reproducibility in optimization by defining a quantitative measure and characterizing the fundamental limits for various settings.", "abstract": " We initiate a formal study of reproducibility in optimization. We define a quantitative measure of reproducibility of optimization procedures in the face of noisy or error-prone operations such as inexact or stochastic gradient computations or inexact initialization. 
We then analyze several convex optimization settings of interest such as smooth, non-smooth, and strongly-convex objective functions and establish tight bounds on the limits of reproducibility in each setting. Our analysis reveals a fundamental trade-off between computation and reproducibility: more computation is necessary (and sufficient) for better reproducibility.", "keywords": "reproducibility;first-order optimization;convex optimization;inexact gradient oracles", "primary_area": "", "supplementary_material": "/attachment/e3c4cd224171437e7cea5dd66ece9e3d39dc770a.pdf", "author": "Kwangjun Ahn;Prateek Jain;Ziwei Ji;Satyen Kale;Praneeth Netrapalli;Gil I. Shamir", "authorids": "~Kwangjun_Ahn2;~Prateek_Jain1;~Ziwei_Ji1;~Satyen_Kale2;~Praneeth_Netrapalli1;~Gil_I._Shamir1", "gender": ";M;M;;M;", "homepage": "http://kjahn.mit.edu/;http://prateekjain.org;https://jiziwei.github.io/;https://www.satyenkale.com;http://praneethnetrapalli.org/;", "dblp": ";https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html;176/4574.html=;52/4768;http://dblp.uni-trier.de/pers/hd/n/Netrapalli:Praneeth;22/4711", "google_scholar": "z94iNtgAAAAJ;qYhRbJoAAAAJ;3l_6H5sAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=mim8FQkAAAAJ;", "orcid": ";;;;;", "linkedin": ";;ziwei-ji-b1274899/;;;", "or_profile": "~Kwangjun_Ahn2;~Prateek_Jain1;~Ziwei_Ji1;~Satyen_Kale2;~Praneeth_Netrapalli1;~Gil_I._Shamir1", "aff": "Massachusetts Institute of Technology;Google;University of Illinois Urbana Champaign;Google;Google;Google", "aff_domain": "mit.edu;google.com;illinois.edu;google.com;google.com;google.com", "position": "PhD student;Researcher;PhD student;Research Scientist;Research Scientist;Google", "bibtex": "@inproceedings{\nahn2022reproducibility,\ntitle={Reproducibility in Optimization: Theoretical Framework and Limits},\nauthor={Kwangjun Ahn and Prateek Jain and Ziwei Ji and Satyen Kale and Praneeth Netrapalli and Gil I. Shamir},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3LMI8CHDb0g}\n}", "github": "", "project": "", "reviewers": "aZPs;YXF1;Vi1H", "pdf_size": 619623, "rating": "6;7;8", "confidence": "3;4;4", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "3;3;4", "contribution": "2;3;4", "wc_summary": "137;156;61", "wc_strengths_and_weaknesses": "71;570;198", "wc_questions": "1;70;155", "wc_limitations": "1;3;1", "wc_review": "210;799;415", "wc_reply_reviewers": "0;195;19", "wc_reply_authors": "6;679;296", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 118.0, 41.04469108991645 ], "wc_strengths_and_weaknesses_avg": [ 279.6666666666667, 211.74250609853678 ], "wc_questions_avg": [ 75.33333333333333, 62.983242921342885 ], "wc_limitations_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_review_avg": [ 474.6666666666667, 244.13156744309455 ], "wc_reply_reviewers_avg": [ 71.33333333333333, 87.78888818574302 ], "wc_reply_authors_avg": [ 327.0, 275.6241402102992 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6991753056766598239&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "mit.edu;google.com;illinois.edu;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Google;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Google;", "aff_unique_url": "https://web.mit.edu;https://www.google.com;https://illinois.edu", "aff_unique_abbr": "MIT;Google;UIUC", "aff_campus_unique_index": "1;2;1;1;1", "aff_campus_unique": ";Mountain View;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SAPipe: Staleness-Aware Pipeline for Data Parallel DNN Training", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55014", "id": "3MZnNARib5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/725ce5f2b1a8e2e0ac66994e7fefe375-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3MZnNARib5", "openreview": "https://openreview.net/forum?id=3MZnNARib5", "poster": "/media/PosterPDFs/NeurIPS%202022/55014.png?t=1668053049.8940196", "slides": "https://nips.cc/virtual/2022/poster/55014", "video": "https://nips.cc/virtual/2022/poster/55014", "author_site": "Yangrui Chen, Cong Xie, Meng Ma, Juncheng Gu, Yanghua Peng, Haibin Lin, Chuan Wu, Yibo Zhu", "tldr": "We design a performant and staleness-aware communication pipeline system for accelerating distributed DNN training. ", "abstract": "Data parallelism across multiple machines is widely adopted for accelerating distributed deep learning, but it is hard to achieve linear speedup due to the heavy communication. In this paper, we propose SAPipe, a performant system that pushes the training speed of data parallelism to its fullest extent. 
By introducing partial staleness, SAPipe overlaps communication with computation while keeping staleness minimal. To mitigate additional problems incurred by staleness, SAPipe adopts staleness compensation techniques including weight prediction and delay compensation with provably lower error bounds. Additionally, SAPipe presents an algorithm-system co-design with runtime optimization to minimize system overhead for the staleness training pipeline and staleness compensation. We have implemented SAPipe in the BytePS framework, compatible with both TensorFlow and PyTorch. Our experiments show that SAPipe achieves up to 157% speedups over BytePS (non-stale), and outperforms PipeSGD in accuracy by up to 13.7%.", "keywords": "data parallelism;communication optimization;staleness mitigation", "primary_area": "", "supplementary_material": "/attachment/29783b1932c42d51f8eefc08c1bfb67a781d1e02.pdf", "author": "Yangrui Chen;Cong Xie;Meng Ma;Juncheng Gu;Yanghua Peng;Haibin Lin;Chuan Wu;Yibo Zhu", "authorids": "~Yangrui_Chen1;~Cong_Xie1;~Meng_Ma2;~Juncheng_Gu1;~Yanghua_Peng1;~Haibin_Lin1;~Chuan_Wu1;~Yibo_Zhu1", "gender": "M;M;M;M;M;;;", "homepage": ";https://congxie1108.github.io/;;https://gujuncheng.info/;;;https://i.cs.hku.hk/~cwu/;http://yibozhu.com", "dblp": ";;;162/9944.html;;;34/3772-1;", "google_scholar": "3dX7CzYAAAAJ;pIPJUJMAAAAJ;jrLoX3wAAAAJ;gJix6K4AAAAJ;Gf9amnoAAAAJ;;mY7MWXMAAAAJ;pZgFp1kAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Yangrui_Chen1;~Cong_Xie1;~Meng_Ma2;~Juncheng_Gu1;~Yanghua_Peng1;~Haibin_Lin1;~Chuan_Wu1;~Yibo_Zhu1", "aff": "the University of Hong Kong, University of Hong Kong;ByteDance Inc.;;ByteDance Inc;ByteDance Inc.;;The University of Hong Kong;", "aff_domain": "cs.hku.hk;bytedance.com;;bytedance.com;bytedance.com;;hku.hk;", "position": "PhD student;Researcher;;Researcher;Researcher;;Full Professor;", "bibtex": "@inproceedings{\nchen2022sapipe,\ntitle={{SAP}ipe: Staleness-Aware Pipeline for Data Parallel {DNN} Training},\nauthor={Yangrui Chen and Cong Xie and Meng Ma and Juncheng Gu and Yanghua Peng and Haibin Lin and Chuan Wu and Yibo Zhu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3MZnNARib5}\n}", "github": "", "project": "", "reviewers": "oko6;5UGd;9R6e", "pdf_size": 4425206, "rating": "5;6;6", "confidence": "3;4;4", "soundness": "3;2;3", "novelty": "3;2;2", "presentation": "2;3;3", "contribution": "3;2;2", "wc_summary": "160;53;40", "wc_strengths_and_weaknesses": "288;143;120", "wc_questions": "250;138;1", "wc_limitations": "82;40;33", "wc_review": "780;374;194", "wc_reply_reviewers": "0;326;178", "wc_reply_authors": "640;2320;480", "reply_reviewers": "0;2;1", "reply_authors": "1;4;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.33333333333333, 53.766986979827024 ], "wc_strengths_and_weaknesses_avg": [ 183.66666666666666, 74.36994614015052 ], "wc_questions_avg": [ 129.66666666666666, 101.82446769918428 ], "wc_limitations_avg": [ 51.666666666666664, 21.63844315615664 ], "wc_review_avg": [ 449.3333333333333, 245.09227287334502 ], "wc_reply_reviewers_avg": [ 168.0, 133.27665461987957 ], "wc_reply_authors_avg": [ 1146.6666666666667, 832.2392818307859 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8118397710849874066&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "cs.hku.hk;bytedance.com;;bytedance.com;bytedance.com;;hku.hk;", "author_num": 8, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Hong Kong;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.bytedance.com", "aff_unique_abbr": "HKU;ByteDance", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Two-Player Markov Games: Neural Function Approximation and Correlated Equilibrium", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53944", "id": "3PAIKtWQsc", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d6f681da2151687df12cc21a1c1e3527-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3PAIKtWQsc", "openreview": "https://openreview.net/forum?id=3PAIKtWQsc", "poster": "/media/PosterPDFs/NeurIPS%202022/53944.png?t=1669756810.0374682", "slides": "https://nips.cc/virtual/2022/poster/53944", "video": "https://nips.cc/virtual/2022/poster/53944", "author_site": "Chris Junchi Li, Dongruo Zhou, Quanquan Gu, Michael Jordan", "tldr": "", "abstract": "We consider learning Nash equilibria in two-player zero-sum Markov Games with nonlinear function approximation, where the action-value function is approximated by a function in a Reproducing Kernel Hilbert Space (RKHS). The key challenge is how to do exploration in the high-dimensional function space. We propose a novel online learning algorithm to find a Nash equilibrium by minimizing the duality gap. 
At the core of our algorithms are upper and lower confidence bounds that are derived from the principle of optimism in the face of uncertainty. We prove that our algorithm is able to attain an $O(\\sqrt{T})$ regret with polynomial computational complexity, under very mild assumptions on the reward function and the underlying dynamics of the Markov Games. We also propose several extensions of our algorithm, including an algorithm with a Bernstein-type bonus that can achieve a tighter regret bound, and another algorithm for model misspecification that can be applied to neural network function approximation.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/18a6f690d14b8371e83727c897bbcaa0a93a9147.pdf", "author": "Chris Junchi Li;Dongruo Zhou;Quanquan Gu;Michael Jordan", "authorids": "~Chris_Junchi_Li1;~Dongruo_Zhou1;~Quanquan_Gu1;~Michael_Jordan1", "gender": "M;M;M;M", "homepage": ";;http://web.cs.ucla.edu/~qgu/;http://www.cs.berkeley.edu/~jordan/", "dblp": ";215/3401;50/4597;j/MichaelIJordan", "google_scholar": "cHN3PVYAAAAJ;1780wr0AAAAJ;GU9HgNAAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ", "orcid": ";;;0000-0001-8935-817X", "linkedin": ";;;", "or_profile": "~Chris_Junchi_Li1;~Dongruo_Zhou1;~Quanquan_Gu1;~Michael_Jordan1", "aff": "University of California, Berkeley;University of California, Los Angeles;University of California, Los Angeles;University of California, Berkeley", "aff_domain": "berkeley.edu;cs.ucla.edu;cs.ucla.edu;berkeley.edu", "position": "Visiting Scientist;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2022learning,\ntitle={Learning Two-Player Markov Games: Neural Function Approximation and Correlated Equilibrium},\nauthor={Chris Junchi Li and Dongruo Zhou and Quanquan Gu and Michael Jordan},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3PAIKtWQsc}\n}", "github": "", "project": "", "reviewers": "3y5m;QrN9;nHpk;ExxC", "pdf_size": 322552, "rating": "6;6;7;7", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "contribution": "2;2;3;3", "wc_summary": "91;78;163;68", "wc_strengths_and_weaknesses": "228;36;30;59", "wc_questions": "216;79;242;1", "wc_limitations": "13;1;7;1", "wc_review": "548;194;442;129", "wc_reply_reviewers": "285;0;63;0", "wc_reply_authors": "616;378;809;5", "reply_reviewers": "3;0;2;0", "reply_authors": "4;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.0, 37.275997639231605 ], "wc_strengths_and_weaknesses_avg": [ 88.25, 81.40753957711779 ], "wc_questions_avg": [ 134.5, 98.86986396268581 ], "wc_limitations_avg": [ 5.5, 4.9749371855331 ], "wc_review_avg": [ 328.25, 172.44763698004098 ], "wc_reply_reviewers_avg": [ 87.0, 117.17294909662384 ], "wc_reply_authors_avg": [ 452.0, 299.8457937006954 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15461157888490476545&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "berkeley.edu;cs.ucla.edu;cs.ucla.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of California, Berkeley;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.ucla.edu", "aff_unique_abbr": "UC Berkeley;UCLA", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Berkeley;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reinforcement Learning with Neural Radiance Fields", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53487", "id": "3SLW-YIw7tX", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/6c294f059e3d77d58dbb8fe48f21fe00-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3SLW-YIw7tX", "openreview": "https://openreview.net/forum?id=3SLW-YIw7tX", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53487", "video": "https://nips.cc/virtual/2022/poster/53487", "author_site": "Danny Driess, Ingmar Schubert, Pete Florence, Yunzhu Li, Marc Toussaint", "tldr": "We learn state representations of scenes using supervision from neural radiance fields, and show that using these in downstream reinforcement learning tasks improves sample efficiency.", "abstract": "It is a long-standing problem to find effective representations for training reinforcement learning (RL) agents. This paper demonstrates that learning state representations with supervision from Neural Radiance Fields (NeRFs) can improve the performance of RL compared to other learned representations or even low-dimensional, hand-engineered state information. Specifically, we propose to train an encoder that maps multiple image observations to a latent space describing the objects in the scene. 
The decoder built from a latent-conditioned NeRF serves as the supervision signal to learn the latent space. An RL algorithm then operates on the learned latent space as its state representation. We call this NeRF-RL. Our experiments indicate that NeRF as supervision leads to a latent space better suited for the downstream RL tasks involving robotic object manipulations like hanging mugs on hooks, pushing objects, or opening doors.\nVideo: https://dannydriess.github.io/nerf-rl", "keywords": "RL;NeRF;Computer Vision;Representation Learning;Robotic Manipulation;Neural Implicit Representations", "primary_area": "", "supplementary_material": "/attachment/2c9ba562651ad6993e126ec86b836ed9cf86b97e.zip", "author": "Danny Driess;Ingmar Schubert;Pete Florence;Yunzhu Li;Marc Toussaint", "authorids": "~Danny_Driess1;~Ingmar_Schubert1;~Pete_Florence1;~Yunzhu_Li1;~Marc_Toussaint3", "gender": ";;;M;M", "homepage": "https://dannydriess.github.io/;https://ingmarschubert.com/;http://www.peteflorence.com/;https://yunzhuli.github.io/;https://www.user.tu-berlin.de/mtoussai/", "dblp": ";295/5172;;182/1831;t/MarcToussaint", "google_scholar": "https://scholar.google.de/citations?user=wxnzyjwAAAAJ;;;WlA92lcAAAAJ;t2X4Mg8AAAAJ", "orcid": ";;;;0000-0002-5487-6767", "linkedin": ";ingmar-schubert-3292a9160/;;;marctoussaint/", "or_profile": "~Danny_Driess1;~Ingmar_Schubert1;~Pete_Florence1;~Yunzhu_Li1;~Marc_Toussaint3", "aff": "Technische Universit\u00e4t Berlin;Google DeepMind;Google;Massachusetts Institute of Technology;TU Berlin", "aff_domain": "tu-berlin.de;deepmind.com;google.com;mit.edu;tu-berlin.de", "position": "PhD student;Intern;Research Scientist;PhD student;Full Professor", "bibtex": "@inproceedings{\ndriess2022reinforcement,\ntitle={Reinforcement Learning with Neural Radiance Fields},\nauthor={Danny Driess and Ingmar Schubert and Pete Florence and Yunzhu Li and Marc Toussaint},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3SLW-YIw7tX}\n}", "github": "", "project": "", "reviewers": "aWEL;iBq4;WpRR", "pdf_size": 1180730, "rating": "5;6;7", "confidence": "5;4;4", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "3;3;3", "contribution": "2;3;4", "wc_summary": "46;108;66", "wc_strengths_and_weaknesses": "65;284;49", "wc_questions": "111;102;14", "wc_limitations": "3;27;9", "wc_review": "225;521;138", "wc_reply_reviewers": "49;85;0", "wc_reply_authors": "1034;1816;159", "reply_reviewers": "1;1;0", "reply_authors": "3;5;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 73.33333333333333, 25.837096500101467 ], "wc_strengths_and_weaknesses_avg": [ 132.66666666666666, 107.20800135976584 ], "wc_questions_avg": [ 75.66666666666667, 43.75944342526409 ], "wc_limitations_avg": [ 13.0, 10.198039027185569 ], "wc_review_avg": [ 294.6666666666667, 163.9356242214879 ], "wc_reply_reviewers_avg": [ 44.666666666666664, 34.83612429010374 ], "wc_reply_authors_avg": [ 1003.0, 676.8224779561231 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4763020707689070986&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "tu-berlin.de;deepmind.com;google.com;mit.edu;tu-berlin.de", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Google;Massachusetts Institute of Technology", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.tu-berlin.de;https://deepmind.com;https://web.mit.edu", "aff_unique_abbr": "TU Berlin;DeepMind;MIT", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Berlin", "aff_country_unique_index": "0;1;2;2;0", "aff_country_unique": "Germany;United Kingdom;United States" }, { "title": "Double Check Your State Before Trusting It: Confidence-Aware Bidirectional Offline Model-Based Imagination", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54373", "id": "3e3IQMLDSLP", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f9e2800a251fa9107a008104f47c45d1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3e3IQMLDSLP", "openreview": "https://openreview.net/forum?id=3e3IQMLDSLP", "poster": "/media/PosterPDFs/NeurIPS%202022/5516adb142fcb18a017c72602abbdb6d.png?t=1666243576.4708884", "slides": "https://nips.cc/virtual/2022/poster/54373", "video": "https://nips.cc/virtual/2022/poster/54373", "author_site": "Jiafei Lyu, Xiu Li, Zongqing Lu", "tldr": "We introduce a simple yet effective way to select trustworthy transitions for data augmentation in offline RL.", "abstract": "The learned policy of model-free offline reinforcement learning (RL) methods is often constrained to stay within the support of datasets to avoid possible dangerous out-of-distribution actions or states, making it challenging to handle out-of-support regions. 
Model-based RL methods offer a richer dataset and benefit generalization by generating imaginary trajectories with a trained forward or reverse dynamics model. However, the imagined transitions may be inaccurate, thus degrading the performance of the underlying offline RL method. In this paper, we propose to augment the offline dataset by using trained bidirectional dynamics models and rollout policies with a double check. We introduce conservatism by trusting samples that the forward model and backward model agree on. Our method, confidence-aware bidirectional offline model-based imagination, generates reliable samples and can be combined with any model-free offline RL method. Experimental results on the D4RL benchmarks demonstrate that our method significantly boosts the performance of existing model-free offline RL algorithms and achieves competitive or better scores than baseline methods.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/814048fad7db7ed39e1f2381b523b56de9cfe514.pdf", "author": "Jiafei Lyu;Xiu Li;Zongqing Lu", "authorids": "~Jiafei_Lyu1;~Xiu_Li1;~Zongqing_Lu2", "gender": "M;F;", "homepage": ";https://thusigsiclab.github.io/thu.github.io/introduction.html;", "dblp": "278/1503;13/1206-1;", "google_scholar": "bfgCMr8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0001-6616-417X;0000-0003-0403-1923;", "linkedin": ";;", "or_profile": "~Jiafei_Lyu1;~Xiu_Li1;~Zongqing_Lu2", "aff": "Tsinghua University;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;", "position": "PhD student;Professor;", "bibtex": "@inproceedings{\nlyu2022double,\ntitle={Double Check Your State Before Trusting It: Confidence-Aware Bidirectional Offline Model-Based Imagination},\nauthor={Jiafei Lyu and Xiu Li and Zongqing Lu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3e3IQMLDSLP}\n}", "github": "", "project": "", "reviewers": "rJ1S;iDjK;ytiJ", "pdf_size": 1140305, "rating": "6;6;7", "confidence": "4;5;5", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "3;3;4", "contribution": "3;2;3", "wc_summary": "79;134;64", "wc_strengths_and_weaknesses": "262;125;150", "wc_questions": "6;62;69", "wc_limitations": "1;24;108", "wc_review": "348;345;391", "wc_reply_reviewers": "0;0;17", "wc_reply_authors": "1631;713;1430", "reply_reviewers": "0;0;1", "reply_authors": "3;1;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 92.33333333333333, 30.09245014211298 ], "wc_strengths_and_weaknesses_avg": [ 179.0, 59.570686303472 ], "wc_questions_avg": [ 45.666666666666664, 28.193773938387338 ], "wc_limitations_avg": [ 44.333333333333336, 45.98792111945146 ], "wc_review_avg": [ 361.3333333333333, 21.01322334996598 ], "wc_reply_reviewers_avg": [ 5.666666666666667, 8.013876853447538 ], "wc_reply_authors_avg": [ 1258.0, 394.01269015096454 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=360756721662557774&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Geoclidean: Few-Shot Generalization in Euclidean Geometry", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55760", "id": "3lk54yE2tYJ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/feb34ce77fc8b94c85d12e608b23ce67-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=3lk54yE2tYJ", "openreview": "https://openreview.net/forum?id=3lk54yE2tYJ", "poster": "/media/PosterPDFs/NeurIPS%202022/55760.png?t=1669099976.9175465", "slides": "https://nips.cc/virtual/2022/poster/55760", "video": "https://nips.cc/virtual/2022/poster/55760", "author_site": "Joy Hsu, Jiajun Wu, Noah Goodman", "tldr": "A study of few-shot generalization of human and vision models in Euclidean geometry concepts.", "abstract": "Euclidean geometry is among the earliest forms of mathematical thinking. While the geometric primitives underlying its constructions, such as perfect lines and circles, do not often occur in the natural world, humans rarely struggle to perceive and reason with them. Will computer vision models trained on natural images show the same sensitivity to Euclidean geometry? Here we explore these questions by studying few-shot generalization in the universe of Euclidean geometry constructions. 
We introduce Geoclidean, a domain-specific language for Euclidean geometry, and use it to generate two datasets of geometric concept learning tasks for benchmarking generalization judgements of humans and machines. We find that humans are indeed sensitive to Euclidean geometry and generalize strongly from a few visual examples of a geometric concept. In contrast, low-level and high-level visual features from standard computer vision models pretrained on natural images do not support correct generalization. Thus Geoclidean represents a novel few-shot generalization benchmark for geometric concept learning, where the performance of humans and of AI models diverge. The Geoclidean framework and dataset are publicly available for download.\n", "keywords": "geometry;concept learning;few-shot generalization", "primary_area": "", "supplementary_material": "/attachment/ad647b0281885671350df61db8094cdaab387a41.pdf", "author": "Joy Hsu;Jiajun Wu;Noah Goodman", "authorids": "~Joy_Hsu2;~Jiajun_Wu1;~Noah_Goodman1", "gender": "F;M;", "homepage": "https://web.stanford.edu/~joycj/;https://jiajunwu.com;https://cocolab.stanford.edu/", "dblp": "258/5012;117/4768;96/1216", "google_scholar": "Zr7RJT4AAAAJ;2efgcS0AAAAJ;OUpIbcQAAAAJ", "orcid": ";0000-0002-4176-343X;", "linkedin": ";jiajunwu/;", "or_profile": "~Joy_Hsu2;~Jiajun_Wu1;~Noah_Goodman1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhsu2022geoclidean,\ntitle={Geoclidean: Few-Shot Generalization in Euclidean Geometry},\nauthor={Joy Hsu and Jiajun Wu and Noah Goodman},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=3lk54yE2tYJ}\n}", "github": "", "project": "", "reviewers": "V6yt;qXzh;bmE1;yPZW;TsFh", "pdf_size": 3640652, "rating": "6;7;7;8;8", "confidence": "4;4;3;3;4", "wc_summary_and_contributions": "83;68;79;77;171", "wc_strengths": "60;80;118;57;24", "wc_weaknesses": "226;96;270;28;34", "wc_correctness": "7;49;106;9;3", "wc_clarity": "5;14;8;12;4", "wc_relation_to_prior_work": "12;1;55;13;17", "wc_documentation": "1;4;9;23;3", "wc_additional_feedback": "1;14;66;14;1", "wc_review": "395;326;711;233;257", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "559;966;1080;40;47", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;2;2;1;1", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 95.6, 38.018942647054246 ], "wc_strengths_avg": [ 67.8, 30.87005021051958 ], "wc_weaknesses_avg": [ 130.8, 99.58795107843117 ], "wc_correctness_avg": [ 34.8, 39.29580130242925 ], "wc_clarity_avg": [ 8.6, 3.8781438859330635 ], "wc_relation_to_prior_work_avg": [ 19.6, 18.478095139921752 ], "wc_documentation_avg": [ 8.0, 7.9498427657407165 ], "wc_additional_feedback_avg": [ 19.2, 24.111408088288826 ], "wc_review_avg": [ 384.4, 172.82546108719052 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 538.4, 439.65286306357655 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15302234923717650723&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": 
"stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sequence Model Imitation Learning with Unobserved Contexts", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53941", "id": "3nbKUphLBg5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/708e58b0b99e3e62d42022b4564bad7a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3nbKUphLBg5", "openreview": "https://openreview.net/forum?id=3nbKUphLBg5", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53941", "video": "https://nips.cc/virtual/2022/poster/53941", "author_site": "Gokul Swamy, Sanjiban Choudhury, J. Bagnell, Steven Wu", "tldr": "We theoretically characterize and empirically validate how off-policy and on-policy imitation learning algorithms handle hidden state.", "abstract": "We consider imitation learning problems where the learner's ability to mimic the expert increases throughout the course of an episode as more information is revealed. One example of this is when the expert has access to privileged information: while the learner might not be able to accurately reproduce expert behavior early on in an episode, by considering the entire history of states and actions, they might be able to eventually identify the hidden context and act as the expert would. We prove that on-policy imitation learning algorithms (with or without access to a queryable expert) are better equipped to handle these sorts of asymptotically realizable problems than off-policy methods. This is because on-policy algorithms provably learn to recover from their initially suboptimal actions, while off-policy methods treat their suboptimal past actions as though they came from the expert. This often manifests as a latching behavior: a naive repetition of past actions. We conduct experiments in a toy bandit domain that show that there exist sharp phase transitions of whether off-policy approaches are able to match expert performance asymptotically, in contrast to the uniformly good performance of on-policy approaches. 
We demonstrate that on several continuous control tasks, on-policy approaches are able to use history to identify the context while off-policy approaches actually perform worse when given access to history.", "keywords": "imitation learning;causal inference", "primary_area": "", "supplementary_material": "/attachment/c1e9ca1e1c71503b89efd80709f29bb6526ee6b8.pdf", "author": "Gokul Swamy;Sanjiban Choudhury;Drew Bagnell;Steven Wu", "authorids": "~Gokul_Swamy1;~Sanjiban_Choudhury2;~Drew_Bagnell2;~Steven_Wu1", "gender": ";M;;M", "homepage": "https://gokul.dev/;http://www.sanjibanchoudhury.com/;https://robotwhisperer.org/;https://zstevenwu.com/", "dblp": "31/11509;;;137/8350", "google_scholar": "Sbpra_AAAAAJ;;7t4jbPQAAAAJ;MbF6rTEAAAAJ", "orcid": ";;;", "linkedin": ";;;zstevenwu/", "or_profile": "~Gokul_Swamy1;~Sanjiban_Choudhury2;~Drew_Bagnell2;~Zhiwei_Steven_Wu1", "aff": "Carnegie Mellon University;;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu;cmu.edu", "position": "PhD student;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nswamy2022sequence,\ntitle={Sequence Model Imitation Learning with Unobserved Contexts},\nauthor={Gokul Swamy and Sanjiban Choudhury and Drew Bagnell and Steven Wu},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3nbKUphLBg5}\n}", "github": "", "project": "", "reviewers": "Pth8;GRay;wNyh", "pdf_size": 726869, "rating": "3;6;7", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "1;3;3", "presentation": "1;2;3", "contribution": "1;3;3", "wc_summary": "60;41;133", "wc_strengths_and_weaknesses": "341;49;327", "wc_questions": "115;111;101", "wc_limitations": "19;9;10", "wc_review": "535;210;571", "wc_reply_reviewers": "95;0;0", "wc_reply_authors": "1017;509;494", "reply_reviewers": "1;0;0", "reply_authors": "3;2;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "contribution_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 78.0, 39.65686153324121 ], "wc_strengths_and_weaknesses_avg": [ 239.0, 134.47180621478492 ], "wc_questions_avg": [ 109.0, 5.887840577551898 ], "wc_limitations_avg": [ 12.666666666666666, 4.496912521077347 ], "wc_review_avg": [ 438.6666666666667, 162.35831429964472 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 44.78342947514801 ], "wc_reply_authors_avg": [ 673.3333333333334, 243.08617584899577 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2920440114291350523&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "cmu.edu;;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Quo Vadis: Is Trajectory Forecasting the Key Towards Long-Term Multi-Object Tracking?", "status": "Accept", 
"track": "main", "site": "https://nips.cc/virtual/2022/poster/54937", "id": "3r0yLLCo4fF", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/647dc4a76b3efdd676f50f32949299a8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3r0yLLCo4fF", "openreview": "https://openreview.net/forum?id=3r0yLLCo4fF", "poster": "/media/PosterPDFs/NeurIPS%202022/54937.png?t=1669639120.69344", "slides": "https://nips.cc/virtual/2022/poster/54937", "video": "https://nips.cc/virtual/2022/poster/54937", "author_site": "Patrick Dendorfer, Vladimir Yugay, Aljosa Osep, Laura Leal-Taix\u00e9", "tldr": "Stochastic trajectory prediction to solving long-term occlusions in pedestrian multi-object tracking.", "abstract": "Recent developments in monocular multi-object tracking have been very successful in tracking visible objects and bridging short occlusion gaps, mainly relying on data-driven appearance models. \nWhile significant advancements have been made in short-term tracking performance, bridging longer occlusion gaps remains elusive: state-of-the-art object trackers only bridge less than 10% of occlusions longer than three seconds. \nWe suggest that the missing key is reasoning about future trajectories over a longer time horizon. Intuitively, the longer the occlusion gap, the larger the search space for possible associations. \nIn this paper, we show that even a small yet diverse set of trajectory predictions for moving agents will significantly reduce this search space and thus improve long-term tracking robustness. Our experiments suggest that the crucial components of our approach are reasoning in a bird's-eye view space and generating a small yet diverse set of forecasts while accounting for their localization uncertainty. This way, we can advance state-of-the-art trackers on the MOTChallenge dataset and significantly improve their long-term tracking performance. This paper's source code and experimental data are available at https://github.com/dendorferpatrick/QuoVadis.", "keywords": "multi-object tracking;tracking;trajectory prediction;computer vision", "primary_area": "", "supplementary_material": "/attachment/4bdfde367ea8abd4c7657ef3cfa9824097234255.zip", "author": "Patrick Dendorfer;Vladimir Yugay;Aljosa Osep;Laura Leal-Taix\u00e9", "authorids": "~Patrick_Dendorfer1;~Vladimir_Yugay1;~Aljosa_Osep2;~Laura_Leal-Taix\u00e91", "gender": ";M;M;F", "homepage": "https://dvl.in.tum.de/team/dendorfer/;https://vladimiryugay.github.io/;https://aljosaosep.github.io;https://dvl.in.tum.de/team/lealtaixe/", "dblp": ";;128/7869;47/8483", "google_scholar": ";sae4r88AAAAJ;https://scholar.google.de/citations?user=X7EN55cAAAAJ;tT2TC-UAAAAJ", "orcid": ";;0000-0001-8105-4737;", "linkedin": ";vladimir-yugay;;", "or_profile": "~Patrick_Dendorfer1;~Vladimir_Yugay1;~Aljosa_Osep2;~Laura_Leal-Taix\u00e91", "aff": "Department of Informatics, Technical University Munich;Department of Informatics, Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;Technical University Munich", "aff_domain": "in.tum.de;in.tum.de;tum.de;tum.de", "position": "PhD student;MS student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ndendorfer2022quo,\ntitle={Quo Vadis: Is Trajectory Forecasting the Key Towards Long-Term Multi-Object Tracking?},\nauthor={Patrick Dendorfer and Vladimir Yugay and Aljosa Osep and Laura Leal-Taix{\\'e}},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3r0yLLCo4fF}\n}", "github": "", "project": "", "reviewers": "yuJE;Tgjz;cbmW;XrjC", "pdf_size": 6773540, "rating": "5;6;6;7", "confidence": "5;4;5;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "contribution": "2;3;3;3", "wc_summary": "117;66;40;122", "wc_strengths_and_weaknesses": "425;255;166;261", "wc_questions": "133;128;2;94", "wc_limitations": "4;56;8;19", "wc_review": "679;505;216;496", "wc_reply_reviewers": "0;25;45;15", "wc_reply_authors": "929;793;365;389", "reply_reviewers": "0;1;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 34.54254622925183 ], "wc_strengths_and_weaknesses_avg": [ 276.75, 93.49431800917101 ], "wc_questions_avg": [ 89.25, 52.561273766909416 ], "wc_limitations_avg": [ 21.75, 20.522853115490545 ], "wc_review_avg": [ 474.0, 165.85686600198378 ], "wc_reply_reviewers_avg": [ 21.25, 16.345871038277526 ], "wc_reply_authors_avg": [ 619.0, 246.8764873372918 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17768927827009981298&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "in.tum.de;in.tum.de;tum.de;tum.de", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": "Department of Informatics;Department of Informatics;", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM;TUM", "aff_campus_unique_index": "0", "aff_campus_unique": "Munich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Diffusion-LM Improves Controllable Text Generation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53701", "id": "3s9IrEsjLyk", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/1be5bc25d50895ee656b8c2d9eb89d6a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3s9IrEsjLyk", "openreview": "https://openreview.net/forum?id=3s9IrEsjLyk", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53701", "video": "https://nips.cc/virtual/2022/poster/53701", "author_site": "Xiang Li, John Thickstun, Ishaan Gulrajani, Percy Liang, Tatsunori Hashimoto", "tldr": "We propose a non-autoregressive language model based on continuous diffusions, which demonstrates strong performance in controllable text generation.", "abstract": "Controlling the behavior of language models (LMs) without re-training is a major open problem in natural language generation. While recent works have demonstrated successes in controlling simple sentence attributes (e.g., sentiment), there has been little progress on complex, fine-grained controls (e.g., syntactic structure). To address this challenge, we develop a new non-autoregressive language model based on continuous diffusions that we call Diffusion-LM. 
Building upon the recent successes of diffusion models in continuous domains, Diffusion-LM iteratively denoises a sequence of Gaussian vectors into word vectors, yielding a sequence of intermediate latent variables. The continuous, hierarchical nature of these intermediate variables enables a simple gradient-based algorithm to perform complex, controllable generation tasks. We demonstrate successful control of Diffusion-LM for six challenging fine-grained control tasks, significantly outperforming prior work.", "keywords": "controllable text generation;controlled generation;infilling;language model;diffusion model", "primary_area": "", "supplementary_material": "/attachment/6748cf2900c496d29f5e610341d549e826a68659.pdf", "author": "Xiang Lisa Li;John Thickstun;Ishaan Gulrajani;Percy Liang;Tatsunori Hashimoto", "authorids": "~Xiang_Lisa_Li1;~John_Thickstun1;~Ishaan_Gulrajani1;~Percy_Liang1;~Tatsunori_Hashimoto1", "gender": "F;M;M;;M", "homepage": "https://xiangli1999.github.io;https://johnthickstun.com/;https://ishaan.io;https://cs.stanford.edu/~pliang/;https://thashim.github.io", "dblp": "40/1491-63;190/7644;164/5562;04/1701;", "google_scholar": "nzA4P0oAAAAJ;RkuzIZMAAAAJ;;pouyVyUAAAAJ;5ygiTwsAAAAJ", "orcid": ";;;;", "linkedin": ";john-thickstun-87779865;;;", "or_profile": "~Xiang_Lisa_Li1;~John_Thickstun1;~Ishaan_Gulrajani1;~Percy_Liang1;~Tatsunori_Hashimoto1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2022diffusionlm,\ntitle={Diffusion-{LM} Improves Controllable Text Generation},\nauthor={Xiang Lisa Li and John Thickstun and Ishaan Gulrajani and Percy Liang and Tatsunori Hashimoto},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3s9IrEsjLyk}\n}", "github": "", "project": "", "reviewers": "sK4s;Pvpw;GmR4;C1jB", "pdf_size": 1237669, "rating": "6;7;7;8", "confidence": "3;4;5;4", "soundness": "3;4;3;4", "novelty": "4;3;3;4", "presentation": "4;4;1;4", "contribution": "4;3;3;4", "wc_summary": "106;80;74;152", "wc_strengths_and_weaknesses": "202;96;108;107", "wc_questions": "84;83;139;30", "wc_limitations": "1;41;1;11", "wc_review": "393;300;322;300", "wc_reply_reviewers": "14;0;0;20", "wc_reply_authors": "511;630;903;345", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 1.299038105676658 ], "contribution_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 103.0, 30.740852297878796 ], "wc_strengths_and_weaknesses_avg": [ 128.25, 42.83908845902303 ], "wc_questions_avg": [ 84.0, 38.54218468120353 ], "wc_limitations_avg": [ 13.5, 16.393596310755 ], "wc_review_avg": [ 328.75, 38.16657569130351 ], "wc_reply_reviewers_avg": [ 8.5, 8.760707733967616 ], "wc_reply_authors_avg": [ 597.25, 203.48510387740916 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 824, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17910853149942433121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-objective Deep Data Generation with Correlated Property Control", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54871", "id": "3uj_8G7fxgs", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b9c2e8a0bbed5fcfaf62856a3a719ada-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3uj_8G7fxgs", "openreview": "https://openreview.net/forum?id=3uj_8G7fxgs", "poster": "/media/PosterPDFs/NeurIPS%202022/54871.png?t=1669409168.7342277", "slides": "https://nips.cc/virtual/2022/poster/54871", "video": "https://nips.cc/virtual/2022/poster/54871", "author_site": "Shiyu Wang, Xiaojie Guo, Xuanyang Lin, Bo Pan, Yuanqi Du, Yinkai Wang, Yanfang Ye, Ashley Petersen, Austin Leitgeb, Saleh Alkhalifa, Kevin Minbiole, William M. Wuest, Amarda Shehu, Liang Zhao", "tldr": "We proposed a novel deep generative framework that controls correlated properties of interest while handling correlation and conflicts of properties under a multi-objective optimization scheme.", "abstract": "Developing deep generative models has been an emerging field due to the ability to model and generate complex data for various purposes, such as image synthesis and molecular design. 
However, the advance of deep generative models is limited by the challenges of generating objects that possess multiple desired properties because: 1) the existence of complex correlation among real-world properties is common but hard to identify; 2) controlling an individual property implicitly enforces partial control of its correlated properties, which is difficult to model; 3) controlling multiple properties in various manners simultaneously is hard and underexplored. We address these challenges by proposing a novel deep generative framework that recovers semantics and correlation of properties through disentangled latent vectors. The correlation is handled via an explainable mask pooling layer, and properties are precisely retained by the generated objects via the mutual dependence between latent vectors and properties. Our generative model preserves properties of interest while handling correlation and conflicts of properties under a multi-objective optimization framework. The experiments demonstrate our model's superior performance in generating objects with desired properties.", "keywords": "deep generative models;controllable generation;correlated properties;disentangled learning;variational autoencoders", "primary_area": "", "supplementary_material": "/attachment/a4b344890e3af3724602673482b4f5ccb50cc02e.pdf", "author": "Shiyu Wang;Xiaojie Guo;Xuanyang Lin;Bo Pan;Yuanqi Du;Yinkai Wang;Yanfang Ye;Ashley Ann Petersen;Austin Leitgeb;Saleh AlKhalifa;Kevin Minbiole;William Wuest;Amarda Shehu;Liang Zhao", "authorids": "~Shiyu_Wang2;~Xiaojie_Guo1;mike.lin@emory.edu;~Bo_Pan2;~Yuanqi_Du1;~Yinkai_Wang1;yye7@nd.edu;~Ashley_Ann_Petersen1;austin.leitgeb@vanderbilt.edu;salehesam@gmail.com;~Kevin_Minbiole1;william.wuest@emory.edu;~Amarda_Shehu1;~Liang_Zhao6", "gender": "M;F;;;M;M;;F;;;M;;F;M", "homepage": "https://sites.google.com/view/about-shiyuwang;https://sites.google.com/view/xiaojie-guo-personal-site;;https://pb0316.github.io/;https://yuanqidu.github.io/;https://yinkaiw.github.io;;;;;https://kminbiol.clasit.org;;https://cs.gmu.edu/~ashehu/;https://cs.emory.edu/~lzhao41/", "dblp": ";43/8066;;;266/2837;308/6333;;;;;;;53/3810;63/5422-2", "google_scholar": "https://scholar.google.com/citations?hl=en;ad7m0r0AAAAJ;;;fAc_zZMAAAAJ;PfRyo6EAAAAJ;;yWgtnx0AAAAJ;;;;;https://scholar.google.com.tw/citations?user=HkB_Gz0AAAAJ;qnvyqtwAAAAJ", "orcid": ";;;0009-0005-7501-7581;;;;;;;;;0000-0001-5230-4610;0000-0002-2648-9989", "linkedin": "shiyu-wang-647a7b91/;;;bo-pan;;yinkai-wang;;ashley-petersen-09b05a1b5;;;;;;", "or_profile": "~Shiyu_Wang2;~Xiaojie_Guo1;mike.lin@emory.edu;~Bo_Pan2;~Yuanqi_Du1;~Yinkai_Wang1;yye7@nd.edu;~Ashley_Ann_Petersen1;austin.leitgeb@vanderbilt.edu;salehesam@gmail.com;~Kevin_Minbiole1;william.wuest@emory.edu;~Amarda_Shehu1;~Liang_Zhao6", "aff": "Emory University;George Mason University;;Tsinghua University;University of Amsterdam;George Mason University;;Villanova University;;;Villanova University;;George Mason University;Emory University", "aff_domain": "emory.edu;gmu.edu;;mails.tsinghua.edu.cn;uva.nl;gmu.edu;;villanova.edu;;;villanova.edu;;gmu.edu;emory.edu", "position": "PhD student;PhD student;;Undergrad student;Researcher;Undergrad student;;Undergrad student;;;Full Professor;;Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2022multiobjective,\ntitle={Multi-objective Deep Data Generation with Correlated Property Control},\nauthor={Shiyu Wang and Xiaojie Guo and Xuanyang Lin and Bo Pan and Yuanqi Du and Yinkai Wang and Yanfang Ye and Ashley Ann Petersen and Austin
Leitgeb and Saleh AlKhalifa and Kevin Minbiole and William Wuest and Amarda Shehu and Liang Zhao},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3uj_8G7fxgs}\n}", "github": "", "project": "", "reviewers": "P6bk;GPtJ;2J2q", "pdf_size": 915154, "rating": "5;6;8", "confidence": "2;4;5", "soundness": "2;1;3", "novelty": "2;1;4", "presentation": "2;3;3", "contribution": "2;1;4", "wc_summary": "95;120;83", "wc_strengths_and_weaknesses": "67;80;351", "wc_questions": "208;462;36", "wc_limitations": "63;29;25", "wc_review": "433;691;495", "wc_reply_reviewers": "0;496;6", "wc_reply_authors": "1297;3576;419", "reply_reviewers": "0;2;1", "reply_authors": "4;7;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "contribution_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_summary_avg": [ 99.33333333333333, 15.412837362262522 ], "wc_strengths_and_weaknesses_avg": [ 166.0, 130.92236885523675 ], "wc_questions_avg": [ 235.33333333333334, 174.98444375302495 ], "wc_limitations_avg": [ 39.0, 17.048949136725895 ], "wc_review_avg": [ 539.6666666666666, 109.96160946237353 ], "wc_reply_reviewers_avg": [ 167.33333333333334, 232.41533703456165 ], "wc_reply_authors_avg": [ 1764.0, 1330.4708439746685 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.9285714285714286, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2229842987277560649&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "emory.edu;gmu.edu;;mails.tsinghua.edu.cn;uva.nl;gmu.edu;;villanova.edu;;;villanova.edu;;gmu.edu;emory.edu", "author_num": 14, "aff_unique_index": "0;1;2;3;1;4;4;1;0", "aff_unique_norm": "Emory University;George Mason University;Tsinghua University;University of Amsterdam;Villanova University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.emory.edu;https://www.gmu.edu;https://www.tsinghua.edu.cn;https://www.uva.nl;https://www.villanova.edu", "aff_unique_abbr": "Emory;GMU;THU;UvA;Villanova", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;0;0;0;0", "aff_country_unique": "United States;China;Netherlands" }, { "title": "Learning Infinite-Horizon Average-Reward Restless Multi-Action Bandits via Index Awareness", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55105", "id": "3v44ls_4dbg", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/71f003060ce1e8b6b4856023b67cda5d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3v44ls_4dbg", "openreview": "https://openreview.net/forum?id=3v44ls_4dbg", "poster": "/media/PosterPDFs/NeurIPS%202022/55105.png?t=1669435317.1155744", "slides": "https://nips.cc/virtual/2022/poster/55105", "video": "https://nips.cc/virtual/2022/poster/55105", "author_site": "GUOJUN XIONG, Shufan Wang, Jian Li", "tldr": "", "abstract": "We consider the online restless bandits with average-reward and multiple actions, where the state of each arm evolves according to a Markov decision process (MDP), and the reward of pulling an arm depends 
on both the current state of the corresponding MDP and the action taken. Since finding the optimal control is typically intractable for restless bandits, existing learning algorithms are often computationally expensive or have a regret bound that is exponential in the number of arms and states. In this paper, we advocate \textit{index-aware reinforcement learning} (RL) solutions to design RL algorithms operating on a much smaller dimensional subspace by exploiting the inherent structure in restless bandits. Specifically, we first propose novel index policies, which are provably optimal, to address dimensionality concerns. We then leverage the indices to develop two low-complexity index-aware RL algorithms, namely, (i) GM-R2MAB, which has access to a generative model; and (ii) UC-R2MAB, which learns the model using an upper confidence style online exploitation method. We prove that both algorithms achieve a sub-linear regret that is only polynomial in the number of arms and states. A key differentiator between our algorithms and existing ones stems from the fact that our RL algorithms contain a novel exploitation step that leverages our proposed provably optimal index policies for decision-making. ", "keywords": "Restless Bandits;Reinforcement Learning;Index Policy;Finite-time Analysis", "primary_area": "", "supplementary_material": "/attachment/1ebdf9b2cf520fb39dcd11c2a8b1d7baf464f1f4.zip", "author": "GUOJUN XIONG;Shufan Wang;Jian Li", "authorids": "~GUOJUN_XIONG1;~Shufan_Wang2;~Jian_Li14", "gender": ";M;M", "homepage": "https://xionggj001.github.io/;;https://sites.google.com/stonybrook.edu/jianli", "dblp": "214/2134.html;;33/5448-8", "google_scholar": "FIBwLnoAAAAJ;https://scholar.google.com/citations?view_op=list_works;h039Yq4AAAAJ", "orcid": ";;", "linkedin": "guojun-%E5%9B%BD%E9%92%A7-xiong-48696aa6/;;", "or_profile": "~GUOJUN_XIONG1;~Shufan_Wang2;~Jian_Li14", "aff": "State University of New York at Stony Brook;State University of New York at Binghamton;State University of New York, Binghamton", "aff_domain": "stonybrook.edu;binghamton.edu;binghamton.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxiong2022learning,\ntitle={Learning Infinite-Horizon Average-Reward Restless Multi-Action Bandits via Index Awareness},\nauthor={GUOJUN XIONG and Shufan Wang and Jian Li},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3v44ls_4dbg}\n}", "github": "", "project": "", "reviewers": "2MaK;mAyS;2ySi;4AZS", "pdf_size": 981465, "rating": "5;6;7;7", "confidence": "4;2;4;3", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "contribution": "3;3;3;3", "wc_summary": "52;53;109;52", "wc_strengths_and_weaknesses": "564;119;708;138", "wc_questions": "79;58;109;12", "wc_limitations": "6;1;84;5", "wc_review": "701;231;1010;207", "wc_reply_reviewers": "0;95;0;0", "wc_reply_authors": "1482;1243;2045;20", "reply_reviewers": "0;1;0;0", "reply_authors": "3;3;4;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.5, 24.540782383616055 ], "wc_strengths_and_weaknesses_avg": [ 382.25, 258.8941627383669 ], "wc_questions_avg": [ 64.5, 35.316426772820606 ], "wc_limitations_avg": [ 24.0, 34.69149751740331 ], "wc_review_avg": [ 537.25, 336.5860774007148 ], "wc_reply_reviewers_avg": [ 23.75, 41.13620667976084 ], "wc_reply_authors_avg": [ 1197.5, 739.5561168701129 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1863265574677410595&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "stonybrook.edu;binghamton.edu;binghamton.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "State University of New York at Stony Brook;State University of New York at Binghamton", "aff_unique_dep": ";", "aff_unique_url": "https://www.stonybrook.edu;https://www.binghamton.edu", "aff_unique_abbr": "SUNY Stony Brook;SUNY Binghamton", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Stony Brook;Binghamton", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Optical Flow from Continuous Spike Streams", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55189", "id": "3vYkhJIty7E", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/33951c28630e48c441cb59db356f2037-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3vYkhJIty7E", "openreview": "https://openreview.net/forum?id=3vYkhJIty7E", "poster": "/media/PosterPDFs/NeurIPS%202022/55189.png?t=1668862739.446795", "slides": "https://nips.cc/virtual/2022/poster/55189", "video": "https://nips.cc/virtual/2022/poster/55189", "author_site": "Rui Zhao, Ruiqin Xiong, Jing Zhao, Zhaofei Yu, Xiaopeng Fan, Tiejun Huang", "tldr": "Optical flow estimation for spiking camera from continuous spike streams with efficient spike representation and temporal motion clues.", "abstract": "Spike camera is an emerging bio-inspired vision sensor with ultra-high temporal resolution. It records scenes by accumulating photons and outputting continuous binary spike streams. Optical flow is a key task for spike cameras and their applications. A previous attempt has been made for spike-based optical flow. However, the previous work only focuses on motion between two moments, and it uses graphics-based data for training, whose generalization is limited. 
In this paper, we propose a tailored network, Spike2Flow, which extracts information from binary spikes with a temporal-spatial representation based on the differential of spike firing time and spatial information aggregation. The network utilizes continuous motion clues through joint correlation decoding. In addition, a new dataset with real-world scenes is proposed for better generalization. Experimental results show that our approach achieves state-of-the-art performance on existing synthetic datasets and real data captured by spike cameras. The source code and dataset are available at \\url{https://github.com/ruizhao26/Spike2Flow}.", "keywords": "Optical Flow;Neuromorphic Camera;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/57fb2e39cea28cca6d7b5a8d9c2becf4cea1dcbc.zip", "author": "Rui Zhao;Ruiqin Xiong;Jing Zhao;Zhaofei Yu;Xiaopeng Fan;Tiejun Huang", "authorids": "~Rui_Zhao11;~Ruiqin_Xiong1;~Jing_Zhao7;~Zhaofei_Yu1;~Xiaopeng_Fan1;~Tiejun_Huang1", "gender": "M;M;F;M;M;M", "homepage": "http:\\\\ruizhao26.github.io;http://idm.pku.edu.cn/staff/xiongruiqin/home.html;;https://yuzhaofei.github.io;http://homepage.hit.edu.cn/xiaopengfan;https://idm.pku.edu.cn/~tjhuang/", "dblp": "26/2578-10;12/6908;69/5882-11;166/0573;76/1458;h/TiejunHuang", "google_scholar": "Ju7_T9cAAAAJ;https://scholar.google.com.tw/citations?user=46Rur-YAAAAJ;BVdxnEcAAAAJ;qaUgD50AAAAJ;;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ", "orcid": "0000-0002-8892-9222;0000-0001-9796-0478;;;;0000-0002-4234-6099", "linkedin": "rui-zhao-247055189/;;;;;", "or_profile": "~Rui_Zhao11;~Ruiqin_Xiong1;~Jing_Zhao7;~Zhaofei_Yu1;~Xiaopeng_Fan1;~Tiejun_Huang1", "aff": "Peking University;Peking University;Peking University;Peking University;Harbin Institute of Technology;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;hit.edu.cn;ict.ac.cn", "position": "PhD student;Researcher;PhD student;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nzhao2022learning,\ntitle={Learning Optical Flow from Continuous Spike Streams},\nauthor={Rui Zhao and Ruiqin Xiong and Jing Zhao and Zhaofei Yu and Xiaopeng Fan and Tiejun Huang},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3vYkhJIty7E}\n}", "github": "", "project": "", "reviewers": "PUft;pFP6;zkLu;p5Ne", "pdf_size": 2597662, "rating": "4;5;5;7", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "1;3;3;3", "contribution": "2;2;2;3", "wc_summary": "44;59;55;41", "wc_strengths_and_weaknesses": "98;221;174;65", "wc_questions": "69;4;143;68", "wc_limitations": "1;3;1;16", "wc_review": "212;287;373;190", "wc_reply_reviewers": "130;17;119;0", "wc_reply_authors": "2377;857;1984;280", "reply_reviewers": "1;1;1;0", "reply_authors": "6;2;5;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 49.75, 7.46240577829965 ], "wc_strengths_and_weaknesses_avg": [ 139.5, 61.451200151014135 ], "wc_questions_avg": [ 71.0, 49.208739061268375 ], "wc_limitations_avg": [ 5.25, 6.2599920127744575 ], "wc_review_avg": [ 265.5, 71.73039801924983 ], "wc_reply_reviewers_avg": [ 66.5, 58.44014031468439 ], "wc_reply_authors_avg": [ 1374.5, 842.9461726587291 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2221735347909611790&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;hit.edu.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Peking University;Harbin Institute of Technology;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Computing Technology", "aff_unique_url": "http://www.pku.edu.cn;http://www.hit.edu.cn/;http://www.ict.ac.cn", "aff_unique_abbr": "Peking U;HIT;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Washing The Unwashable : On The (Im)possibility of Fairwashing Detection", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54741", "id": "3vmKQUctNy", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5b84864ff8474fd742c66f219b2eaac1-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3vmKQUctNy", "openreview": "https://openreview.net/forum?id=3vmKQUctNy", "poster": "/media/PosterPDFs/NeurIPS%202022/54741.png?t=1670445305.7735431", "slides": "https://nips.cc/virtual/2022/poster/54741", "video": "https://nips.cc/virtual/2022/poster/54741", "author_site": "Ali Shahin Shamsabadi, Mohammad Yaghini, Natalie Dullerud, Sierra Wyllie, Ulrich A\u00efvodji, Aisha Alaagib, S\u00e9bastien Gambs, Nicolas Papernot", "tldr": "", "abstract": "The use of black-box models (e.g., deep neural networks) in high-stakes decision-making systems, whose internal logic is complex, raises the need for providing explanations about their decisions. Model explanation techniques mitigate this problem by generating an interpretable and high-fidelity surrogate model (e.g., a logistic regressor or decision tree) to explain the logic of black-box models. 
\nIn this work, we investigate the issue of fairwashing, in which model explanation techniques are manipulated to rationalize decisions taken by an unfair black-box model using deceptive surrogate models. More precisely, we theoretically characterize and analyze fairwashing, proving that this phenomenon is difficult to avoid due to an irreducible factor---the unfairness of the black-box model. \nBased on the theory developed, we propose a novel technique, called FRAUD-Detect (FaiRness AUDit Detection), to detect fairwashed models by measuring a divergence over subpopulation-wise fidelity measures of the interpretable model. \nWe empirically demonstrate that this divergence is significantly larger in purposefully fairwashed interpretable models than in honest ones. \nFurthermore, we show that our detector is robust to an informed adversary trying to bypass our detector. The code implementing FRAUD-Detect is available at https://github.com/cleverhans-lab/FRAUD-Detect.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/139a7bfbe4f8676b81961b5315fd5274164830c4.pdf", "author": "Ali Shahin Shamsabadi;Mohammad Yaghini;Natalie Dullerud;Sierra Wyllie;Ulrich A\u00efvodji;Aisha Alaagib Alryeh Mkean;S\u00e9bastien Gambs;Nicolas Papernot", "authorids": "~Ali_Shahin_Shamsabadi1;~Mohammad_Yaghini1;~Natalie_Dullerud1;sierra.wyllie@mail.utoronto.ca;~Ulrich_A\u00efvodji1;~Aisha_Alaagib_Alryeh_Mkean1;~S\u00e9bastien_Gambs2;~Nicolas_Papernot1", "gender": "M;M;;;M;F;M;M", "homepage": "https://alishahin.github.io;https://m-yaghini.github.io;;;https://aivodji.github.io/;https://aishaalaagib.netlify.app/;https://sebastiengambs.openum.ca;https://www.papernot.fr", "dblp": "198/1244;175/1555;;;217/4301;;09/2378;162/1405", "google_scholar": "1kVnWYwAAAAJ;t0PeZ3cAAAAJ;;;47kuuqIAAAAJ;dmwkh1AAAAAJ;https://scholar.google.fr/citations?user=2q1NjMgAAAAJ;cGxq0cMAAAAJ", "orcid": ";;;;0000-0003-4247-1444;;0000-0002-7326-7377;", "linkedin": "ali-shahin-shamsabadi-492544259/;myaghini/;natalie-dullerud-777ba5178/;;umaivodji/;aishaalaagib/;;nicolaspapernot", "or_profile": "~Ali_Shahin_Shamsabadi1;~Mohammad_Yaghini1;~Natalie_Dullerud1;sierra.wyllie@mail.utoronto.ca;~Ulrich_A\u00efvodji1;~Aisha_Alaagib_Alryeh_Mkean1;~S\u00e9bastien_Gambs2;~Nicolas_Papernot1", "aff": "Vector;University of Toronto, Vector Institute;Toronto University;;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;;Universit\u00e9 du Qu\u00e9bec \u00e0 Montr\u00e9al;Google", "aff_domain": "vectorinstitute.ai;utoronto.ca;utoronto.ca;;etsmtl.ca;;uqam.ca;google.com", "position": "Postdoc;PhD student;MS student;;Assistant Professor;;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nshamsabadi2022washing,\ntitle={Washing The Unwashable : On The (Im)possibility of Fairwashing Detection},\nauthor={Ali Shahin Shamsabadi and Mohammad Yaghini and Natalie Dullerud and Sierra Wyllie and Ulrich A{\\\"\\i}vodji and Aisha Alaagib Alryeh Mkean and S{\\'e}bastien Gambs and Nicolas Papernot},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3vmKQUctNy}\n}", "github": "", "project": "", "reviewers": "bYAU;53g2;vBkK;WPPT", "pdf_size": 1312143, "rating": "3;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;4;3", "novelty": "1;2;4;3", "presentation": "3;3;3;4", "contribution": "1;2;4;3", "wc_summary": "111;53;89;72", "wc_strengths_and_weaknesses": "315;286;227;80", "wc_questions": "4;56;16;18", "wc_limitations": "21;7;5;4", "wc_review": "451;402;337;174", "wc_reply_reviewers": "178;114;0;34", "wc_reply_authors": "2012;1456;644;265", "reply_reviewers": "1;1;0;1", "reply_authors": "4;4;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 81.25, 21.3819433167334 ], "wc_strengths_and_weaknesses_avg": [ 227.0, 90.60077262363716 ], "wc_questions_avg": [ 23.5, 19.512816301087856 ], "wc_limitations_avg": [ 9.25, 6.869315832017043 ], "wc_review_avg": [ 341.0, 104.55381389504642 ], "wc_reply_reviewers_avg": [ 81.5, 69.40280974139303 ], "wc_reply_authors_avg": [ 1094.25, 682.5519668860386 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12909889614592271783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "vectorinstitute.ai;utoronto.ca;utoronto.ca;;etsmtl.ca;;uqam.ca;google.com", "author_num": 8, "aff_unique_index": "0;1;1;2;3;4", "aff_unique_norm": "Vector Institute;University of Toronto;Universit\u00e9 du Qu\u00e9bec;Universit\u00e9 du Qu\u00e9bec \u00e0 Montr\u00e9al;Google", "aff_unique_dep": ";;;;Google", "aff_unique_url": "https://vectorinstitute.ai/;https://www.utoronto.ca;https://www.etsmtl.ca;https://www.uqam.ca;https://www.google.com", "aff_unique_abbr": "Vector;U of T;ETS;UQAM;Google", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";Toronto;\u00c9cole de technologie sup\u00e9rieure;Montr\u00e9al;Mountain View", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Nonlinear MCMC for Bayesian Machine Learning", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54122", "id": "3vpvnMVOUKE", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/b6341525cd84f3be0ef203e4d7cd8556-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3vpvnMVOUKE", "openreview": "https://openreview.net/forum?id=3vpvnMVOUKE", "poster": "/media/PosterPDFs/NeurIPS%202022/54122.png?t=1669260381.893556", "slides": "https://nips.cc/virtual/2022/poster/54122", "video": "https://nips.cc/virtual/2022/poster/54122", "tldr": "A theoretical and empirical investigation of nonlinear markov chain monte carlo with applications to Bayesian machine learning.", "abstract": "We explore the application of a nonlinear MCMC technique first introduced in [1] to problems in Bayesian machine learning. We provide a convergence guarantee in total variation that uses novel results for long-time convergence and large-particle (``propagation of chaos'') convergence. 
We apply this nonlinear MCMC technique to sampling problems including a Bayesian neural network on CIFAR10.", "keywords": "bayesian machine learning;markov chain monte carlo", "primary_area": "", "supplementary_material": "/attachment/5c661f0fe3af4d1363872ee9c2565f255bb2aed2.zip", "author": "James Vuckovic", "authorids": "~James_Vuckovic1", "gender": "M", "homepage": "http://www.jamesvuckovic.com/", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~James_Vuckovic1", "aff": "Microsoft", "aff_domain": "microsoft.com", "position": "Applied Scientist", "bibtex": "@inproceedings{\nvuckovic2022nonlinear,\ntitle={Nonlinear {MCMC} for Bayesian Machine Learning},\nauthor={James Vuckovic},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3vpvnMVOUKE}\n}", "github": "", "project": "", "reviewers": "RMta;tYtV;3nmg;A9qb", "pdf_size": 1480984, "rating": "5;7;7;7", "confidence": "3;4;1;4", "soundness": "3;4;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;2", "contribution": "2;3;3;3", "wc_summary": "41;85;135;87", "wc_strengths_and_weaknesses": "238;290;321;198", "wc_questions": "36;632;149;141", "wc_limitations": "2;42;52;1", "wc_review": "317;1049;657;427", "wc_reply_reviewers": "59;53;47;31", "wc_reply_authors": "676;806;318;339", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.0, 33.25657829663178 ], "wc_strengths_and_weaknesses_avg": [ 261.75, 47.26719263929264 ], "wc_questions_avg": [ 239.5, 230.95508221297058 ], "wc_limitations_avg": [ 24.25, 23.025800746119558 ], "wc_review_avg": [ 612.5, 280.2869065796688 ], "wc_reply_reviewers_avg": [ 47.5, 10.428326807307105 ], "wc_reply_authors_avg": [ 534.75, 211.43955992197866 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1057865905287553340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "microsoft.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Okapi: Generalising Better by Making Statistical Matches Match", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53382", "id": "3wg-rYuo5AN", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0918183ced31affb7ce0345e45ac1943-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3wg-rYuo5AN", "openreview": "https://openreview.net/forum?id=3wg-rYuo5AN", "poster": "/media/PosterPDFs/NeurIPS%202022/53382.png?t=1669372257.9345357", "slides": "https://nips.cc/virtual/2022/poster/53382", "video": "https://nips.cc/virtual/2022/poster/53382", "author_site": "Myles Bartlett, Sara Romiti, Viktoriia Sharmanska, Novi Quadrianto", "tldr": "We use statistical matching to define a semi-supervised consistency loss for training distributionally-robust 
models.", "abstract": "We propose Okapi, a simple, efficient, and general method for robust semi-supervised learning based on online statistical matching. Our method uses a nearest-neighbours-based matching procedure to generate cross-domain views for a consistency loss, while eliminating statistical outliers. In order to perform the online matching in a runtime- and memory-efficient way, we draw upon the self-supervised literature and combine a memory bank with a slow-moving momentum encoder. The consistency loss is applied within the feature space, rather than on the predictive distribution, making the method agnostic to both the modality and the task in question. We experiment on the WILDS 2.0 datasets Sagawa et al., which significantly expands the range of modalities, applications, and shifts available for studying and benchmarking real-world unsupervised adaptation. Contrary to Sagawa et al., we show that it is in fact possible to leverage additional unlabelled data to improve upon empirical risk minimisation (ERM) results with the right method. Our method outperforms the baseline methods in terms of out-of-distribution (OOD) generalisation on the iWildCam (a multi-class classification task) and PovertyMap (a regression task) image datasets as well as the CivilComments (a binary classification task) text dataset. Furthermore, from a qualitative perspective, we show the matches obtained from the learned encoder are strongly semantically related. Code for our paper is publicly available at https://github.com/wearepal/okapi/.", "keywords": "Domain Generalisation;Semi-Supervised Learning;Statistical Matching", "primary_area": "", "supplementary_material": "/attachment/74f089acb83c8ff9ea621b6f67027e37eb7fd356.pdf", "author": "Myles Bartlett;Sara Romiti;Viktoriia Sharmanska;Novi Quadrianto", "authorids": "~Myles_Bartlett1;~Sara_Romiti1;~Viktoriia_Sharmanska1;~Novi_Quadrianto1", "gender": "F;F;M;M", "homepage": "http://www.sussex.ac.uk/profiles/460797;https://www.imperial.ac.uk/people/sharmanska.v;http://www.sussex.ac.uk/profiles/335583;", "dblp": ";119/1466;http://dblp.uni-trier.de/pers/hd/q/Quadrianto:Novi;", "google_scholar": ";https://scholar.google.co.uk/citations?user=8TDBdicAAAAJ;I-rLzGcAAAAJ;", "orcid": ";;;0000-0002-1318-1395", "linkedin": "sara-romiti-18561a12b/;viktoriiasharmanska;;", "or_profile": "~Sara_Romiti1;~Viktoriia_Sharmanska1;~Novi_Quadrianto1;~Myles_Scott_Bartlett1", "aff": "University of Sussex;University of Sussex;Monash Indonesia;University of Sussex", "aff_domain": "sussex.ac.uk;sussex.ac.uk;monash.edu;sussex.ac.uk", "position": "PhD student;Lecturer;Full Professor;PhD student", "bibtex": "@inproceedings{\nbartlett2022okapi,\ntitle={Okapi: Generalising Better by Making Statistical Matches Match},\nauthor={Myles Bartlett and Sara Romiti and Viktoriia Sharmanska and Novi Quadrianto},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3wg-rYuo5AN}\n}", "github": "", "project": "", "reviewers": "SyKB;xMLR;mbZt;88f2", "pdf_size": 1712602, "rating": "3;6;6;6", "confidence": "5;4;3;3", "soundness": "2;2;2;3", "novelty": "2;2;3;2", "presentation": "1;3;3;2", "contribution": "2;2;3;2", "wc_summary": "64;50;119;49", "wc_strengths_and_weaknesses": "137;328;54;475", "wc_questions": "37;30;205;76", "wc_limitations": "38;40;1;2", "wc_review": "276;448;379;602", "wc_reply_reviewers": "79;131;0;152", "wc_reply_authors": "917;1015;710;2048", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;4", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.5, 28.622543562723422 ], "wc_strengths_and_weaknesses_avg": [ 248.5, 164.22926048667455 ], "wc_questions_avg": [ 87.0, 70.3455755538328 ], "wc_limitations_avg": [ 20.25, 18.766659265836314 ], "wc_review_avg": [ 426.25, 118.49973628662639 ], "wc_reply_reviewers_avg": [ 90.5, 58.619535992704684 ], "wc_reply_authors_avg": [ 1172.5, 517.3231581903134 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14348083558003086680&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "sussex.ac.uk;sussex.ac.uk;monash.edu;sussex.ac.uk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Sussex;Monash University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sussex.ac.uk;https://www.monash.edu.id", "aff_unique_abbr": "Sussex;Monash", "aff_campus_unique_index": "1", "aff_campus_unique": ";Indonesia", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;Indonesia" }, { "title": "The Power and Limitation of Pretraining-Finetuning for Linear Regression under Covariate Shift", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52894", "id": "3y80RPgHL7s", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/d5c04aa72b92c53bda5b525b60958295-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3y80RPgHL7s", "openreview": "https://openreview.net/forum?id=3y80RPgHL7s", "poster": "/media/PosterPDFs/NeurIPS%202022/2b3e69a7084c76e56be15598fc72ded6.png?t=1667177797.9238393", "slides": "https://nips.cc/virtual/2022/poster/52894", "video": "https://nips.cc/virtual/2022/poster/52894", "author_site": "Jingfeng Wu, Difan Zou, Vladimir Braverman, Quanquan Gu, Sham Kakade", "tldr": "We study the risk bounds of pretraining-finetuning for linear regression under covariate shift", "abstract": "We study linear regression under covariate shift, where the marginal distribution over the input covariates differs in the source and the target domains, while the conditional distribution of the output given the input covariates is similar across the two domains. We investigate a transfer learning approach with pretraining on the source data and finetuning based on the target data (both conducted by online SGD) for this problem. 
We establish sharp instance-dependent excess risk upper and lower bounds for this approach. Our bounds suggest that for a large class of linear regression instances, transfer learning with $O(N^2)$ source data (and scarce or no target data) is as effective as supervised learning with $N$ target data. In addition, we show that finetuning, even with only a small amount of target data, could drastically reduce the amount of source data required by pretraining. Our theory sheds light on the effectiveness and limitation of pretraining as well as the benefits of finetuning for tackling covariate shift problems.", "keywords": "covariate shift;linear regression;risk bound;pretraining;finetuning", "primary_area": "", "supplementary_material": "/attachment/f4bd1409c663e927ba78d53751b181ce08dd54f0.pdf", "author": "Jingfeng Wu;Difan Zou;Vladimir Braverman;Quanquan Gu;Sham M. Kakade", "authorids": "~Jingfeng_Wu1;~Difan_Zou1;~Vladimir_Braverman1;~Quanquan_Gu1;~Sham_M._Kakade1", "gender": "M;M;Unspecified;M;M", "homepage": "https://uuujf.github.io;https://difanzou.github.io/;http://www.cs.jhu.edu/~vova/;http://web.cs.ucla.edu/~qgu/;https://shamulent.github.io", "dblp": ";161/8923;14/4758;50/4597;s/SMKakade", "google_scholar": "z-KILD8AAAAJ;Cp4fcTQAAAAJ;https://scholar.google.com.tw/citations?user=DTthB48AAAAJ;GU9HgNAAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ", "orcid": "0009-0009-3414-4487;;;;", "linkedin": "jingfeng-wu-79205b184/;;;;", "or_profile": "~Jingfeng_Wu1;~Difan_Zou1;~Vladimir_Braverman1;~Quanquan_Gu1;~Sham_M._Kakade1", "aff": "Johns Hopkins University;University of California, Los Angeles;Department of Computer Science, Whiting School of Engineering;University of California, Los Angeles;Harvard University", "aff_domain": "jhu.edu;ucla.edu;cs.jhu.edu;cs.ucla.edu;harvard.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwu2022the,\ntitle={The Power and Limitation of Pretraining-Finetuning for Linear Regression under Covariate Shift},\nauthor={Jingfeng Wu and Difan Zou and Vladimir Braverman and Quanquan Gu and Sham M. Kakade},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3y80RPgHL7s}\n}", "github": "", "project": "", "reviewers": "XiiA;df3S;5qgQ;a3cN", "pdf_size": 589714, "rating": "4;5;7;7", "confidence": "4;4;3;3", "soundness": "3;2;4;4", "novelty": "3;2;4;3", "presentation": "3;3;4;4", "contribution": "3;2;4;3", "wc_summary": "33;89;62;94", "wc_strengths_and_weaknesses": "71;581;69;651", "wc_questions": "15;2;22;3", "wc_limitations": "12;2;9;3", "wc_review": "131;674;162;751", "wc_reply_reviewers": "0;686;0;0", "wc_reply_authors": "127;1856;298;549", "reply_reviewers": "0;5;0;0", "reply_authors": "1;6;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.5, 24.336187047275914 ], "wc_strengths_and_weaknesses_avg": [ 343.0, 274.12041149830486 ], "wc_questions_avg": [ 10.5, 8.381527307120106 ], "wc_limitations_avg": [ 6.5, 4.153311931459037 ], "wc_review_avg": [ 429.5, 284.5175741496472 ], "wc_reply_reviewers_avg": [ 171.5, 297.04671349806245 ], "wc_reply_authors_avg": [ 707.5, 679.8611990693395 ], "reply_reviewers_avg": [ 1.25, 2.165063509461097 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17075639018342470195&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "email": "jhu.edu;ucla.edu;cs.jhu.edu;cs.ucla.edu;harvard.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Johns Hopkins University;University of California, Los Angeles;Harvard University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jhu.edu;https://www.ucla.edu;https://www.harvard.edu", "aff_unique_abbr": "JHU;UCLA;Harvard", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Los Angeles;Baltimore", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Graph Few-shot Learning with Task-specific Structures", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53759", "id": "3yO3MiSOkH4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/fe47dd3fd8e7eb43187d42d65083e383-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=3yO3MiSOkH4", "openreview": "https://openreview.net/forum?id=3yO3MiSOkH4", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/53759", "video": "https://nips.cc/virtual/2022/poster/53759", "author_site": "Song Wang, Chen Chen, Jundong Li", "tldr": "This work proposes to conduct graph few-shot learning via constructing a task-specific structure for each meta-task.", "abstract": "Graph few-shot learning is of great importance among various graph learning tasks. Under the few-shot scenario, models are often required to conduct classification given limited labeled samples. Existing graph few-shot learning methods typically leverage Graph Neural Networks (GNNs) and perform classification across a series of meta-tasks. Nevertheless, these methods generally rely on the original graph (i.e., the graph that the meta-task is sampled from) to learn node representations. Consequently, the learned representations for the same nodes are identical in all meta-tasks. 
Since the class sets are different across meta-tasks, node representations should be task-specific to promote classification performance. Therefore, to adaptively learn node representations across meta-tasks, we propose a novel framework that learns a task-specific structure for each meta-task. To handle the variety of nodes across meta-tasks, we extract relevant nodes and learn task-specific structures based on node influence and mutual information. In this way, we can learn node representations with the task-specific structure tailored for each meta-task. We further conduct extensive experiments on five node classification datasets under both single- and multiple-graph settings to validate the superiority of our framework over the state-of-the-art baselines.", "keywords": "Graph Neural Networks;Few-shot Learning;Graph Mining", "primary_area": "", "supplementary_material": "/attachment/fd8c0d04ce806efa9849e609f9cc347073ff143e.pdf", "author": "Song Wang;Chen Chen;Jundong Li", "authorids": "~Song_Wang6;chenannie45@gmail.com;~Jundong_Li2", "gender": "M;;M", "homepage": "https://songw-sw.github.io/;;https://jundongli.github.io/", "dblp": ";;144/7997.html", "google_scholar": ";;uY6ek7sAAAAJ", "orcid": "0000-0003-1273-7694;;", "linkedin": ";;", "or_profile": "~Song_Wang6;chenannie45@gmail.com;~Jundong_Li2", "aff": "University of Virginia;;University of Virginia", "aff_domain": "virginia.edu;;virginia.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nwang2022graph,\ntitle={Graph Few-shot Learning with Task-specific Structures},\nauthor={Song Wang and Chen Chen and Jundong Li},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=3yO3MiSOkH4}\n}", "github": "", "project": "", "reviewers": "zmmr;PD5y;SEHi;AUxb", "pdf_size": 521151, "rating": "5;6;6;6", "confidence": "4;5;2;3", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;2;3", "contribution": "2;3;3;3", "wc_summary": "69;397;23;127", "wc_strengths_and_weaknesses": "33;409;81;188", "wc_questions": "191;247;53;56", "wc_limitations": "15;79;9;14", "wc_review": "308;1132;166;385", "wc_reply_reviewers": "18;128;38;32", "wc_reply_authors": "452;816;280;384", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;3;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 154.0, 145.0551619212498 ], "wc_strengths_and_weaknesses_avg": [ 177.75, 144.82295225550402 ], "wc_questions_avg": [ 136.75, 84.60607247709824 ], "wc_limitations_avg": [ 29.25, 28.81297450802329 ], "wc_review_avg": [ 497.75, 374.5159375780956 ], "wc_reply_reviewers_avg": [ 54.0, 43.3358973600409 ], "wc_reply_authors_avg": [ 483.0, 201.7795827134153 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14828691288632747352&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "virginia.edu;;virginia.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": 
"https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "\ud83c\udfd8\ufe0f ProcTHOR: Large-Scale Embodied AI Using Procedural Generation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54832", "id": "4-bV1bi74M", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/27c546ab1e4f1d7d638e6a8dfbad9a07-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4-bV1bi74M", "openreview": "https://openreview.net/forum?id=4-bV1bi74M", "poster": "/media/PosterPDFs/NeurIPS%202022/54832.png?t=1669623936.9711347", "slides": "https://nips.cc/virtual/2022/poster/54832", "video": "https://nips.cc/virtual/2022/poster/54832", "author_site": "Matt Deitke, Eli VanderBilt, Alvaro Herrasti, Luca Weihs, Kiana Ehsani, Jordi Salvador, Winson Han, Eric Kolve, Aniruddha Kembhavi, Roozbeh Mottaghi", "tldr": "We procedurally generate realistic, interactive, simulated 3D homes to scale up the diversity and size of training data in Embodied AI, and find that it helps significantly with performance.", "abstract": "Massive datasets and high-capacity models have driven many recent advancements in computer vision and natural language understanding. This work presents a platform to enable similar success stories in Embodied AI. We propose ProcTHOR, a framework for procedural generation of Embodied AI environments. ProcTHOR enables us to sample arbitrarily large datasets of diverse, interactive, customizable, and performant virtual environments to train and evaluate embodied agents across navigation, interaction, and manipulation tasks. We demonstrate the power and potential of ProcTHOR via a sample of 10,000 generated houses and a simple neural model. Models trained using only RGB images on ProcTHOR, with no explicit mapping and no human task supervision produce state-of-the-art results across 6 embodied AI benchmarks for navigation, rearrangement, and arm manipulation, including the presently running Habitat 2022, AI2-THOR Rearrangement 2022, and RoboTHOR challenges. 
We also demonstrate strong 0-shot results on these benchmarks, via pre-training on ProcTHOR with no fine-tuning on the downstream benchmark, often beating previous state-of-the-art systems that access the downstream training data.", "keywords": "Embodied AI;Large-Scale Environments;Procedural Generation", "primary_area": "", "supplementary_material": "/attachment/e6021c4d83e477b3e269fa713590c1d3f0962e61.zip", "author": "Matt Deitke;Eli VanderBilt;Alvaro Herrasti;Luca Weihs;Kiana Ehsani;Jordi Salvador;Winson Han;Eric Kolve;Aniruddha Kembhavi;Roozbeh Mottaghi", "authorids": "~Matt_Deitke1;~Eli_VanderBilt1;~Alvaro_Herrasti1;~Luca_Weihs1;~Kiana_Ehsani1;~Jordi_Salvador3;~Winson_Han1;~Eric_Kolve1;~Aniruddha_Kembhavi1;~Roozbeh_Mottaghi1", "gender": "M;M;;M;F;;M;M;M;", "homepage": "https://mattdeitke.com;https://www.elivanderbilt.com/;;https://lucaweihs.github.io/;https://ehsanik.github.io/;;;;https://anikem.github.io/;http://roozbehm.info", "dblp": ";263/1958;178/0393;203/6449;198/0910;53/5830;255/5528;177/9026;81/7583;36/633", "google_scholar": "k4VxCcYAAAAJ;;;F_RBceUAAAAJ;RScZCLEAAAAJ;https://scholar.google.de/citations?user=YuRVs2oAAAAJ;;;JnUevM0AAAAJ;CCV58dgAAAAJ", "orcid": ";;;0000-0002-6846-6718;;;;;;", "linkedin": ";eli-vanderbilt-a9710716;;;kiana-ehsani-1b81b0162/;;winsonhan/;eric-kolve-b500452/;;roozbeh-mottaghi-63397aa0", "or_profile": "~Matt_Deitke1;~Eli_VanderBilt1;~Alvaro_Herrasti1;~Luca_Weihs1;~Kiana_Ehsani1;~Jordi_Salvador3;~Winson_Han1;~Eric_Kolve1;~Aniruddha_Kembhavi1;~Roozbeh_Mottaghi1", "aff": "Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for AI;Ai2;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for AI", "aff_domain": "cs.washington.edu;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org", "position": "Undergrad student;Researcher;Researcher;Research Scientist;Researcher;Research Engineer;Researcher;Principal Software Engineer;Research Manager;Research Manager", "bibtex": "@inproceedings{\ndeitke2022,\ntitle={\ud83c\udfd8\ufe0f Proc{THOR}: Large-Scale Embodied {AI} Using Procedural Generation},\nauthor={Matt Deitke and Eli VanderBilt and Alvaro Herrasti and Luca Weihs and Kiana Ehsani and Jordi Salvador and Winson Han and Eric Kolve and Aniruddha Kembhavi and Roozbeh Mottaghi},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4-bV1bi74M}\n}", "github": "", "project": "", "reviewers": "L9s9;AqAB;QxMz", "pdf_size": 4201158, "rating": "4;8;9", "confidence": "4;4;4", "soundness": "3;4;2", "novelty": "2;3;3", "presentation": "4;4;4", "contribution": "2;3;3", "wc_summary": "61;99;66", "wc_strengths_and_weaknesses": "470;539;735", "wc_questions": "14;113;306", "wc_limitations": "5;6;138", "wc_review": "550;757;1245", "wc_reply_reviewers": "0;393;164", "wc_reply_authors": "1138;2080;2504", "reply_reviewers": "0;1;4", "reply_authors": "2;5;5", "rating_avg": [ 7.0, 2.160246899469287 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 4.0, 0.0 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 75.33333333333333, 16.858891488535722 ], "wc_strengths_and_weaknesses_avg": [ 581.3333333333334, 112.25071145530536 ], "wc_questions_avg": [ 144.33333333333334, 121.24997136311231 ], "wc_limitations_avg": [ 49.666666666666664, 62.46243315430128 ], "wc_review_avg": [ 850.6666666666666, 291.3604106409944 ], "wc_reply_reviewers_avg": [ 185.66666666666666, 161.17140634188877 ], "wc_reply_authors_avg": [ 1907.3333333333333, 570.8761297825961 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 235, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10015166589318312593&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.washington.edu;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org", "author_num": 10, "aff_unique_index": "0;1;1;1;1;2;3;1;1;2", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for AI;AI2", "aff_unique_dep": "Department of Computer Science;;;", "aff_unique_url": "https://www.washington.edu;https://allenai.org;https://allenai.org;https://www.ai2.edu", "aff_unique_abbr": "UW;AI2;AI2;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "401LFvBGIb", "title": "Deep feedforward functionality by equilibrium-point control in a shallow recurrent network.", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recurrent neural network based machine learning systems are typically employed for their sequential functionality in handling time-varying signals, such as for speech processing. However, neurobiologists find recurrent connections in the vision system and debate about equilibrium-point control in the motor system. Thus, we need a deeper understanding of how recurrent dynamics can be exploited to attain combinational stable-input stable-output functionality. Here, we study how a simplified Cohen-Grossberg neural network model can realize combinational multi-input Boolean functionality. We place our problem within the discipline of algebraic geometry, and solve a special case of it using piecewise-linear algebra. We demonstrate a connectance-efficient realization of the parity function as a proof-of-concept. 
Small-scale systems of this kind can be easily built, say for hobby robotics, as a network of two-terminal devices of resistors and tunnel diodes. Large-scale systems may be energy-efficiently built as an interconnected network of multi-electrode nanoclusters with non-monotonic transport mechanisms. ", "keywords": "recurrent physical network;combinational logic;equilibrium-point control;piecewise-linear;parity function", "primary_area": "", "supplementary_material": "", "author": "Celestine Preetham Lawrence", "authorids": "~Celestine_Preetham_Lawrence1", "gender": "", "homepage": "", "dblp": "190/2562", "google_scholar": "p6QKLIMAAAAJ", "orcid": "0000-0002-5429-1320", "linkedin": "", "or_profile": "~Celestine_Preetham_Lawrence1", "aff": "University of Groningen", "aff_domain": "rug.nl", "position": "Postdoc", "bibtex": "@misc{\nlawrence2022deep,\ntitle={Deep feedforward functionality by equilibrium-point control in a shallow recurrent network.},\nauthor={Celestine Preetham Lawrence},\nyear={2022},\nurl={https://openreview.net/forum?id=401LFvBGIb}\n}", "github": "", "project": "", "reviewers": "NkSi;ZLGu;HA6S;jxFt", "site": "https://openreview.net/forum?id=401LFvBGIb", "pdf_size": 355352, "rating": "2;4;6;7", "confidence": "1;2;1;2", "soundness": "2;3;3;4", "novelty": "1;2;3;3", "presentation": "1;3;2;2", "contribution": "1;2;3;3", "wc_summary": "65;74;60;191", "wc_strengths_and_weaknesses": "92;149;288;139", "wc_questions": "38;60;137;18", "wc_limitations": "1;16;37;35", "wc_review": "196;299;522;383", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "83;141;201;136", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.920286436967152 ], "confidence_avg": [ 1.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "contribution_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 97.5, 54.21485036408382 ], "wc_strengths_and_weaknesses_avg": [ 167.0, 73.09924760214705 ], "wc_questions_avg": [ 63.25, 45.09642447023932 ], "wc_limitations_avg": [ 22.25, 14.7542366796795 ], "wc_review_avg": [ 350.0, 119.362892056116 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 140.25, 41.79339062579154 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.39056673294247163, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5998485954172621999&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Groningen", "aff_unique_dep": "", "aff_unique_url": "https://www.rug.nl", "aff_unique_abbr": "RUG", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "title": "Alleviating Adversarial Attacks on Variational Autoencoders with MCMC", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54239", "id": "458a8dN8L6", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/39e9c5913c970e3e49c2df629daff636-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=458a8dN8L6", "openreview": "https://openreview.net/forum?id=458a8dN8L6", "poster": "/media/PosterPDFs/NeurIPS%202022/b0d6951563a26ffeb2405a9653b3b422.png?t=1666531767.8679411", "slides": "https://nips.cc/virtual/2022/poster/54239", "video": "https://nips.cc/virtual/2022/poster/54239", "author_site": "Anna Kuzina, Max Welling, Jakub Tomczak", "tldr": "We show 
that MCMC can be used to fix the latent code of a VAE that was corrupted by an adversarial attack", "abstract": "Variational autoencoders (VAEs) are latent variable models that can generate complex objects and provide meaningful latent representations. Moreover, they could be further used in downstream tasks such as classification. As previous work has shown, one can easily fool VAEs into producing unexpected latent representations and reconstructions for a slightly visually modified input. Here, we examine several previously proposed objective functions for constructing adversarial attacks and present a solution to alleviate the effect of these attacks. Our method utilizes the Markov Chain Monte Carlo (MCMC) technique in the inference step, which we motivate with a theoretical analysis. Thus, we do not incur any extra costs during training, and the performance on non-attacked inputs is not decreased. We validate our approach on a variety of datasets (MNIST, Fashion MNIST, Color MNIST, CelebA) and VAE configurations ($\\beta$-VAE, NVAE, $\\beta$-TCVAE), and show that our approach consistently improves the model robustness to adversarial attacks.", "keywords": "VAE;MCMC;Adversarial Attack", "primary_area": "", "supplementary_material": "/attachment/c3c37fd1312a5e91ef89a16371208d7b05ff3924.pdf", "author": "Anna Kuzina;Max Welling;Jakub Mikolaj Tomczak", "authorids": "~Anna_Kuzina1;~Max_Welling1;~Jakub_Mikolaj_Tomczak1", "gender": "F;M;M", "homepage": ";https://staff.fnwi.uva.nl/m.welling/;https://jmtomczak.github.io/", "dblp": ";16/2286;80/8238", "google_scholar": "IMoc7ioAAAAJ;https://scholar.google.nl/citations?user=8200InoAAAAJ;https://scholar.google.pl/citations?user=XB99pR4AAAAJ", "orcid": ";0000-0003-1484-2121;0000-0001-8634-6878", "linkedin": ";;jakub-tomczak-04305314a/", "or_profile": "~Anna_Kuzina1;~Max_Welling1;~Jakub_Mikolaj_Tomczak1", "aff": "VU Amsterdam;University of Amsterdam;Vrije Universiteit Amsterdam", "aff_domain": "vu.nl;uva.nl;vu.nl", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkuzina2022alleviating,\ntitle={Alleviating Adversarial Attacks on Variational Autoencoders with {MCMC}},\nauthor={Anna Kuzina and Max Welling and Jakub Mikolaj Tomczak},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=458a8dN8L6}\n}", "github": "", "project": "", "reviewers": "d2M4;bo5D;yegK;knXo", "pdf_size": 2154793, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "2;3;4;4", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "contribution": "3;2;3;3", "wc_summary": "50;64;146;70", "wc_strengths_and_weaknesses": "151;330;169;113", "wc_questions": "198;129;162;13", "wc_limitations": "24;54;13;1", "wc_review": "423;577;490;197", "wc_reply_reviewers": "84;189;242;14", "wc_reply_authors": "1151;1205;1296;375", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;3;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 37.37311868174771 ], "wc_strengths_and_weaknesses_avg": [ 190.75, 82.89865801085081 ], "wc_questions_avg": [ 125.5, 69.38479660559653 ], "wc_limitations_avg": [ 23.0, 19.6596032513375 ], "wc_review_avg": [ 421.75, 140.77886027383514 ], "wc_reply_reviewers_avg": [ 132.25, 88.84924028938008 ], "wc_reply_authors_avg": [ 1006.75, 368.4035660793744 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6383056796634697288&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "vu.nl;uva.nl;vu.nl", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Vrije Universiteit Amsterdam;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.vu.nl;https://www.uva.nl", "aff_unique_abbr": "VU;UvA", "aff_campus_unique_index": "0", "aff_campus_unique": "Amsterdam;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Lower Bounds on Randomly Preconditioned Lasso via Robust Sparse Designs", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54482", "id": "45p8yDYVr5", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/9a8d52eb05eb7b13f54b3d9eada667b7-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=45p8yDYVr5", "openreview": "https://openreview.net/forum?id=45p8yDYVr5", "poster": "/media/PosterPDFs/NeurIPS%202022/54482.png?t=1669681552.5144918", "slides": "https://nips.cc/virtual/2022/poster/54482", "video": "https://nips.cc/virtual/2022/poster/54482", "author_site": "Jonathan Kelner, Frederic Koehler, Raghu Meka, Dhruv Rohatgi", "tldr": "We construct an ill-conditioned Gaussian SLR task where Lasso with randomized preconditioning provably fails, based on a new connection to erasure-robustness.", "abstract": "Sparse linear regression with ill-conditioned Gaussian random covariates is widely believed to exhibit a statistical/computational gap, but there is surprisingly little formal evidence for this belief. Recent work has shown that, for certain covariance matrices, the broad class of Preconditioned Lasso programs provably cannot succeed on polylogarithmically sparse signals with a sublinear number of samples. However, this lower bound only holds against deterministic preconditioners, and in many contexts randomization is crucial to the success of preconditioners. 
We prove a stronger lower bound that rules out randomized preconditioners. For an appropriate covariance matrix, we construct a single signal distribution on which any invertibly-preconditioned Lasso program fails with high probability, unless it receives a linear number of samples. Surprisingly, at the heart of our lower bound is a new robustness result in compressed sensing. In particular, we study recovering a sparse signal when a few measurements can be erased adversarially. To our knowledge, this natural question has not previously been studied for sparse measurements. We show that standard sparse Bernoulli measurements are almost-optimally robust to adversarial erasures: if $b$ measurements are erased, then all but $O(b)$ of the coordinates of the signal are identifiable.", "keywords": "sparse linear regression;statistical/computational gaps;compressed sensing with adversarial erasure;preconditioning", "primary_area": "", "supplementary_material": "/attachment/bc89ffc9ef17cdfe2b1231229f2ad735de2be8e3.pdf", "author": "Jonathan Kelner;Frederic Koehler;Raghu Meka;Dhruv Rohatgi", "authorids": "~Jonathan_Kelner1;~Frederic_Koehler1;~Raghu_Meka1;~Dhruv_Rohatgi1", "gender": "M;;M;M", "homepage": "https://math.mit.edu/~kelner/;https://frkoehle.github.io/;http://raghumeka.org;http://www.mit.edu/~drohatgi/", "dblp": "64/4772.html;132/1904;76/1906;223/4465", "google_scholar": ";;xuDZ9-sAAAAJ;NUd_d6UAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jonathan_Kelner1;~Frederic_Koehler1;~Raghu_Meka1;~Dhruv_Rohatgi1", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;University of California, Los Angeles;Massachusetts Institute of Technology", "aff_domain": "mit.edu;berkeley.edu;ucla.edu;mit.edu", "position": "Full Professor;Postdoc;Associate Professor;PhD student", "bibtex": "@inproceedings{\nkelner2022lower,\ntitle={Lower Bounds on Randomly Preconditioned Lasso via Robust Sparse Designs},\nauthor={Jonathan Kelner and Frederic Koehler and Raghu Meka and Dhruv Rohatgi},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=45p8yDYVr5}\n}", "github": "", "project": "", "reviewers": "S6CT;ahcD;nP3f;dzk8", "pdf_size": 338228, "rating": "5;6;7;8", "confidence": "2;3;3;4", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "contribution": "3;3;3;4", "wc_summary": "92;59;87;148", "wc_strengths_and_weaknesses": "151;70;78;78", "wc_questions": "64;15;63;165", "wc_limitations": "1;2;45;1", "wc_review": "308;146;273;392", "wc_reply_reviewers": "0;0;0;19", "wc_reply_authors": "273;140;384;649", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.5, 32.283896914715854 ], "wc_strengths_and_weaknesses_avg": [ 94.25, 32.927002596653104 ], "wc_questions_avg": [ 76.75, 54.66431651452344 ], "wc_limitations_avg": [ 12.25, 18.91262805640718 ], "wc_review_avg": [ 279.75, 88.50529645168136 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 361.5, 187.12095018997738 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9486832980505139, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13274478308865047188&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "mit.edu;berkeley.edu;ucla.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;https://www.ucla.edu", "aff_unique_abbr": "MIT;UC Berkeley;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Learning of Group Invariant and Equivariant Representations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53365", "id": "47lpv23LDPr", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/cf3d7d8e79703fe947deffb587a83639-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=47lpv23LDPr", "openreview": "https://openreview.net/forum?id=47lpv23LDPr", "poster": "/media/PosterPDFs/NeurIPS%202022/53365.png?t=1669046197.4071531", "slides": "https://nips.cc/virtual/2022/poster/53365", "video": "https://nips.cc/virtual/2022/poster/53365", "author_site": "Robin Winter, Marco Bertolini, Tuan Le, Frank Noe, Djork-Arn\u00e9 Clevert", "tldr": "We propose an unsupervised learning framework to extract separated group invariant and equivariant representations.", "abstract": "Equivariant neural networks, whose hidden features transform according to representations of a group $G$ acting on the data, exhibit training efficiency and an improved generalisation performance. In this work, we extend group invariant and equivariant representation learning to the field of unsupervised deep learning. 
We propose a general learning strategy based on an encoder-decoder framework in which the latent representation is separated into an invariant term and an equivariant group action component. The key idea is that the network learns to encode and decode data to and from a group-invariant representation by additionally learning to predict the appropriate group action to align input and output pose to solve the reconstruction task. We derive the necessary conditions on the equivariant encoder, and we present a construction valid for any $G$, both discrete and continuous. We explicitly describe our construction for rotations, translations and permutations. We test the validity and the robustness of our approach in a variety of experiments with diverse data types, employing different network architectures.", "keywords": "equivariance;invariance;representation learning;autoencoder;unsupervised learning", "primary_area": "", "supplementary_material": "/attachment/db6fcd75a790a8227f864bcaf05f3b0fd7d6f542.pdf", "author": "Robin Winter;Marco Bertolini;Tuan Le;Frank Noe;Djork-Arn\u00e9 Clevert", "authorids": "~Robin_Winter1;~Marco_Bertolini1;~Tuan_Le2;~Frank_Noe1;~Djork-Arn\u00e9_Clevert2", "gender": "M;M;M;M;M", "homepage": ";;https://tuanle618.github.io/;;", "dblp": ";75/4248;;;", "google_scholar": ";7w453WkAAAAJ;Fk1A1p4AAAAJ;QGiLc_cAAAAJ;id2clmMAAAAJ", "orcid": "0000-0002-0576-593X;;0000-0001-7634-502X;;", "linkedin": ";;tuan-le618/;;", "or_profile": "~Robin_Winter1;~Marco_Bertolini1;~Tuan_Le2;~Frank_Noe1;~Djork-Arne_Clevert1", "aff": ";Bayer Ag;Bayer Ag;Freie Universit\u00e4t Berlin;Bayer AG", "aff_domain": ";bayer.com;bayer.com;fu-berlin.de;bayer.com", "position": ";Researcher;PhD student;Professor;Director", "bibtex": "@inproceedings{\nwinter2022unsupervised,\ntitle={Unsupervised Learning of Group Invariant and Equivariant Representations},\nauthor={Robin Winter and Marco Bertolini and Tuan Le and Frank Noe and Djork-Arn{\\'e} Clevert},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=47lpv23LDPr}\n}", "github": "", "project": "", "reviewers": "4UN9;FQV4;CwTC;E8UY", "pdf_size": 4302436, "rating": "6;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;2;4", "novelty": "2;3;2;4", "presentation": "3;3;3;3", "contribution": "2;3;2;4", "wc_summary": "133;97;98;88", "wc_strengths_and_weaknesses": "530;547;48;140", "wc_questions": "715;140;2;220", "wc_limitations": "12;10;2;7", "wc_review": "1390;794;150;455", "wc_reply_reviewers": "579;0;0;0", "wc_reply_authors": "3496;2533;759;418", "reply_reviewers": "6;0;0;0", "reply_authors": "9;6;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 104.0, 17.190113437671084 ], "wc_strengths_and_weaknesses_avg": [ 316.25, 224.69799175782592 ], "wc_questions_avg": [ 269.25, 268.9083254568367 ], "wc_limitations_avg": [ 7.75, 3.766629793329841 ], "wc_review_avg": [ 697.25, 460.2800098852871 ], "wc_reply_reviewers_avg": [ 144.75, 250.714354395595 ], "wc_reply_authors_avg": [ 1801.5, 1265.632351830499 ], "reply_reviewers_avg": [ 1.5, 2.598076211353316 ], "reply_authors_avg": [ 4.25, 3.418698582794336 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9670387465470871003&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": ";bayer.com;bayer.com;fu-berlin.de;bayer.com", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Bayer AG;Freie Universit\u00e4t Berlin", "aff_unique_dep": ";", "aff_unique_url": "https://www.bayer.com;https://www.fu-berlin.de", "aff_unique_abbr": "Bayer;FU Berlin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "A new dataset for multilingual keyphrase generation", "status": "Accept", "track": "Datasets & Benchmarks", "site": "https://nips.cc/virtual/2022/poster/55697", "id": "47qVX2pa-2", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/f88709551258331f9ab31b33c71021a4-Abstract-Datasets_and_Benchmarks.html", "pdf": "https://openreview.net/pdf?id=47qVX2pa-2", "openreview": "https://openreview.net/forum?id=47qVX2pa-2", "poster": "/media/PosterPDFs/NeurIPS%202022/854d6fae5ee42911677c739ee1734486.png?t=1667065898.082987", "slides": "https://nips.cc/virtual/2022/poster/55697", "video": "https://nips.cc/virtual/2022/poster/55697", "author_site": "Fr\u00e9d\u00e9ric Piedboeuf, Philippe Langlais", "tldr": "", "abstract": " Keyphrases are an important tool for efficiently dealing with the ever-increasing amount of information present on the internet. While there are many recent papers on English keyphrase generation, keyphrase generation for other languages remains vastly understudied, mostly due to the absence of datasets. To address this, we present a novel dataset called Papyrus, composed of 16427 pairs of abstracts and keyphrases. We release four versions of this dataset, corresponding to different subtasks. 
Papyrus-e considers only English keyphrases, Papyrus-f considers French keyphrases, Papyrus-m considers keyphrase generation in any language (mostly French and English), and Papyrus-a considers keyphrase generation in several languages. We train a state-of-the-art model on all four tasks and show that they lead to better results for non-English languages, with an average improvement of 14.2\\% on keyphrase extraction and 2.0\\% on generation. We also show an improvement of 0.4\\% on extraction and 0.7\\% on generation over English state-of-the-art results by concatenating Papyrus-e with the Kp20K training set.", "keywords": "Keyphrase generation;multilingual keyphrase generation;dataset;keyphrases", "primary_area": "", "supplementary_material": "/attachment/46631e89869022a494a9cb19806fc31d03ae36e3.zip", "author": "Fr\u00e9d\u00e9ric Piedboeuf;Philippe Langlais", "authorids": "~Fr\u00e9d\u00e9ric_Piedboeuf1;~Philippe_Langlais2", "gender": ";M", "homepage": ";http://www-labs.iro.umontreal.ca/~felipe/brand_new_home/creative-design/public_html/index.php?lg=en", "dblp": ";66/1102", "google_scholar": "https://scholar.google.ca/citations?user=TerngKQAAAAJ;VHd-kDEAAAAJ", "orcid": ";0000-0002-7319-1595", "linkedin": "fr%C3%A9d%C3%A9ric-piedboeuf-31ba72126/;", "or_profile": "~Fr\u00e9d\u00e9ric_Piedboeuf1;~Philippe_Langlais2", "aff": ";Universit\u00e9 de Montr\u00e9al", "aff_domain": ";umontreal.ca", "position": ";Full Professor", "bibtex": "@inproceedings{\npiedboeuf2022a,\ntitle={A new dataset for multilingual keyphrase generation},\nauthor={Fr{\\'e}d{\\'e}ric Piedboeuf and Philippe Langlais},\nbooktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2022},\nurl={https://openreview.net/forum?id=47qVX2pa-2}\n}", "github": "", "project": "", "reviewers": "PUsF;CCHh;2e1j;Tm7j;SPa3;HBXh", "pdf_size": 163462, "rating": "4;5;6;6;6;9", "confidence": "4;3;3;3;4;4", "wc_summary_and_contributions": "15;36;118;68;48;80", "wc_strengths": "34;31;35;47;47;231", "wc_weaknesses": "70;46;78;83;100;65", "wc_correctness": "1;1;158;8;12;23", "wc_clarity": "1;10;31;9;9;5", "wc_relation_to_prior_work": "1;27;64;5;26;18", "wc_documentation": "1;10;41;5;19;14", "wc_additional_feedback": "1;1;33;1;24;36", "wc_review": "124;162;558;226;285;472", "wc_reply_reviewers": "0;0;0;0;0;0", "wc_reply_authors": "174;318;403;146;284;74", "reply_reviewers": "0;0;0;0;0;0", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.0, 1.5275252316519468 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 60.833333333333336, 33.087846039830936 ], "wc_strengths_avg": [ 70.83333333333333, 71.89904187276923 ], "wc_weaknesses_avg": [ 73.66666666666667, 16.599866130651645 ], "wc_correctness_avg": [ 33.833333333333336, 56.02801878425551 ], "wc_clarity_avg": [ 10.833333333333334, 9.529194905948536 ], "wc_relation_to_prior_work_avg": [ 23.5, 20.56493779875511 ], "wc_documentation_avg": [ 15.0, 13.0 ], "wc_additional_feedback_avg": [ 16.0, 15.427248620541512 ], "wc_review_avg": [ 304.5, 159.032229437935 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 233.16666666666666, 111.84575191853386 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.21821789023599236, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6336672595983672983&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";umontreal.ca", "author_num": 2, 
"aff_unique_index": "0", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": "", "aff_unique_url": "https://www.umontreal.ca", "aff_unique_abbr": "UdeM", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Use-Case-Grounded Simulations for Explanation Evaluation", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54499", "id": "48Js-sP8wnv", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0b9536e186a77feff516893a5f393f7a-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=48Js-sP8wnv", "openreview": "https://openreview.net/forum?id=48Js-sP8wnv", "poster": "/media/PosterPDFs/NeurIPS%202022/a75a52f7209c01df2598a77ebc4de539.png?t=1666544605.027938", "slides": "https://nips.cc/virtual/2022/poster/54499", "video": "https://nips.cc/virtual/2022/poster/54499", "author_site": "Valerie Chen, Nari Johnson, Nicholay Topin, Gregory Plumb, Ameet Talwalkar", "tldr": "", "abstract": "A growing body of research runs human subject evaluations to study whether providing users with explanations of machine learning models can help them with practical real-world use cases. However, running user studies is challenging and costly, and consequently each study typically only evaluates a limited number of different settings, e.g., studies often only evaluate a few arbitrarily selected model explanation methods. To address these challenges and aid user study design, we introduce Simulated Evaluations (SimEvals). SimEvals involve training algorithmic agents that take as input the information content (such as model explanations) that would be presented to the user, to predict answers to the use case of interest. The algorithmic agent's test set accuracy provides a measure of the predictiveness of the information content for the downstream use case. We run a comprehensive evaluation on three real-world use cases (forward simulation, model debugging, and counterfactual reasoning) to demonstrate that SimEvals can effectively identify which explanation methods will help humans for each use case. 
These results provide evidence that SimEvals can be used to efficiently screen an important set of user study design decisions, e.g., selecting which explanations should be presented to the user, before running a potentially costly user study.", "keywords": "interpretability;explanation;evaluation;user study", "primary_area": "", "supplementary_material": "/attachment/f1ba57c8c771867b81a6a566e1a6ee0a65e4f3d3.pdf", "author": "Valerie Chen;Nari Johnson;Nicholay Topin;Gregory Plumb;Ameet Talwalkar", "authorids": "~Valerie_Chen2;~Nari_Johnson1;~Nicholay_Topin2;~Gregory_Plumb2;~Ameet_Talwalkar1", "gender": "F;F;;;M", "homepage": "https://valeriechen.github.io/;;;https://gdplumb.github.io;http://www.cs.cmu.edu/~atalwalk/", "dblp": "234/6033;302/3945;165/3324;;56/5528", "google_scholar": "94yn2j0AAAAJ;https://scholar.google.com/citations?hl=en;IiSg8R0AAAAJ;_f4rfHYAAAAJ;https://scholar.google.com.tw/citations?user=TW7U1W0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Valerie_Chen2;~Nari_Johnson1;~Nicholay_Topin2;~Gregory_Plumb2;~Ameet_Talwalkar1", "aff": "Microsoft Research;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "research.microsoft.com;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "Intern;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2022usecasegrounded,\ntitle={Use-Case-Grounded Simulations for Explanation Evaluation},\nauthor={Valerie Chen and Nari Johnson and Nicholay Topin and Gregory Plumb and Ameet Talwalkar},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=48Js-sP8wnv}\n}", "github": "", "project": "", "reviewers": "hGqz;hdF6;jL67;Diw4", "pdf_size": 764562, "rating": "5;6;6;8", "confidence": "4;3;4;3", "soundness": "2;3;3;4", "novelty": "1;3;2;4", "presentation": "3;3;4;4", "contribution": "1;3;2;4", "wc_summary": "87;66;113;35", "wc_strengths_and_weaknesses": "412;157;513;302", "wc_questions": "40;122;34;2", "wc_limitations": "11;9;120;1", "wc_review": "550;354;780;340", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "787;643;331;374", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.5, 0.5 ], "contribution_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 75.25, 28.586491565073178 ], "wc_strengths_and_weaknesses_avg": [ 346.0, 132.1949318241815 ], "wc_questions_avg": [ 49.5, 44.2803568187972 ], "wc_limitations_avg": [ 35.25, 49.07328703072579 ], "wc_review_avg": [ 506.0, 178.65609421455514 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 533.75, 188.87744042103068 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11874048066850318533&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "research.microsoft.com;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Microsoft;Carnegie Mellon University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": 
"https://www.microsoft.com/en-us/research;https://www.cmu.edu", "aff_unique_abbr": "MSR;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Biological Learning of Irreducible Representations of Commuting Transformations", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54711", "id": "48TmED6BvGZ", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/834f4c0b8d241b4943a9dcb77fd85675-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=48TmED6BvGZ", "openreview": "https://openreview.net/forum?id=48TmED6BvGZ", "poster": "/media/PosterPDFs/NeurIPS%202022/54711.png?t=1669577014.9403508", "slides": "https://nips.cc/virtual/2022/poster/54711", "video": "https://nips.cc/virtual/2022/poster/54711", "author_site": "Alexander Genkin, David Lipshutz, Siavash Golkar, Tiberiu Tesileanu, Dmitri Chklovskii", "tldr": "Suggested biologically plausible algorithms that learn commutative groups of image transformations from data and detect their velocity. ", "abstract": "A longstanding challenge in neuroscience is to understand neural mechanisms underlying the brain\u2019s remarkable ability to learn and detect transformations of objects due to motion. Translations and rotations of images can be viewed as orthogonal transformations in the space of pixel intensity vectors. Every orthogonal transformation can be decomposed into rotations within irreducible two-dimensional subspaces (or representations). For sets of commuting transformations, known as toroidal groups, Cohen and Welling proposed a mathematical framework for learning the irreducible representations. We explore the possibility that the brain also learns irreducible representations using a biologically plausible learning mechanism. The first is based on SVD of the anti-symmetrized outer product of the vectors representing consecutive images and is implemented by a single-layer neural network. The second is based on PCA of the difference between consecutive frames and is implemented in a two-layer network but with greater biological plausibility. Both networks learn image rotations (replicating Cohen and Welling\u2019s results) as well as translations. 
It would be interesting to search for the proposed networks in nascent connectomics and physiology datasets.", "keywords": "learning;transformation;biologically plausible", "primary_area": "", "supplementary_material": "/attachment/40028585fa4a3fbeeb82f478c18af2ee51e97844.zip", "author": "Alexander Genkin;David Lipshutz;Siavash Golkar;Tiberiu Tesileanu;Dmitri Chklovskii", "authorids": "~Alexander_Genkin1;~David_Lipshutz1;~Siavash_Golkar1;~Tiberiu_Tesileanu1;~Dmitri_Chklovskii1", "gender": "M;M;;M;", "homepage": ";https://lipshutzlab.com;;http://www.ttesileanu.com;", "dblp": "86/2177;173/4650;222/3276;160/0174;06/2796", "google_scholar": "e8-OKAUAAAAJ;XeWdtXcAAAAJ;UzaZt7MAAAAJ;8--imZAAAAAJ;7Bgb5TUAAAAJ", "orcid": ";0000-0001-9347-8326;;0000-0003-3107-3088;", "linkedin": "alexgenkin/;;;ttesileanu/;", "or_profile": "~Alexander_Genkin1;~David_Lipshutz1;~Siavash_Golkar1;~Tiberiu_Tesileanu1;~Dmitri_Chklovskii1", "aff": "Neuroscience institute, NYU Langone Health;Flatiron Institute;Flatiron Institute;Flatiron Institute;Simons Foundation", "aff_domain": "nyulangone.org;flatironinstitute.org;flatironinstitute.org;flatironinstitute.org;simonsfoundation.org", "position": "Researcher;Associate Research Scientist;Associate Research Scientist;Associate Research Scientist;Group Leader", "bibtex": "@inproceedings{\ngenkin2022biological,\ntitle={Biological Learning of Irreducible Representations of Commuting Transformations},\nauthor={Alexander Genkin and David Lipshutz and Siavash Golkar and Tiberiu Tesileanu and Dmitri Chklovskii},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=48TmED6BvGZ}\n}", "github": "", "project": "", "reviewers": "nuRj;YACa;vnKM;pueb", "pdf_size": 1933186, "rating": "6;6;7;8", "confidence": "4;3;4;3", "soundness": "2;3;4;4", "novelty": "2;2;4;4", "presentation": "2;3;4;4", "contribution": "2;2;4;4", "wc_summary": "56;37;138;58", "wc_strengths_and_weaknesses": "222;91;200;108", "wc_questions": "129;100;2;16", "wc_limitations": "28;4;2;14", "wc_review": "435;232;342;196", "wc_reply_reviewers": "34;507;0;0", "wc_reply_authors": "859;1709;389;284", "reply_reviewers": "1;1;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 72.25, 38.8353897881816 ], "wc_strengths_and_weaknesses_avg": [ 155.25, 56.60995937112126 ], "wc_questions_avg": [ 61.75, 53.9646875280493 ], "wc_limitations_avg": [ 12.0, 10.295630140987 ], "wc_review_avg": [ 301.25, 94.1046624774777 ], "wc_reply_reviewers_avg": [ 135.25, 215.07832875489802 ], "wc_reply_authors_avg": [ 810.25, 562.2541129240408 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3880184203860837510&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "nyulangone.org;flatironinstitute.org;flatironinstitute.org;flatironinstitute.org;simonsfoundation.org", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "NYU Langone Health;Flatiron Institute;Simons Foundation", "aff_unique_dep": "Neuroscience institute;;", "aff_unique_url": 
"https://nyulangone.org;https://flatironinstitute.org;https://www.simonsfoundation.org", "aff_unique_abbr": "NYU Langone;Flatiron;Simons Foundation", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Layer Freezing & Data Sieving: Missing Pieces of a Generic Framework for Sparse Training", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53887", "id": "493VFz-ZvDD", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/794a425a2e47e05d29d30f79b79a692d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=493VFz-ZvDD", "openreview": "https://openreview.net/forum?id=493VFz-ZvDD", "poster": "/media/PosterPDFs/NeurIPS%202022/701d804549a4a23d3cae801dac6c2c75.png?t=1666419354.886178", "slides": "https://nips.cc/virtual/2022/poster/53887", "video": "https://nips.cc/virtual/2022/poster/53887", "author_site": "Geng Yuan, Yanyu Li, Sheng Li, Zhenglun Kong, Sergey Tulyakov, Xulong Tang, Yanzhi Wang, Jian Ren", "tldr": "", "abstract": "Recently, sparse training has emerged as a promising paradigm for efficient deep learning on edge devices. The current research mainly devotes the efforts to reducing training costs by further increasing model sparsity. However, increasing sparsity is not always ideal since it will inevitably introduce severe accuracy degradation at an extremely high sparsity level. This paper intends to explore other possible directions to effectively and efficiently reduce sparse training costs while preserving accuracy. To this end, we investigate two techniques, namely, layer freezing and data sieving. First, the layer freezing approach has shown its success in dense model training and fine-tuning, yet it has never been adopted in the sparse training domain. Nevertheless, the unique characteristics of sparse training may hinder the incorporation of layer freezing techniques. Therefore, we analyze the feasibility and potentiality of using the layer freezing technique in sparse training and find it has the potential to save considerable training costs. Second, we propose a data sieving method for dataset-efficient training, which further reduces training costs by ensuring only a partial dataset is used throughout the entire training process. We show that both techniques can be well incorporated into the sparse training algorithm to form a generic framework, which we dub SpFDE. Our extensive experiments demonstrate that SpFDE can significantly reduce training costs while preserving accuracy from three dimensions: weight sparsity, layer freezing, and dataset sieving. 
Our code and models will be released.", "keywords": "Sparse training;model compression;efficient training", "primary_area": "", "supplementary_material": "/attachment/4dba966c4b86cf283322d94a5fe272d1a2561c16.pdf", "author": "Geng Yuan;Yanyu Li;Sheng Li;Zhenglun Kong;Sergey Tulyakov;Xulong Tang;Yanzhi Wang;Jian Ren", "authorids": "~Geng_Yuan1;~Yanyu_Li1;~Sheng_Li16;~Zhenglun_Kong1;~Sergey_Tulyakov1;~Xulong_Tang1;~Yanzhi_Wang3;~Jian_Ren2", "gender": "M;;M;M;M;M;M;M", "homepage": ";;https://shengli99.github.io/;https://sites.google.com/husky.neu.edu/zlk/home?authuser=1;http://www.stulyakov.com/;http://xzt102.github.io/;https://web.northeastern.edu/yanzhiwang/;https://alanspike.github.io/", "dblp": "205/3007;194/5818;23/3439-19;211/6323;40/6115;66/10956;;59/2180-5", "google_scholar": "tBIAgtgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;XYa4NVYAAAAJ;mgzXR0sAAAAJ;jmjRUi4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ", "orcid": "0000-0001-9844-992X;;;0000-0002-8120-4456;;;;", "linkedin": ";;2702851b2;zhenglun-kong-35b527150/;sergeytulyakov/;;;", "or_profile": "~Geng_Yuan1;~Yanyu_Li1;~Sheng_Li16;~Zhenglun_Kong1;~Sergey_Tulyakov1;~Xulong_Tang1;~Yanzhi_Wang3;~Jian_Ren2", "aff": "Northeastern University;Northeastern University;University of Pittsburgh;Northeastern University;;University of Pittsburgh;Northeastern University;Snap Inc.", "aff_domain": "northeastern.edu;northeastern.edu;pitt.edu;northeastern.edu;;pitt.edu;northeastern.edu;snapchat.com", "position": "PhD student;PhD student;PhD student;PhD student;;Assistant Professor;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nyuan2022layer,\ntitle={Layer Freezing \\& Data Sieving: Missing Pieces of a Generic Framework for Sparse Training},\nauthor={Geng Yuan and Yanyu Li and Sheng Li and Zhenglun Kong and Sergey Tulyakov and Xulong Tang and Yanzhi Wang and Jian Ren},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=493VFz-ZvDD}\n}", "github": "", "project": "", "reviewers": "AURL;EhKj;y6sL;363A", "pdf_size": 3615919, "rating": "3;6;6;7", "confidence": "5;4;3;4", "soundness": "1;3;2;4", "novelty": "2;3;2;3", "presentation": "2;3;4;4", "contribution": "2;3;2;3", "wc_summary": "104;71;102;84", "wc_strengths_and_weaknesses": "468;165;23;139", "wc_questions": "365;62;120;84", "wc_limitations": "45;1;1;11", "wc_review": "982;299;246;318", "wc_reply_reviewers": "947;44;0;35", "wc_reply_authors": "5520;1329;338;900", "reply_reviewers": "6;1;0;1", "reply_authors": "14;4;2;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.25, 13.571569548139964 ], "wc_strengths_and_weaknesses_avg": [ 198.75, 164.3873094250283 ], "wc_questions_avg": [ 157.75, 121.43388118643001 ], "wc_limitations_avg": [ 14.5, 18.07622748252522 ], "wc_review_avg": [ 461.25, 301.8106815538509 ], "wc_reply_reviewers_avg": [ 256.5, 398.99906014926904 ], "wc_reply_authors_avg": [ 2021.75, 2050.0605326428777 ], "reply_reviewers_avg": [ 2.0, 2.345207879911715 ], "reply_authors_avg": [ 5.75, 4.815340071064556 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8941325294447745327&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "northeastern.edu;northeastern.edu;pitt.edu;northeastern.edu;;pitt.edu;northeastern.edu;snapchat.com", "author_num": 8, "aff_unique_index": "0;0;1;0;1;0;2", "aff_unique_norm": "Northeastern University;University of Pittsburgh;Snap Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;https://www.pitt.edu;https://www.snapinc.com", "aff_unique_abbr": "NEU;Pitt;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Robust Dynamics through Variational Sparse Gating", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53057", "id": "49TS-pwQWBa", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0a97df4ce5b403ea87645010e9005130-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=49TS-pwQWBa", "openreview": "https://openreview.net/forum?id=49TS-pwQWBa", "poster": "/media/PosterPDFs/NeurIPS%202022/53057.png?t=1669128619.6405094", "slides": "https://nips.cc/virtual/2022/poster/53057", "video": "https://nips.cc/virtual/2022/poster/53057", "author_site": "Arnav Kumar Jain, Shivakanth Sujit, Shruti Joshi, Vincent Michalski, Danijar Hafner, Samira Ebrahimi Kahou", "tldr": "", "abstract": "Learning world models from their sensory inputs enables agents to plan for actions by imagining their future outcomes. World models have previously been shown to improve sample-efficiency in simulated environments with few objects, but have not yet been applied successfully to environments with many objects. In environments with many objects, often only a small number of them are moving or interacting at the same time. In this paper, we investigate integrating this inductive bias of sparse interactions into the latent dynamics of world models trained from pixels. 
First, we introduce Variational Sparse Gating (VSG), a latent dynamics model that updates its feature dimensions sparsely through stochastic binary gates. Moreover, we propose a simplified architecture Simple Variational Sparse Gating (SVSG) that removes the deterministic pathway of previous models, resulting in a fully stochastic transition function that leverages the VSG mechanism. We evaluate the two model architectures in the BringBackShapes (BBS) environment that features a large number of moving objects and partial observability, demonstrating clear improvements over prior models.", "keywords": "Deep Reinforcement Learning;Model Based Reinforcement Learning;World Models", "primary_area": "", "supplementary_material": "/attachment/7d9e9848f02bf019dd245929815e5c91c506e37e.zip", "author": "Arnav Kumar Jain;Shiva Kanth Sujit;Shruti Joshi;Vincent Michalski;Danijar Hafner;Samira Ebrahimi Kahou", "authorids": "~Arnav_Kumar_Jain2;~Shiva_Kanth_Sujit1;~Shruti_Joshi1;~Vincent_Michalski1;~Danijar_Hafner1;~Samira_Ebrahimi_Kahou1", "gender": "M;;;;;F", "homepage": "https://arnavkj1995.github.io/;https://shivakanthsujit.github.io/;https://shrutij01.github.io/;https://vmichals.github.io/;https://danijar.com;https://saebrahimi.github.io", "dblp": "190/7826;320/2346;223/5692;131/6644;184/8088;20/11069", "google_scholar": "https://scholar.google.co.in/citations?user=tu7wKckAAAAJ;https://scholar.google.ca/citations?user=oXpK8V8AAAAJ;ypwj0KwAAAAJ;9BGzHdUAAAAJ;VINmGpYAAAAJ;https://scholar.google.ca/citations?user=F99FuaAAAAAJ", "orcid": ";0000-0002-1744-0841;;;0000-0002-9534-7271;", "linkedin": ";shivakanthsujit/;;vincent-michalski-87930a12b;;", "or_profile": "~Arnav_Kumar_Jain2;~Shiva_Kanth_Sujit1;~Shruti_Joshi1;~Vincent_Michalski1;~Danijar_Hafner1;~Samira_Ebrahimi_Kahou1", "aff": "Universit\u00e9 de Montr\u00e9al;\u00c9cole de technologie sup\u00e9rieure;Universit\u00e9 de Montr\u00e9al;University of Montreal;University of Toronto;\u00c9cole de technologie sup\u00e9rieure", "aff_domain": "umontreal.ca;etsmtl.ca;umontreal.ca;umontreal.ca;cs.toronto;etsmtl.ca", "position": "PhD student;MS student;MS student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\njain2022learning,\ntitle={Learning Robust Dynamics through Variational Sparse Gating},\nauthor={Arnav Kumar Jain and Shiva Kanth Sujit and Shruti Joshi and Vincent Michalski and Danijar Hafner and Samira Ebrahimi Kahou},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=49TS-pwQWBa}\n}", "github": "", "project": "", "reviewers": "TPk3;4uKx;ANCN;Dxye", "pdf_size": 2276606, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;2", "presentation": "3;3;4;3", "contribution": "2;3;2;2", "wc_summary": "64;112;121;75", "wc_strengths_and_weaknesses": "172;147;102;213", "wc_questions": "412;87;164;2", "wc_limitations": "12;45;9;1", "wc_review": "660;391;396;291", "wc_reply_reviewers": "0;236;0;0", "wc_reply_authors": "1190;1020;768;452", "reply_reviewers": "0;1;0;0", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "contribution_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 24.031229681395832 ], "wc_strengths_and_weaknesses_avg": [ 158.5, 40.23990556648959 ], "wc_questions_avg": [ 166.25, 153.0169516752964 ], "wc_limitations_avg": [ 16.75, 16.798437427332342 ], "wc_review_avg": [ 434.5, 136.76348196795809 ], "wc_reply_reviewers_avg": [ 59.0, 102.19099764656376 ], "wc_reply_authors_avg": [ 857.5, 278.1200280454466 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5582932369755688869&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 11, "email": "umontreal.ca;etsmtl.ca;umontreal.ca;umontreal.ca;cs.toronto;etsmtl.ca", "author_num": 6, "aff_unique_index": "0;1;0;2;3;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;\u00c9cole de technologie sup\u00e9rieure;University of Montreal;University of Toronto", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.umontreal.ca;https://www.etsmtl.ca;https://wwwumontreal.ca;https://www.utoronto.ca", "aff_unique_abbr": "UdeM;ETS;UM;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Learning dynamics of deep linear networks with multiple pathways", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53004", "id": "4B7azgAbzda", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/dc3ca8bcd613e43ce540352b58d55d6d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4B7azgAbzda", "openreview": "https://openreview.net/forum?id=4B7azgAbzda", "poster": "/media/PosterPDFs/NeurIPS%202022/53004.png?t=1669620884.6923668", "slides": "https://nips.cc/virtual/2022/poster/53004", "video": "https://nips.cc/virtual/2022/poster/53004", "author_site": "Jianghong Shi, Eric Shea-Brown, Michael Buice", "tldr": "We show in the large width, large depth limit that deep linear networks will concentrate features of the training set (defined by singular values) in separate pathways.", "abstract": " Not only have deep networks become standard in machine learning, they are increasingly of interest in neuroscience as models of cortical computation that capture relationships between structural and functional properties. In addition they are a useful target of theoretical research into the properties of network computation. 
Deep networks typically have a serial or approximately serial organization across layers, and this is often mirrored in models that purport to represent computation in mammalian brains. There are, however, multiple examples of parallel pathways in mammalian brains. In some cases, such as the mouse, the entire visual system appears arranged in a largely parallel, rather than serial fashion. While these pathways may be formed by differing cost functions that drive different computations, here we present a new mathematical analysis of learning dynamics in networks that have parallel computational pathways driven by the same cost function. We use the approximation of deep linear networks with large hidden layer sizes to show that, as the depth of the parallel pathways increases, different features of the training set (defined by the singular values of the input-output correlation) will typically concentrate in one of the pathways. This result is derived analytically and demonstrated with numerical simulation. Thus, rather than sharing stimulus and task features across multiple pathways, parallel network architectures learn to produce sharply diversified representations with specialized and specific pathways, a mechanism which may hold important consequences for codes in both biological and artificial systems.", "keywords": "deep networks;linear networks;dynamical systems;theory;parallel pathways", "primary_area": "", "supplementary_material": "", "author": "Jianghong Shi;Eric Todd SheaBrown;Michael A Buice", "authorids": "jhshi@uw.edu;~Eric_Todd_SheaBrown1;~Michael_A_Buice1", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "jhshi@uw.edu;~Eric_Todd_SheaBrown1;~Michael_A_Buice1", "aff": ";;Allen Institute", "aff_domain": ";;alleninstitute.org", "position": ";;Associate Investigator", "bibtex": "@inproceedings{\nshi2022learning,\ntitle={Learning dynamics of deep linear networks with multiple pathways},\nauthor={Jianghong Shi and Eric Todd SheaBrown and Michael A Buice},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4B7azgAbzda}\n}", "github": "", "project": "", "reviewers": "pqBi;8s1V;ykV7", "pdf_size": 5981104, "rating": "5;6;8", "confidence": "2;3;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "contribution": "2;3;3", "wc_summary": "47;65;136", "wc_strengths_and_weaknesses": "93;93;184", "wc_questions": "44;9;113", "wc_limitations": "8;11;39", "wc_review": "192;178;472", "wc_reply_reviewers": "0;62;0", "wc_reply_authors": "864;1065;423", "reply_reviewers": "0;1;0", "reply_authors": "3;3;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.66666666666667, 38.42163742245016 ], "wc_strengths_and_weaknesses_avg": [ 123.33333333333333, 42.89781139198388 ], "wc_questions_avg": [ 55.333333333333336, 43.2075096353503 ], "wc_limitations_avg": [ 19.333333333333332, 13.960261060914616 ], "wc_review_avg": [ 280.6666666666667, 135.41376919977114 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 29.227080289043965 ], "wc_reply_authors_avg": [ 784.0, 268.13056521030944 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1858961490157353003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";;alleninstitute.org", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Allen Institute for Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://allenai.org", "aff_unique_abbr": "AI2", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "On the Efficient Implementation of High Accuracy Optimality of Profile Maximum Likelihood", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54001", "id": "4BoN6bk-FEz", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/2a8ce71baac4c89bf9ff479d8240c7d9-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4BoN6bk-FEz", "openreview": "https://openreview.net/forum?id=4BoN6bk-FEz", "poster": "/media/PosterPDFs/NeurIPS%202022/4d0b954f0bef437c29dfa73fafdf3fa5.png?t=1667455505.0921104", "slides": "https://nips.cc/virtual/2022/poster/54001", "video": "https://nips.cc/virtual/2022/poster/54001", "author_site": "Moses Charikar, Zhihao Jiang, Kirankumar Shiragur, Aaron Sidford", "tldr": "", "abstract": "We provide an efficient unified plug-in approach for estimating symmetric properties of distributions given $n$ independent samples. Our estimator is based on profile-maximum-likelihood (PML) and is sample optimal for estimating various symmetric properties when the estimation error $\\epsilon \\gg n^{-1/3}$. This result improves upon the previous best accuracy threshold of $\\epsilon \\gg n^{-1/4}$ achievable by polynomial time computable PML-based universal estimators \\cite{ACSS20, ACSS20b}. 
Our estimator reaches a theoretical limit for universal symmetric property estimation as \\cite{Han20} shows that a broad class of universal estimators (containing many well known approaches including ours) cannot be sample optimal for every $1$-Lipschitz property when $\\epsilon \\ll n^{-1/3}$.", "keywords": "property estimation;symmetric property estimation;profile maximum likelihood", "primary_area": "", "supplementary_material": "/attachment/07b4f239ae0562b3d3cc872d5b55f0cd1bb7b469.pdf", "author": "Moses Charikar;Zhihao Jiang;Kirankumar Shiragur;Aaron Sidford", "authorids": "~Moses_Charikar1;~Zhihao_Jiang1;~Kirankumar_Shiragur1;~Aaron_Sidford1", "gender": "M;M;M;", "homepage": "https://profiles.stanford.edu/moses-charikar;https://sites.google.com/view/zhihaojiang/home;https://sites.google.com/view/kiran-shiragur;", "dblp": "https://dblp.uni-trier.de/pers/hd/c/Charikar:Moses;;;", "google_scholar": "zX3ba1kAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Moses_Charikar1;~Zhihao_Jiang1;~Kirankumar_Shiragur1;~Aaron_Sidford1", "aff": "Stanford University;Stanford University;Stanford University;", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;", "position": "Full Professor;PhD student;PhD student;", "bibtex": "@inproceedings{\ncharikar2022on,\ntitle={On the Efficient Implementation of High Accuracy Optimality of Profile Maximum Likelihood},\nauthor={Moses Charikar and Zhihao Jiang and Kirankumar Shiragur and Aaron Sidford},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4BoN6bk-FEz}\n}", "github": "", "project": "", "reviewers": "w8eP;BTch;qCnJ", "pdf_size": 325117, "rating": "5;5;8", "confidence": "2;4;4", "soundness": "2;3;4", "novelty": "3;2;4", "presentation": "1;3;3", "contribution": "3;2;4", "wc_summary": "181;30;1099", "wc_strengths_and_weaknesses": "119;236;53", "wc_questions": "48;17;66", "wc_limitations": "8;51;1", "wc_review": "356;334;1219", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "708;481;284", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 436.6666666666667, 472.3800259207504 ], "wc_strengths_and_weaknesses_avg": [ 136.0, 75.67033764957046 ], "wc_questions_avg": [ 43.666666666666664, 20.237478982214054 ], "wc_limitations_avg": [ 20.0, 22.105806175452337 ], "wc_review_avg": [ 636.3333333333334, 412.105434189952 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 491.0, 173.24164241505756 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5000000000000001, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2813886643512901562&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "stanford.edu;stanford.edu;stanford.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "title": "Error Correction Code Transformer", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/55163", "id": "4F0Pd2Wjl0", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/fcd3909db30887ce1da519c4468db668-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4F0Pd2Wjl0", "openreview": "https://openreview.net/forum?id=4F0Pd2Wjl0", "poster": "/media/PosterPDFs/NeurIPS%202022/5c50b4df4b176845cd235b6a510c6903.png?t=1666187264.5892208", "slides": "https://nips.cc/virtual/2022/poster/55163", "video": "https://nips.cc/virtual/2022/poster/55163", "author_site": "Yoni Choukroun, Lior Wolf", "tldr": "We propose a novel SOTA Neural error correction decoder based on Transformers. ", "abstract": "Error correction code is a major part of the physical communication layer, ensuring the reliable transfer of data over noisy channels.\nRecently, neural decoders were shown to outperform classical decoding techniques.\nHowever, the existing neural approaches present strong overfitting, due to the exponential training complexity, or a restrictive inductive bias, due to reliance on Belief Propagation.\nRecently, Transformers have become methods of choice in many applications, thanks to their ability to represent complex interactions between elements.\nIn this work, we propose to extend for the first time the Transformer architecture to the soft decoding of linear codes at arbitrary block lengths.\nWe encode each channel's output dimension to a high dimension for a better representation of the bits' information to be processed separately.\nThe element-wise processing allows the analysis of channel output reliability, while the algebraic code and the interaction between the bits are inserted into the model via an adapted masked self-attention module.\nThe proposed approach demonstrates the power and flexibility of Transformers and outperforms existing state-of-the-art neural decoders by large margins, at a fraction of their time complexity.", "keywords": "ECC;Deep Learning;Transformers", "primary_area": "", "supplementary_material": "/attachment/636fae5b78748a87ef4db785f9735649e0549391.pdf", "author": "Yoni Choukroun;Lior Wolf", "authorids": "~Yoni_Choukroun1;~Lior_Wolf1", "gender": "M;M", "homepage": "https://yonilc.github.io/;http://www.cs.tau.ac.il/~wolf", "dblp": "186/8305;83/4103", "google_scholar": "https://scholar.google.co.il/citations?user=gjo4ebcAAAAJ;UbFrXTsAAAAJ", "orcid": ";0000-0001-5578-8892", "linkedin": ";", "or_profile": "~Yoni_Choukroun1;~Lior_Wolf1", "aff": "School of Computer Science, Tel Aviv University;Tel Aviv University", "aff_domain": "cs.tau.ac.il;tau.ac.il", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchoukroun2022error,\ntitle={Error Correction Code Transformer},\nauthor={Yoni Choukroun and Lior Wolf},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4F0Pd2Wjl0}\n}", "github": "", "project": "", "reviewers": "FPkL;MQDw;Nt2o;AayN", "pdf_size": 748961, "rating": "6;7;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "contribution": "2;3;3;3", "wc_summary": "29;67;148;103", "wc_strengths_and_weaknesses": "32;177;330;213", "wc_questions": "126;85;50;88", "wc_limitations": "51;2;13;3", "wc_review": "238;331;541;407", "wc_reply_reviewers": "0;18;0;0", "wc_reply_authors": "295;250;325;275", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "contribution_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.75, 43.9907660765302 ], "wc_strengths_and_weaknesses_avg": [ 188.0, 106.35553582207181 ], "wc_questions_avg": [ 87.25, 26.901440481877547 ], "wc_limitations_avg": [ 17.25, 19.954636052807377 ], "wc_review_avg": [ 379.25, 110.91973449301075 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 286.25, 27.471576219794887 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=903759423999065870&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cs.tau.ac.il;tau.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "0", "aff_campus_unique": "Tel Aviv;", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Verification and search algorithms for causal DAGs", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54621", "id": "4F7vp67j79I", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/5340b0c0b76dc0115f5cc91c20c1251d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4F7vp67j79I", "openreview": "https://openreview.net/forum?id=4F7vp67j79I", "poster": "/media/PosterPDFs/NeurIPS%202022/8133415ea4647b6345849fb38311cf32.png?t=1667624832.7033725", "slides": "https://nips.cc/virtual/2022/poster/54621", "video": "https://nips.cc/virtual/2022/poster/54621", "author_site": "Davin Choo, Kirankumar Shiragur, Arnab Bhattacharyya", "tldr": "We provide (near)-optimal algorithms for two fundamental problems in causal inference: verification and search.", "abstract": "We study two problems related to recovering causal graphs from interventional data: (i) $\\textit{verification}$, where the task is to check if a purported causal graph is correct, and (ii) $\\textit{search}$, where the task is to recover the correct causal graph. For both, we wish to minimize the number of interventions performed. For the first problem, we give a characterization of a minimal sized set of atomic interventions that is necessary and sufficient to check the correctness of a claimed causal graph. 
Our characterization uses the notion of $\\textit{covered edges}$, which enables us to obtain simple proofs and also easily reason about earlier known results. We also generalize our results to the settings of bounded size interventions and node-dependent interventional costs. For all the above settings, we provide the first known provable algorithms for efficiently computing (near)-optimal verifying sets on general graphs. For the second problem, we give a simple adaptive algorithm based on graph separators that produces an atomic intervention set which fully orients any essential graph while using $\\mathcal{O}(\\log n)$ times the optimal number of interventions needed to $\\textit{verify}$ (verifying size) the underlying DAG on $n$ vertices. This approximation is tight as $\\textit{any}$ search algorithm on an essential line graph has worst case approximation ratio of $\\Omega(\\log n)$ with respect to the verifying size. With bounded size interventions, each of size $\\leq k$, our algorithm gives an $\\mathcal{O}(\\log n \\cdot \\log k)$ factor approximation. Our result is the first known algorithm that gives a non-trivial approximation guarantee to the verifying size on general unweighted graphs and with bounded size interventions.", "keywords": "Causality;Causal Inference;Active Structure Learning;Interventions", "primary_area": "", "supplementary_material": "/attachment/c3afcf3896535582562e1e6a5faba8e49316ebf6.pdf", "author": "Davin Choo;Kirankumar Shiragur;Arnab Bhattacharyya", "authorids": "~Davin_Choo1;~Kirankumar_Shiragur1;~Arnab_Bhattacharyya1", "gender": ";M;M", "homepage": "http://davinchoo.com/;https://sites.google.com/view/kiran-shiragur;https://warwick.ac.uk/fac/sci/dcs/people/arnab_bhattacharyya/", "dblp": "230/4363.html;;64/574.html", "google_scholar": "cPtzhPsAAAAJ;;eECXWqUAAAAJ", "orcid": "0000-0002-4545-7341;;", "linkedin": ";;", "or_profile": "~Davin_Choo1;~Kirankumar_Shiragur1;~Arnab_Bhattacharyya1", "aff": "National University of Singapore;Stanford University;National University of Singapore", "aff_domain": "u.nus.edu;stanford.edu;nus.edu.sg", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchoo2022verification,\ntitle={Verification and search algorithms for causal {DAG}s},\nauthor={Davin Choo and Kirankumar Shiragur and Arnab Bhattacharyya},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4F7vp67j79I}\n}", "github": "", "project": "", "reviewers": "A2Kf;ua5U;vQyV", "pdf_size": 734127, "rating": "5;5;7", "confidence": "4;2;2", "soundness": "3;2;4", "novelty": "2;2;3", "presentation": "3;3;3", "contribution": "2;2;3", "wc_summary": "83;193;66", "wc_strengths_and_weaknesses": "240;101;257", "wc_questions": "52;145;19", "wc_limitations": "27;15;12", "wc_review": "402;454;354", "wc_reply_reviewers": "466;0;0", "wc_reply_authors": "2169;1275;971", "reply_reviewers": "2;0;0", "reply_authors": "5;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 114.0, 56.290911048469155 ], "wc_strengths_and_weaknesses_avg": [ 199.33333333333334, 69.87767088912516 ], "wc_questions_avg": [ 72.0, 53.34791467339656 ], "wc_limitations_avg": [ 18.0, 6.48074069840786 ], "wc_review_avg": [ 403.3333333333333, 40.83571421630281 ], "wc_reply_reviewers_avg": [ 155.33333333333334, 219.67450668862074 ], "wc_reply_authors_avg": [ 1471.6666666666667, 508.4678510540815 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5973326212150461189&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "u.nus.edu;stanford.edu;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.stanford.edu", "aff_unique_abbr": "NUS;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Sharp Analysis of Stochastic Optimization under Global Kurdyka-Lojasiewicz Inequality", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53584", "id": "4FSfANJp8Qx", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/65ae674df2fb642518ae8d2b5435e1b8-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4FSfANJp8Qx", "openreview": "https://openreview.net/forum?id=4FSfANJp8Qx", "poster": "/media/PosterPDFs/NeurIPS%202022/53584.png?t=1669486531.5598605", "slides": "https://nips.cc/virtual/2022/poster/53584", "video": "https://nips.cc/virtual/2022/poster/53584", "author_site": "Ilyas Fatkhullin, Jalal Etesami, Niao He, Negar Kiyavash", "tldr": "", "abstract": "We study the complexity of finding the global solution to stochastic nonconvex optimization when the objective function satisfies global Kurdyka-{\\L}ojasiewicz (KL) inequality and the queries from stochastic gradient oracles satisfy mild expected smoothness assumption. We first introduce a general framework to analyze Stochastic Gradient Descent (SGD) and its associated nonlinear dynamics under the setting. 
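[Editor's note] A one-dimensional toy may help fix the gradient-domination condition used below (my sketch, not the paper's code): f(x) = |x|^p satisfies f(x) - f* <= c |f'(x)|^{p/(p-1)}, since |x| = (|f'(x)|/p)^{1/(p-1)}. Taking p = 4 gives degree of gradient domination alpha = 4/3, inside the range (1, 2], and plain SGD with a decaying step size drifts to the global minimum despite the noisy oracle:

```python
# Toy illustration (mine, not the paper's code) of SGD under an
# alpha-gradient-domination condition; the step size 0.05/sqrt(t) and
# unit-variance gradient noise are arbitrary choices for the demo.
import numpy as np

rng = np.random.default_rng(0)
p, x, T = 4.0, 2.0, 100_000          # p = 4  =>  alpha = p/(p-1) = 4/3
for t in range(1, T + 1):
    grad = p * np.sign(x) * abs(x) ** (p - 1)   # exact gradient of |x|**p
    noisy_grad = grad + rng.normal()            # unbiased stochastic oracle
    x -= 0.05 / np.sqrt(t) * noisy_grad         # eta_t = 0.05 / sqrt(t)
print(f"f(x_T) = {abs(x) ** p:.2e}")            # small: drifts toward f* = 0
```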
As a byproduct of our analysis, we obtain a sample complexity of $\\mathcal{O}(\\epsilon^{-(4-\\alpha)/\\alpha})$ for SGD when the objective satisfies the so called $\\alpha$-P{\\L} condition, where $\\alpha$ is the degree of gradient domination. Furthermore, we show that a modified SGD with variance reduction and restarting (PAGER) achieves an improved sample complexity of $\\mathcal{O}(\\epsilon^{-2/\\alpha})$ when the objective satisfies the average smoothness assumption. This leads to the first optimal algorithm for the important case of $\\alpha=1$ which appears in applications such as policy optimization in reinforcement learning. ", "keywords": "stochastic optimization;nonconvex optimization;first order method;Kurdyka-Lojasiewicz condition;variance reduction", "primary_area": "", "supplementary_material": "/attachment/f40211f5cd8cdc5f480506f983fe3e3864d6577b.pdf", "author": "Ilyas Fatkhullin;Jalal Etesami;Niao He;Negar Kiyavash", "authorids": "~Ilyas_Fatkhullin1;~Jalal_Etesami2;~Niao_He3;~Negar_Kiyavash1", "gender": "Not Specified;M;F;", "homepage": "https://ai.ethz.ch/people/ilyas-fatkhullin.html;https://www.cs.cit.tum.de/en/dss/members/prof-jalal-etesami/;https://people.epfl.ch/negar.kiyavash?lang=en;http://people.inf.ethz.ch/niaohe", "dblp": "294/8711;76/10800;85/4976;https://dblp.uni-trier.de/pers/h/He:Niao.html", "google_scholar": "UCOWHb4AAAAJ;3Usg1G0AAAAJ;7tBDvOwAAAAJ;iNcA81MAAAAJ", "orcid": ";;0000-0002-8545-7709;", "linkedin": ";;;", "or_profile": "~Ilyas_Fatkhullin1;~Jalal_Etesami2;~Negar_Kiyavash1;~Niao_He1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;epfl.ch;epfl.ch;ethz.ch", "position": "PhD student;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nfatkhullin2022sharp,\ntitle={Sharp Analysis of Stochastic Optimization under Global Kurdyka-Lojasiewicz Inequality},\nauthor={Ilyas Fatkhullin and Jalal Etesami and Niao He and Negar Kiyavash},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4FSfANJp8Qx}\n}", "github": "", "project": "", "reviewers": "FAsV;ekoT;cXuP;ws9Y", "pdf_size": 493337, "rating": "4;5;6;7", "confidence": "3;5;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;4;2;3", "contribution": "2;3;2;3", "wc_summary": "81;76;62;122", "wc_strengths_and_weaknesses": "461;115;194;253", "wc_questions": "231;192;5;114", "wc_limitations": "26;1;5;14", "wc_review": "799;384;266;503", "wc_reply_reviewers": "116;0;0;0", "wc_reply_authors": "2821;1131;773;605", "reply_reviewers": "2;0;0;0", "reply_authors": "10;6;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.25, 22.331312097590683 ], "wc_strengths_and_weaknesses_avg": [ 255.75, 128.21734477051066 ], "wc_questions_avg": [ 135.5, 86.32062326002982 ], "wc_limitations_avg": [ 11.5, 9.604686356149273 ], "wc_review_avg": [ 488.0, 198.14514881772908 ], "wc_reply_reviewers_avg": [ 29.0, 50.22947341949744 ], "wc_reply_authors_avg": [ 1332.5, 880.1322343829931 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 5.25, 3.112474899497183 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1859659750569595263&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ethz.ch;epfl.ch;epfl.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.epfl.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;EPFL;ETH Zurich", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Generating Training Data with Language Models: Towards Zero-Shot Language Understanding", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52982", "id": "4G1Sfp_1sz7", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/0346c148ba1c21c6b4780a961ea141dc-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4G1Sfp_1sz7", "openreview": "https://openreview.net/forum?id=4G1Sfp_1sz7", "poster": "/media/PosterPDFs/NeurIPS%202022/b92f8ab7a47e21fb033068842d6663b0.png?t=1667439586.0680916", "slides": "https://nips.cc/virtual/2022/poster/52982", "video": "https://nips.cc/virtual/2022/poster/52982", "author_site": "Yu Meng, Jiaxin Huang, Yu Zhang, Jiawei Han", "tldr": "We propose SuperGen, a supervision generation approach for zero-shot natural language understanding", "abstract": "Pretrained language models (PLMs) have demonstrated remarkable performance in various natural language processing tasks: Unidirectional PLMs (e.g., GPT) are well known for their superior text generation capabilities; bidirectional PLMs (e.g., BERT) have been the prominent choice for natural language understanding (NLU) tasks. While both types of models have achieved promising few-shot learning performance, their potential for zero-shot learning has been underexplored. 
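[Editor's note] The generate-then-fine-tune recipe described next is easy to sketch with the HuggingFace transformers library; in this illustrative version the model choice (gpt2), the prompts, and the sampling parameters are my stand-ins rather than the paper's exact setup:

```python
# Illustrative sketch of the two-step recipe (my stand-in prompts/models).
# Step 1: a unidirectional PLM writes class-conditioned texts; step 2
# (summarized in the trailing comment) fine-tunes a bidirectional PLM on
# the synthetic (text, label) pairs.  Requires: pip install transformers torch
from transformers import pipeline

generator = pipeline("text-generation", model="gpt2")
prompts = {1: "Rating: 5.0\nA great movie review: ",
           0: "Rating: 1.0\nA terrible movie review: "}

synthetic = []
for label, prompt in prompts.items():
    outs = generator(prompt, max_new_tokens=40, num_return_sequences=8,
                     do_sample=True, top_p=0.9)
    for o in outs:
        text = o["generated_text"][len(prompt):].strip()  # drop the prompt
        synthetic.append((text, label))
# Next: rank pairs by generation probability, keep the top slice, and
# fine-tune e.g. BERT on them with label smoothing + temporal ensembling.
```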
In this paper, we present a simple approach that uses both types of PLMs for fully zero-shot learning of NLU tasks without requiring any task-specific data: A unidirectional PLM generates class-conditioned texts guided by prompts, which are used as the training data for fine-tuning a bidirectional PLM. With quality training data selected based on the generation probability and regularization techniques (label smoothing and temporal ensembling) applied to the fine-tuning stage for better generalization and stability, our approach demonstrates strong performance across seven classification tasks of the GLUE benchmark (e.g., 72.3/73.8 on MNLI-m/mm and 92.8 on SST-2), significantly outperforming zero-shot prompting methods and achieving even comparable results to strong few-shot approaches using 32 training samples per class.", "keywords": "Zero-Shot Learning;Natural Language Understanding;Pretrained Language Models", "primary_area": "", "supplementary_material": "/attachment/93137285db75acd3fb0914c5c55666cfa2eb4ea1.pdf", "author": "Yu Meng;Jiaxin Huang;Yu Zhang;Jiawei Han", "authorids": "~Yu_Meng1;~Jiaxin_Huang1;~Yu_Zhang26;~Jiawei_Han1", "gender": "M;F;M;M", "homepage": "https://yumeng5.github.io/;https://teapot123.github.io/;https://yuzhimanhua.github.io/;http://hanj.cs.illinois.edu/", "dblp": "30/4233-1;187/2874-1;50/671-44;h/JiaweiHan.html", "google_scholar": "S2-yZKcAAAAJ;DnxrVXgAAAAJ;N0PrmgIAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "orcid": "0000-0003-2554-2888;;0000-0003-0540-6758;0000-0002-3629-2696", "linkedin": ";;;", "or_profile": "~Yu_Meng1;~Jiaxin_Huang1;~Yu_Zhang26;~Jiawei_Han1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmeng2022generating,\ntitle={Generating Training Data with Language Models: Towards Zero-Shot Language Understanding},\nauthor={Yu Meng and Jiaxin Huang and Yu Zhang and Jiawei Han},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4G1Sfp_1sz7}\n}", "github": "", "project": "", "reviewers": "XfNA;V4Sv;kf6s", "pdf_size": 561631, "rating": "5;6;6", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "3;3;4", "presentation": "3;3;3", "contribution": "3;3;4", "wc_summary": "62;79;75", "wc_strengths_and_weaknesses": "258;140;187", "wc_questions": "33;77;29", "wc_limitations": "30;1;27", "wc_review": "383;297;318", "wc_reply_reviewers": "160;20;255", "wc_reply_authors": "1121;604;438", "reply_reviewers": "2;1;2", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "contribution_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 7.2571803523590805 ], "wc_strengths_and_weaknesses_avg": [ 195.0, 48.5042953424402 ], "wc_questions_avg": [ 46.333333333333336, 21.746008573733455 ], "wc_limitations_avg": [ 19.333333333333332, 13.02134998974974 ], "wc_review_avg": [ 332.6666666666667, 36.609045633862436 ], "wc_reply_reviewers_avg": [ 145.0, 96.52288157046839 ], "wc_reply_authors_avg": [ 721.0, 290.8481849121061 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 245, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14481752723663721801&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 10, "email": "illinois.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Non-Stationary Bandits under Recharging Payoffs: Improved Planning with Sublinear Regret", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53636", "id": "4JYq_Kw4zw", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/7fccdff3f1457cb7b846596c76c23abd-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4JYq_Kw4zw", "openreview": "https://openreview.net/forum?id=4JYq_Kw4zw", "poster": "/media/PosterPDFs/NeurIPS%202022/f69543a0f1b1c844dbd3eeee30ea0404.png?t=1666243583.7075527", "slides": "https://nips.cc/virtual/2022/poster/53636", "video": "https://nips.cc/virtual/2022/poster/53636", "author_site": "Orestis Papadigenopoulos, Constantine Caramanis, Sanjay Shakkottai", "tldr": "", "abstract": "The stochastic multi-armed bandit setting has been recently studied in the non-stationary regime, where the mean payoff of each action is a non-decreasing function of the number of rounds passed since it was last played. This model captures natural behavioral aspects of the users which crucially determine the performance of recommendation platforms, ad placement systems, and more. 
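[Editor's note] To make the model concrete, here is a toy simulation (mine; it illustrates the recharging-payoff setting and a naive greedy baseline, not the paper's LP-rounding algorithm). The payoff curves g_i are assumed forms chosen for illustration: each arm's mean payoff is a non-decreasing function of the delay since its last pull, so a policy trades off harvesting a recharged arm now against letting it recharge further.

```python
# Toy recharging-payoff bandit (mine, not the paper's algorithm).
# Arm i's mean payoff is g_i(tau) = b_i * (1 - exp(-tau / c_i)),
# where tau is the number of rounds since arm i was last played.
import math, random

random.seed(0)
arms = [(1.0, 1.0), (2.0, 6.0)]      # (b_i, c_i): fast/low vs slow/high arm
last = [0] * len(arms)

def mean_payoff(i, t):
    b, c = arms[i]
    return b * (1.0 - math.exp(-(t - last[i]) / c))

total = 0.0
for t in range(1, 10_001):
    i = max(range(len(arms)), key=lambda j: mean_payoff(j, t))  # greedy policy
    total += mean_payoff(i, t) + random.gauss(0, 0.1)           # noisy reward
    last[i] = t
print(f"greedy average payoff: {total / 10_000:.3f}")
```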
Even assuming prior knowledge of the mean payoff functions, computing an optimal planning in the above model is NP-hard, while the state-of-the-art is a $1/4$-approximation algorithm for the case where at most one arm can be played per round. We first focus on the setting where the mean payoff functions are known. In this setting, we significantly improve the best-known guarantees for the planning problem by developing a polynomial-time $(1-{1}/{e})$-approximation algorithm (asymptotically and in expectation), based on a novel combination of randomized LP rounding and a time-correlated (interleaved) scheduling method. Furthermore, our algorithm achieves improved guarantees -- compared to prior work -- for the case where more than one arms can be played at each round. Moving to the bandit setting, when the mean payoff functions are initially unknown, we show how our algorithm can be transformed into a bandit algorithm with sublinear regret.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/14380b052b02fde706ad28ba3e469623ae61daba.pdf", "author": "Orestis Papadigenopoulos;Constantine Caramanis;Sanjay Shakkottai", "authorids": "~Orestis_Papadigenopoulos1;~Constantine_Caramanis1;~Sanjay_Shakkottai1", "gender": "M;M;M", "homepage": "http://www.columbia.edu/~vp2499/;http://users.ece.utexas.edu/~cmcaram/constantine_caramanis/Home.html;https://sites.google.com/view/sanjay-shakkottai/", "dblp": "163/9826.html;96/5760;61/4596", "google_scholar": "86YS2vQAAAAJ;47YTUrEAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Orestis_Papadigenopoulos1;~Constantine_Caramanis1;~Sanjay_Shakkottai1", "aff": "Columbia University;University of Texas, Austin;University of Texas at Austin", "aff_domain": "columbia.edu;utexas.edu;utexas.edu", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\npapadigenopoulos2022nonstationary,\ntitle={Non-Stationary Bandits under Recharging Payoffs: Improved Planning with Sublinear Regret},\nauthor={Orestis Papadigenopoulos and Constantine Caramanis and Sanjay Shakkottai},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4JYq_Kw4zw}\n}", "github": "", "project": "", "reviewers": "uVwc;3yso;Y2yC", "pdf_size": 583581, "rating": "6;7;7", "confidence": "2;5;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "1;3;3", "contribution": "3;3;3", "wc_summary": "102;29;119", "wc_strengths_and_weaknesses": "120;26;143", "wc_questions": "16;61;49", "wc_limitations": "14;2;6", "wc_review": "252;118;317", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "251;549;150", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.33333333333333, 39.041288685470185 ], "wc_strengths_and_weaknesses_avg": [ 96.33333333333333, 50.61181241128948 ], "wc_questions_avg": [ 42.0, 19.026297590440446 ], "wc_limitations_avg": [ 7.333333333333333, 4.988876515698588 ], "wc_review_avg": [ 229.0, 82.85328398239062 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 316.6666666666667, 169.37991485284067 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7559289460184542, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4215882717566734433&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "columbia.edu;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Columbia University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.utexas.edu", "aff_unique_abbr": "Columbia;UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CARD: Classification and Regression Diffusion Models", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/52972", "id": "4L2zYEJ9d_", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/72dad95a24fae750f8ab1cb3dab5e58d-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4L2zYEJ9d_", "openreview": "https://openreview.net/forum?id=4L2zYEJ9d_", "poster": "/media/PosterPDFs/NeurIPS%202022/52972.png?t=1670365810.339474", "slides": "https://nips.cc/virtual/2022/poster/52972", "video": "https://nips.cc/virtual/2022/poster/52972", "author_site": "Xizewen Han, Huangjie Zheng, Mingyuan Zhou", "tldr": "", "abstract": "Learning the distribution of a continuous or categorical response variable y given its covariates x is a fundamental problem in statistics and machine learning. Deep neural network-based supervised learning algorithms have made great progress in predicting the mean of y given x, but they are often criticized for their limited ability to accurately capture the uncertainty of their predictions. In this paper, we introduce classification and regression diffusion (CARD) models, which combine a denoising diffusion-based conditional generative model and a pre-trained conditional mean estimator, to accurately predict the distribution of y given x. 
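[Editor's note] As I read the construction, the diffusion is re-anchored so that its endpoint is N(f(x), I) around the pre-trained mean estimator rather than N(0, I). The exact form of the conditional forward process in this sketch is my assumption and should be checked against the paper:

```python
# Sketch of a CARD-style conditional forward process (my reading of the
# idea; the closed form below is an assumption, not quoted from the paper):
# noise y_0 toward the mean estimate f(x) instead of toward zero.
import numpy as np

def forward_sample(y0, fx, alpha_bar_t, rng):
    """Assumed form: y_t ~ N(sqrt(ab)*y0 + (1 - sqrt(ab))*f(x), (1 - ab))."""
    ab = alpha_bar_t
    return (np.sqrt(ab) * y0 + (1.0 - np.sqrt(ab)) * fx
            + np.sqrt(1.0 - ab) * rng.normal(size=np.shape(y0)))

rng = np.random.default_rng(0)
y0, fx = 3.0, 2.5                    # true response and mean estimate f(x)
for ab in (0.99, 0.5, 0.01):         # early -> late diffusion steps
    print(ab, forward_sample(y0, fx, ab, rng))
# As ab -> 0 the sample approaches N(f(x), 1); a learned reverse chain then
# denoises from f(x) + noise back to draws from p(y | x), so repeated
# sampling yields instance-level uncertainty estimates.
```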
We demonstrate the outstanding ability of CARD in conditional distribution prediction with both toy examples and real-world datasets, the experimental results on which show that CARD, in general, outperforms state-of-the-art methods, including Bayesian neural network-based ones designed for uncertainty estimation, especially when the conditional distribution of y given x is multi-modal. In addition, we utilize the stochastic nature of the generative model outputs to obtain a finer granularity in model confidence assessment at the instance level for classification tasks.", "keywords": "", "primary_area": "", "supplementary_material": "/attachment/8e05acb79c8fa3f245ec5fa0135a3c04cbf5924e.pdf", "author": "Xizewen Han;Huangjie Zheng;Mingyuan Zhou", "authorids": "~Xizewen_Han1;~Huangjie_Zheng1;~Mingyuan_Zhou1", "gender": ";M;M", "homepage": ";;http://mingyuanzhou.github.io", "dblp": ";192/2170;", "google_scholar": ";Vl5wCXsAAAAJ;LXwCIisAAAAJ", "orcid": ";0000-0003-0508-5034;", "linkedin": ";;", "or_profile": "~Xizewen_Han1;~Huangjie_Zheng1;~Mingyuan_Zhou1", "aff": ";University of Texas, Austin;The University of Texas at Austin", "aff_domain": ";utexas.edu;utexas.edu", "position": ";PhD student;Associate Professor", "bibtex": "@inproceedings{\nhan2022card,\ntitle={{CARD}: Classification and Regression Diffusion Models},\nauthor={Xizewen Han and Huangjie Zheng and Mingyuan Zhou},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4L2zYEJ9d_}\n}", "github": "", "project": "", "reviewers": "8bC3;VuVw;fUR1;RrLR", "pdf_size": 982517, "rating": "5;5;5;6", "confidence": "3;4;3;3", "soundness": "2;3;2;3", "novelty": "3;2;2;3", "presentation": "2;2;2;3", "contribution": "3;2;2;3", "wc_summary": "53;71;68;70", "wc_strengths_and_weaknesses": "288;219;148;122", "wc_questions": "0;154;127;310", "wc_limitations": "0;73;14;41", "wc_review": "341;517;357;543", "wc_reply_reviewers": "0;54;18;24", "wc_reply_authors": "697;1039;609;801", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "contribution_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.5, 7.297259759663212 ], "wc_strengths_and_weaknesses_avg": [ 194.25, 64.73165763364939 ], "wc_questions_avg": [ 147.75, 110.25510192276818 ], "wc_limitations_avg": [ 32.0, 27.883686987197372 ], "wc_review_avg": [ 439.5, 91.14137370042214 ], "wc_reply_reviewers_avg": [ 24.0, 19.44222209522358 ], "wc_reply_authors_avg": [ 786.5, 160.84386839416663 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 141, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13161498921981862309&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": ";utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Algorithms and Hardness for Learning Linear Thresholds from 
Label Proportions", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/53052", "id": "4LZo68TuF-4", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/08a9e28c96d016dd63903ab51cd085b0-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4LZo68TuF-4", "openreview": "https://openreview.net/forum?id=4LZo68TuF-4", "poster": "/media/PosterPDFs/NeurIPS%202022/53052.png?t=1669713679.4802215", "slides": "https://nips.cc/virtual/2022/poster/53052", "video": "https://nips.cc/virtual/2022/poster/53052", "tldr": "This work provides algorithmic and hardness results for learning linear thresholds from label proportions for bag size >= 3.", "abstract": "We study the learnability of linear threshold functions (LTFs) in the learning from label proportions (LLP) framework. In this, the feature-vector classifier is learnt from bags of feature-vectors and their corresponding observed label proportions which are satisfied by (i.e., consistent with) some unknown LTF. This problem has been investigated in recent work (Saket21) which gave an algorithm to produce an LTF that satisfies at least $(2/5)$-fraction of a satisfiable collection of bags, each of size $\\leq 2$, by solving and rounding a natural SDP relaxation. However, this SDP relaxation is specific to at most $2$-sized bags and does not apply to bags of larger size. \n \nIn this work we provide a fairly non-trivial SDP relaxation of a non-quadratic formulation for bags of size $3$. We analyze its rounding procedure using novel matrix decomposition techniques to obtain an algorithm which outputs an LTF satisfying at least $(1/12)$-fraction of the bags of size $\\leq 3$. We also apply our techniques to bags of size $q \\geq 4$ to provide a $\\Omega\\left(1/q\\right)$-approximation guarantee for a weaker notion of satisfiability. We include comparative experiments on simulated data demonstrating the applicability of our algorithmic techniques.\n \nFrom the complexity side we provide a hardness reduction to produce instances with bags of any constant size $q$. Our reduction proves the NP-hardness of satisfying more than $({1}/{q}) + o(1)$ fraction of a satisfiable collection of such bags using as hypothesis any function of constantly many LTFs, showing thereby that the problem is harder to approximate as the bag size $q$ increases. Using a strengthened analysis, for $q=2$ we obtain a $({4}/{9}) +o(1)$ hardness factor for this problem, improving upon the $({1}/{2}) + o(1)$ factor shown by Saket21.\n", "keywords": "learning from label proportions;linear thresholds;algorithm;hardness", "primary_area": "", "supplementary_material": "/attachment/fb4687a4bfbca0cc229cbe6fe11da9f9eb2acb2b.pdf", "author": "Rishi Saket", "authorids": "~Rishi_Saket1", "gender": "M", "homepage": "", "dblp": "73/3493", "google_scholar": "Gl4bKJgAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Rishi_Saket1", "aff": "Google", "aff_domain": "google.com", "position": "Researcher", "bibtex": "@inproceedings{\nsaket2022algorithms,\ntitle={Algorithms and Hardness for Learning Linear Thresholds from Label Proportions},\nauthor={Rishi Saket},\nbooktitle={Advances in Neural Information Processing Systems},\neditor={Alice H. 
Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\nyear={2022},\nurl={https://openreview.net/forum?id=4LZo68TuF-4}\n}", "github": "", "project": "", "reviewers": "RxJW;Bsrj;gUha", "pdf_size": 752307, "rating": "4;7;7", "confidence": "4;3;3", "soundness": "3;4;4", "novelty": "2;3;3", "presentation": "2;4;4", "contribution": "2;3;3", "wc_summary": "82;312;139", "wc_strengths_and_weaknesses": "356;199;97", "wc_questions": "11;125;41", "wc_limitations": "1;59;1", "wc_review": "450;695;278", "wc_reply_reviewers": "0;4;26", "wc_reply_authors": "815;725;628", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "contribution_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 177.66666666666666, 97.79684157590275 ], "wc_strengths_and_weaknesses_avg": [ 217.33333333333334, 106.52803491204662 ], "wc_questions_avg": [ 59.0, 48.249352327259274 ], "wc_limitations_avg": [ 20.333333333333332, 27.34146220587984 ], "wc_review_avg": [ 474.3333333333333, 171.10685420390254 ], "wc_reply_reviewers_avg": [ 10.0, 11.430952132988164 ], "wc_reply_authors_avg": [ 722.6666666666666, 76.36025726049441 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17801009864628655703&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Local Linear Convergence of Gradient Methods for Subspace Optimization via Strict Complementarity", "status": "Accept", "track": "main", "site": "https://nips.cc/virtual/2022/poster/54921", "id": "4MT-e8mn3X", "proceeding": "https://proceedings.neurips.cc/paper_files/paper/2022/hash/c4b0ffe9946b3a45063ac158b3cd2eff-Abstract-Conference.html", "pdf": "https://openreview.net/pdf?id=4MT-e8mn3X", "openreview": "https://openreview.net/forum?id=4MT-e8mn3X", "poster": "", "slides": "https://nips.cc/virtual/2022/poster/54921", "video": "https://nips.cc/virtual/2022/poster/54921", "author_site": "Ron Fisher, Dan Garber", "tldr": "We prove local linear convergence to optimal solutions of several efficient gradient methods for generalized subspace recovery problems under a strict complementarity condition", "abstract": "We consider optimization problems in which the goal is to find a $k$-dimensional subspace of $\\mathbb{R}^n$, $k<